gpuarithm module for arithmetics operations on matrices
This commit is contained in:
147
modules/gpuarithm/src/cuda/absdiff_mat.cu
Normal file
147
modules/gpuarithm/src/cuda/absdiff_mat.cu
Normal file
@@ -0,0 +1,147 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
struct VAbsDiff4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vabsdiff4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VAbsDiff4() {}
|
||||
__device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {}
|
||||
};
|
||||
|
||||
struct VAbsDiff2 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vabsdiff2(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VAbsDiff2() {}
|
||||
__device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {}
|
||||
};
|
||||
|
||||
__device__ __forceinline__ int _abs(int a)
|
||||
{
|
||||
return ::abs(a);
|
||||
}
|
||||
__device__ __forceinline__ float _abs(float a)
|
||||
{
|
||||
return ::fabsf(a);
|
||||
}
|
||||
__device__ __forceinline__ double _abs(double a)
|
||||
{
|
||||
return ::fabs(a);
|
||||
}
|
||||
|
||||
template <typename T> struct AbsDiffMat : binary_function<T, T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(T a, T b) const
|
||||
{
|
||||
return saturate_cast<T>(_abs(a - b));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ AbsDiffMat() {}
|
||||
__device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <> struct TransformFunctorTraits< arithm::VAbsDiff4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <> struct TransformFunctorTraits< arithm::VAbsDiff2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< arithm::AbsDiffMat<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
void absDiffMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, VAbsDiff4(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
void absDiffMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, VAbsDiff2(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, AbsDiffMat<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void absDiffMat<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absDiffMat<schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absDiffMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absDiffMat<short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absDiffMat<int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absDiffMat<float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absDiffMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
98
modules/gpuarithm/src/cuda/absdiff_scalar.cu
Normal file
98
modules/gpuarithm/src/cuda/absdiff_scalar.cu
Normal file
@@ -0,0 +1,98 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S> struct AbsDiffScalar : unary_function<T, T>
|
||||
{
|
||||
S val;
|
||||
|
||||
explicit AbsDiffScalar(S val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ T operator ()(T a) const
|
||||
{
|
||||
abs_func<S> f;
|
||||
return saturate_cast<T>(f(a - val));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T, typename S> struct TransformFunctorTraits< arithm::AbsDiffScalar<T, S> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S>
|
||||
void absDiffScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
AbsDiffScalar<T, S> op(static_cast<S>(val));
|
||||
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void absDiffScalar<uchar, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absDiffScalar<schar, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absDiffScalar<ushort, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absDiffScalar<short, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absDiffScalar<int, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absDiffScalar<float, float>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absDiffScalar<double, double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
185
modules/gpuarithm/src/cuda/add_mat.cu
Normal file
185
modules/gpuarithm/src/cuda/add_mat.cu
Normal file
@@ -0,0 +1,185 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
struct VAdd4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vadd4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VAdd4() {}
|
||||
__device__ __forceinline__ VAdd4(const VAdd4& other) {}
|
||||
};
|
||||
|
||||
struct VAdd2 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vadd2(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VAdd2() {}
|
||||
__device__ __forceinline__ VAdd2(const VAdd2& other) {}
|
||||
};
|
||||
|
||||
template <typename T, typename D> struct AddMat : binary_function<T, T, D>
|
||||
{
|
||||
__device__ __forceinline__ D operator ()(T a, T b) const
|
||||
{
|
||||
return saturate_cast<D>(a + b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ AddMat() {}
|
||||
__device__ __forceinline__ AddMat(const AddMat& other) {}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <> struct TransformFunctorTraits< arithm::VAdd4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <> struct TransformFunctorTraits< arithm::VAdd2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::AddMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
void addMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, VAdd4(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
void addMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, VAdd2(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T, typename D>
|
||||
void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
|
||||
{
|
||||
if (mask.data)
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), mask, stream);
|
||||
else
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, AddMat<T, D>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void addMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<uchar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<uchar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<uchar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<uchar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<uchar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<uchar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
template void addMat<schar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<schar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<schar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<schar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<schar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<schar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<schar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void addMat<ushort, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addMat<ushort, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<ushort, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<ushort, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<ushort, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<ushort, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<ushort, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void addMat<short, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addMat<short, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<short, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<short, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<short, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<short, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<short, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void addMat<int, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addMat<int, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addMat<int, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addMat<int, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<int, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<int, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void addMat<float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addMat<float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addMat<float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addMat<float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addMat<float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<float, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void addMat<double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addMat<double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addMat<double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addMat<double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addMat<double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addMat<double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addMat<double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
148
modules/gpuarithm/src/cuda/add_scalar.cu
Normal file
148
modules/gpuarithm/src/cuda/add_scalar.cu
Normal file
@@ -0,0 +1,148 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D> struct AddScalar : unary_function<T, D>
|
||||
{
|
||||
S val;
|
||||
|
||||
explicit AddScalar(S val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ D operator ()(T a) const
|
||||
{
|
||||
return saturate_cast<D>(a + val);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::AddScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D>
|
||||
void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
|
||||
{
|
||||
AddScalar<T, S, D> op(static_cast<S>(val));
|
||||
|
||||
if (mask.data)
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream);
|
||||
else
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void addScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
template void addScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void addScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void addScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void addScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void addScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void addScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void addScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void addScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
364
modules/gpuarithm/src/cuda/add_weighted.cu
Normal file
364
modules/gpuarithm/src/cuda/add_weighted.cu
Normal file
@@ -0,0 +1,364 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T> struct UseDouble_
|
||||
{
|
||||
enum {value = 0};
|
||||
};
|
||||
template <> struct UseDouble_<double>
|
||||
{
|
||||
enum {value = 1};
|
||||
};
|
||||
template <typename T1, typename T2, typename D> struct UseDouble
|
||||
{
|
||||
enum {value = (UseDouble_<T1>::value || UseDouble_<T2>::value || UseDouble_<D>::value)};
|
||||
};
|
||||
|
||||
template <typename T1, typename T2, typename D, bool useDouble> struct AddWeighted_;
|
||||
template <typename T1, typename T2, typename D> struct AddWeighted_<T1, T2, D, false> : binary_function<T1, T2, D>
|
||||
{
|
||||
float alpha;
|
||||
float beta;
|
||||
float gamma;
|
||||
|
||||
AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(static_cast<float>(alpha_)), beta(static_cast<float>(beta_)), gamma(static_cast<float>(gamma_)) {}
|
||||
|
||||
__device__ __forceinline__ D operator ()(T1 a, T2 b) const
|
||||
{
|
||||
return saturate_cast<D>(a * alpha + b * beta + gamma);
|
||||
}
|
||||
};
|
||||
template <typename T1, typename T2, typename D> struct AddWeighted_<T1, T2, D, true> : binary_function<T1, T2, D>
|
||||
{
|
||||
double alpha;
|
||||
double beta;
|
||||
double gamma;
|
||||
|
||||
AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {}
|
||||
|
||||
__device__ __forceinline__ D operator ()(T1 a, T2 b) const
|
||||
{
|
||||
return saturate_cast<D>(a * alpha + b * beta + gamma);
|
||||
}
|
||||
};
|
||||
template <typename T1, typename T2, typename D> struct AddWeighted : AddWeighted_<T1, T2, D, UseDouble<T1, T2, D>::value>
|
||||
{
|
||||
AddWeighted(double alpha_, double beta_, double gamma_) : AddWeighted_<T1, T2, D, UseDouble<T1, T2, D>::value>(alpha_, beta_, gamma_) {}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T1, typename T2, typename D, size_t src1_size, size_t src2_size, size_t dst_size> struct AddWeightedTraits : DefaultTransformFunctorTraits< arithm::AddWeighted<T1, T2, D> >
|
||||
{
|
||||
};
|
||||
template <typename T1, typename T2, typename D, size_t src_size, size_t dst_size> struct AddWeightedTraits<T1, T2, D, src_size, src_size, dst_size> : arithm::ArithmFuncTraits<src_size, dst_size>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T1, typename T2, typename D> struct TransformFunctorTraits< arithm::AddWeighted<T1, T2, D> > : AddWeightedTraits<T1, T2, D, sizeof(T1), sizeof(T2), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T1, typename T2, typename D>
|
||||
void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
AddWeighted<T1, T2, D> op(alpha, beta, gamma);
|
||||
|
||||
cudev::transform((PtrStepSz<T1>) src1, (PtrStepSz<T2>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void addWeighted<uchar, uchar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, uchar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, uchar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, uchar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, uchar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, uchar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, uchar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<uchar, schar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, schar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, schar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, schar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, schar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, schar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, schar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<uchar, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<uchar, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<uchar, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<uchar, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<uchar, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<uchar, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
|
||||
|
||||
template void addWeighted<schar, schar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, schar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, schar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, schar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, schar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, schar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, schar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<schar, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<schar, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<schar, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<schar, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<schar, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<schar, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
|
||||
|
||||
template void addWeighted<ushort, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<ushort, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<ushort, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<ushort, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<ushort, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<ushort, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
|
||||
|
||||
template void addWeighted<short, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<short, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<short, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<short, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<short, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
|
||||
|
||||
template void addWeighted<int, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<int, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<int, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<int, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
|
||||
|
||||
template void addWeighted<float, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<float, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<float, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<float, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<float, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<float, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<float, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void addWeighted<float, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<float, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<float, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<float, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<float, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<float, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<float, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
|
||||
|
||||
template void addWeighted<double, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<double, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<double, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<double, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<double, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<double, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void addWeighted<double, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
145
modules/gpuarithm/src/cuda/arithm_func_traits.hpp
Normal file
145
modules/gpuarithm/src/cuda/arithm_func_traits.hpp
Normal file
@@ -0,0 +1,145 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __ARITHM_FUNC_TRAITS_HPP__
|
||||
#define __ARITHM_FUNC_TRAITS_HPP__
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <size_t src_size, size_t dst_size> struct ArithmFuncTraits
|
||||
{
|
||||
enum { simple_block_dim_x = 32 };
|
||||
enum { simple_block_dim_y = 8 };
|
||||
|
||||
enum { smart_block_dim_x = 32 };
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 1 };
|
||||
};
|
||||
|
||||
template <> struct ArithmFuncTraits<1, 1>
|
||||
{
|
||||
enum { simple_block_dim_x = 32 };
|
||||
enum { simple_block_dim_y = 8 };
|
||||
|
||||
enum { smart_block_dim_x = 32 };
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
template <> struct ArithmFuncTraits<1, 2>
|
||||
{
|
||||
enum { simple_block_dim_x = 32 };
|
||||
enum { simple_block_dim_y = 8 };
|
||||
|
||||
enum { smart_block_dim_x = 32 };
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
template <> struct ArithmFuncTraits<1, 4>
|
||||
{
|
||||
enum { simple_block_dim_x = 32 };
|
||||
enum { simple_block_dim_y = 8 };
|
||||
|
||||
enum { smart_block_dim_x = 32 };
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
template <> struct ArithmFuncTraits<2, 1>
|
||||
{
|
||||
enum { simple_block_dim_x = 32 };
|
||||
enum { simple_block_dim_y = 8 };
|
||||
|
||||
enum { smart_block_dim_x = 32 };
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
template <> struct ArithmFuncTraits<2, 2>
|
||||
{
|
||||
enum { simple_block_dim_x = 32 };
|
||||
enum { simple_block_dim_y = 8 };
|
||||
|
||||
enum { smart_block_dim_x = 32 };
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
template <> struct ArithmFuncTraits<2, 4>
|
||||
{
|
||||
enum { simple_block_dim_x = 32 };
|
||||
enum { simple_block_dim_y = 8 };
|
||||
|
||||
enum { smart_block_dim_x = 32 };
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
template <> struct ArithmFuncTraits<4, 1>
|
||||
{
|
||||
enum { simple_block_dim_x = 32 };
|
||||
enum { simple_block_dim_y = 8 };
|
||||
|
||||
enum { smart_block_dim_x = 32 };
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
template <> struct ArithmFuncTraits<4, 2>
|
||||
{
|
||||
enum { simple_block_dim_x = 32 };
|
||||
enum { simple_block_dim_y = 8 };
|
||||
|
||||
enum { smart_block_dim_x = 32 };
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
template <> struct ArithmFuncTraits<4, 4>
|
||||
{
|
||||
enum { simple_block_dim_x = 32 };
|
||||
enum { simple_block_dim_y = 8 };
|
||||
|
||||
enum { smart_block_dim_x = 32 };
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
}
|
||||
|
||||
#endif // __ARITHM_FUNC_TRAITS_HPP__
|
126
modules/gpuarithm/src/cuda/bitwise_mat.cu
Normal file
126
modules/gpuarithm/src/cuda/bitwise_mat.cu
Normal file
@@ -0,0 +1,126 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< bit_not<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< bit_and<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< bit_or<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< bit_xor<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T> void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
|
||||
{
|
||||
if (mask.data)
|
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), mask, stream);
|
||||
else
|
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, bit_not<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T> void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
|
||||
{
|
||||
if (mask.data)
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), mask, stream);
|
||||
else
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_and<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T> void bitMatOr(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
|
||||
{
|
||||
if (mask.data)
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), mask, stream);
|
||||
else
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_or<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T> void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
|
||||
{
|
||||
if (mask.data)
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), mask, stream);
|
||||
else
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, bit_xor<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void bitMatNot<uchar>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void bitMatNot<ushort>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void bitMatNot<uint>(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
template void bitMatAnd<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void bitMatAnd<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void bitMatAnd<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
template void bitMatOr<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void bitMatOr<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void bitMatOr<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
template void bitMatXor<uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void bitMatXor<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void bitMatXor<uint>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
104
modules/gpuarithm/src/cuda/bitwise_scalar.cu
Normal file
104
modules/gpuarithm/src/cuda/bitwise_scalar.cu
Normal file
@@ -0,0 +1,104 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< bit_and<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< bit_or<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< bit_xor<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T> void bitScalarAnd(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_and<T>(), src2), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T> void bitScalarOr(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_or<T>(), src2), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T> void bitScalarXor(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(bit_xor<T>(), src2), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void bitScalarAnd<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarAnd<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarAnd<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarAnd<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void bitScalarOr<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarOr<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarOr<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarOr<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void bitScalarXor<uchar>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarXor<ushort>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarXor<int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void bitScalarXor<unsigned int>(PtrStepSzb src1, uint src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
206
modules/gpuarithm/src/cuda/cmp_mat.cu
Normal file
206
modules/gpuarithm/src/cuda/cmp_mat.cu
Normal file
@@ -0,0 +1,206 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
struct VCmpEq4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vcmpeq4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VCmpEq4() {}
|
||||
__device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {}
|
||||
};
|
||||
struct VCmpNe4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vcmpne4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VCmpNe4() {}
|
||||
__device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {}
|
||||
};
|
||||
struct VCmpLt4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vcmplt4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VCmpLt4() {}
|
||||
__device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {}
|
||||
};
|
||||
struct VCmpLe4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vcmple4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VCmpLe4() {}
|
||||
__device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {}
|
||||
};
|
||||
|
||||
template <class Op, typename T>
|
||||
struct Cmp : binary_function<T, T, uchar>
|
||||
{
|
||||
__device__ __forceinline__ uchar operator()(T a, T b) const
|
||||
{
|
||||
Op op;
|
||||
return -op(a, b);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <> struct TransformFunctorTraits< arithm::VCmpEq4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
template <> struct TransformFunctorTraits< arithm::VCmpNe4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
template <> struct TransformFunctorTraits< arithm::VCmpLt4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
template <> struct TransformFunctorTraits< arithm::VCmpLe4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <class Op, typename T> struct TransformFunctorTraits< arithm::Cmp<Op, T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
void cmpMatEq_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, VCmpEq4(), WithOutMask(), stream);
|
||||
}
|
||||
void cmpMatNe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, VCmpNe4(), WithOutMask(), stream);
|
||||
}
|
||||
void cmpMatLt_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, VCmpLt4(), WithOutMask(), stream);
|
||||
}
|
||||
void cmpMatLe_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, VCmpLe4(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <template <typename> class Op, typename T>
|
||||
void cmpMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
Cmp<Op<T>, T> op;
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T> void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cmpMat<equal_to, T>(src1, src2, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cmpMat<not_equal_to, T>(src1, src2, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cmpMat<less, T>(src1, src2, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cmpMat<less_equal, T>(src1, src2, dst, stream);
|
||||
}
|
||||
|
||||
template void cmpMatEq<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatEq<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatEq<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatEq<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatEq<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatEq<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatEq<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void cmpMatNe<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatNe<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatNe<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatNe<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatNe<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatNe<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatNe<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void cmpMatLt<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLt<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLt<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLt<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLt<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLt<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLt<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void cmpMatLe<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLe<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLe<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLe<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLe<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLe<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpMatLe<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
284
modules/gpuarithm/src/cuda/cmp_scalar.cu
Normal file
284
modules/gpuarithm/src/cuda/cmp_scalar.cu
Normal file
@@ -0,0 +1,284 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <class Op, typename T>
|
||||
struct Cmp : binary_function<T, T, uchar>
|
||||
{
|
||||
__device__ __forceinline__ uchar operator()(T a, T b) const
|
||||
{
|
||||
Op op;
|
||||
return -op(a, b);
|
||||
}
|
||||
};
|
||||
|
||||
#define TYPE_VEC(type, cn) typename TypeVec<type, cn>::vec_type
|
||||
|
||||
template <class Op, typename T, int cn> struct CmpScalar;
|
||||
template <class Op, typename T>
|
||||
struct CmpScalar<Op, T, 1> : unary_function<T, uchar>
|
||||
{
|
||||
const T val;
|
||||
|
||||
__host__ explicit CmpScalar(T val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ uchar operator()(T src) const
|
||||
{
|
||||
Cmp<Op, T> op;
|
||||
return op(src, val);
|
||||
}
|
||||
};
|
||||
template <class Op, typename T>
|
||||
struct CmpScalar<Op, T, 2> : unary_function<TYPE_VEC(T, 2), TYPE_VEC(uchar, 2)>
|
||||
{
|
||||
const TYPE_VEC(T, 2) val;
|
||||
|
||||
__host__ explicit CmpScalar(TYPE_VEC(T, 2) val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ TYPE_VEC(uchar, 2) operator()(const TYPE_VEC(T, 2) & src) const
|
||||
{
|
||||
Cmp<Op, T> op;
|
||||
return VecTraits<TYPE_VEC(uchar, 2)>::make(op(src.x, val.x), op(src.y, val.y));
|
||||
}
|
||||
};
|
||||
template <class Op, typename T>
|
||||
struct CmpScalar<Op, T, 3> : unary_function<TYPE_VEC(T, 3), TYPE_VEC(uchar, 3)>
|
||||
{
|
||||
const TYPE_VEC(T, 3) val;
|
||||
|
||||
__host__ explicit CmpScalar(TYPE_VEC(T, 3) val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ TYPE_VEC(uchar, 3) operator()(const TYPE_VEC(T, 3) & src) const
|
||||
{
|
||||
Cmp<Op, T> op;
|
||||
return VecTraits<TYPE_VEC(uchar, 3)>::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z));
|
||||
}
|
||||
};
|
||||
template <class Op, typename T>
|
||||
struct CmpScalar<Op, T, 4> : unary_function<TYPE_VEC(T, 4), TYPE_VEC(uchar, 4)>
|
||||
{
|
||||
const TYPE_VEC(T, 4) val;
|
||||
|
||||
__host__ explicit CmpScalar(TYPE_VEC(T, 4) val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ TYPE_VEC(uchar, 4) operator()(const TYPE_VEC(T, 4) & src) const
|
||||
{
|
||||
Cmp<Op, T> op;
|
||||
return VecTraits<TYPE_VEC(uchar, 4)>::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z), op(src.w, val.w));
|
||||
}
|
||||
};
|
||||
|
||||
#undef TYPE_VEC
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <class Op, typename T> struct TransformFunctorTraits< arithm::CmpScalar<Op, T, 1> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <template <typename> class Op, typename T, int cn>
|
||||
void cmpScalar(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
typedef typename TypeVec<T, cn>::vec_type src_t;
|
||||
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
|
||||
|
||||
T sval[] = {static_cast<T>(val[0]), static_cast<T>(val[1]), static_cast<T>(val[2]), static_cast<T>(val[3])};
|
||||
src_t val1 = VecTraits<src_t>::make(sval);
|
||||
|
||||
CmpScalar<Op<T>, T, cn> op(val1);
|
||||
cudev::transform((PtrStepSz<src_t>) src, (PtrStepSz<dst_t>) dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T> void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
0,
|
||||
cmpScalar<equal_to, T, 1>,
|
||||
cmpScalar<equal_to, T, 2>,
|
||||
cmpScalar<equal_to, T, 3>,
|
||||
cmpScalar<equal_to, T, 4>
|
||||
};
|
||||
|
||||
funcs[cn](src, val, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
0,
|
||||
cmpScalar<not_equal_to, T, 1>,
|
||||
cmpScalar<not_equal_to, T, 2>,
|
||||
cmpScalar<not_equal_to, T, 3>,
|
||||
cmpScalar<not_equal_to, T, 4>
|
||||
};
|
||||
|
||||
funcs[cn](src, val, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
0,
|
||||
cmpScalar<less, T, 1>,
|
||||
cmpScalar<less, T, 2>,
|
||||
cmpScalar<less, T, 3>,
|
||||
cmpScalar<less, T, 4>
|
||||
};
|
||||
|
||||
funcs[cn](src, val, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
0,
|
||||
cmpScalar<less_equal, T, 1>,
|
||||
cmpScalar<less_equal, T, 2>,
|
||||
cmpScalar<less_equal, T, 3>,
|
||||
cmpScalar<less_equal, T, 4>
|
||||
};
|
||||
|
||||
funcs[cn](src, val, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
0,
|
||||
cmpScalar<greater, T, 1>,
|
||||
cmpScalar<greater, T, 2>,
|
||||
cmpScalar<greater, T, 3>,
|
||||
cmpScalar<greater, T, 4>
|
||||
};
|
||||
|
||||
funcs[cn](src, val, dst, stream);
|
||||
}
|
||||
template <typename T> void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
0,
|
||||
cmpScalar<greater_equal, T, 1>,
|
||||
cmpScalar<greater_equal, T, 2>,
|
||||
cmpScalar<greater_equal, T, 3>,
|
||||
cmpScalar<greater_equal, T, 4>
|
||||
};
|
||||
|
||||
funcs[cn](src, val, dst, stream);
|
||||
}
|
||||
|
||||
template void cmpScalarEq<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarEq<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarEq<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarEq<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarEq<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarEq<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarEq<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void cmpScalarNe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarNe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarNe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarNe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarNe<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarNe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarNe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void cmpScalarLt<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLt<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLt<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLt<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLt<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLt<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLt<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void cmpScalarLe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLe<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarLe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void cmpScalarGt<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGt<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGt<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGt<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGt<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGt<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGt<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void cmpScalarGe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGe<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
template void cmpScalarGe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
175
modules/gpuarithm/src/cuda/countnonzero.cu
Normal file
175
modules/gpuarithm/src/cuda/countnonzero.cu
Normal file
@@ -0,0 +1,175 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/vec_traits.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
#include "opencv2/core/cuda/reduce.hpp"
|
||||
#include "opencv2/core/cuda/emulation.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace countNonZero
|
||||
{
|
||||
__device__ unsigned int blocks_finished = 0;
|
||||
|
||||
template <int BLOCK_SIZE, typename T>
|
||||
__global__ void kernel(const PtrStepSz<T> src, unsigned int* count, const int twidth, const int theight)
|
||||
{
|
||||
__shared__ unsigned int scount[BLOCK_SIZE];
|
||||
|
||||
const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
|
||||
const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
|
||||
|
||||
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||
|
||||
unsigned int mycount = 0;
|
||||
|
||||
for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y)
|
||||
{
|
||||
const T* ptr = src.ptr(y);
|
||||
|
||||
for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x)
|
||||
{
|
||||
const T srcVal = ptr[x];
|
||||
|
||||
mycount += (srcVal != 0);
|
||||
}
|
||||
}
|
||||
|
||||
cudev::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());
|
||||
|
||||
#if __CUDA_ARCH__ >= 200
|
||||
if (tid == 0)
|
||||
::atomicAdd(count, mycount);
|
||||
#else
|
||||
__shared__ bool is_last;
|
||||
const int bid = blockIdx.y * gridDim.x + blockIdx.x;
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
count[bid] = mycount;
|
||||
|
||||
__threadfence();
|
||||
|
||||
unsigned int ticket = ::atomicInc(&blocks_finished, gridDim.x * gridDim.y);
|
||||
is_last = (ticket == gridDim.x * gridDim.y - 1);
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (is_last)
|
||||
{
|
||||
mycount = tid < gridDim.x * gridDim.y ? count[tid] : 0;
|
||||
|
||||
cudev::reduce<BLOCK_SIZE>(scount, mycount, tid, plus<unsigned int>());
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
count[0] = mycount;
|
||||
|
||||
blocks_finished = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
const int threads_x = 32;
|
||||
const int threads_y = 8;
|
||||
|
||||
void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
|
||||
{
|
||||
block = dim3(threads_x, threads_y);
|
||||
|
||||
grid = dim3(divUp(cols, block.x * block.y),
|
||||
divUp(rows, block.y * block.x));
|
||||
|
||||
grid.x = ::min(grid.x, block.x);
|
||||
grid.y = ::min(grid.y, block.y);
|
||||
}
|
||||
|
||||
void getBufSize(int cols, int rows, int& bufcols, int& bufrows)
|
||||
{
|
||||
dim3 block, grid;
|
||||
getLaunchCfg(cols, rows, block, grid);
|
||||
|
||||
bufcols = grid.x * grid.y * sizeof(int);
|
||||
bufrows = 1;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int run(const PtrStepSzb src, PtrStep<unsigned int> buf)
|
||||
{
|
||||
dim3 block, grid;
|
||||
getLaunchCfg(src.cols, src.rows, block, grid);
|
||||
|
||||
const int twidth = divUp(divUp(src.cols, grid.x), block.x);
|
||||
const int theight = divUp(divUp(src.rows, grid.y), block.y);
|
||||
|
||||
unsigned int* count_buf = buf.ptr(0);
|
||||
|
||||
cudaSafeCall( cudaMemset(count_buf, 0, sizeof(unsigned int)) );
|
||||
|
||||
kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, count_buf, twidth, theight);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
unsigned int count;
|
||||
cudaSafeCall(cudaMemcpy(&count, count_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost));
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
template int run<uchar >(const PtrStepSzb src, PtrStep<unsigned int> buf);
|
||||
template int run<schar >(const PtrStepSzb src, PtrStep<unsigned int> buf);
|
||||
template int run<ushort>(const PtrStepSzb src, PtrStep<unsigned int> buf);
|
||||
template int run<short >(const PtrStepSzb src, PtrStep<unsigned int> buf);
|
||||
template int run<int >(const PtrStepSzb src, PtrStep<unsigned int> buf);
|
||||
template int run<float >(const PtrStepSzb src, PtrStep<unsigned int> buf);
|
||||
template int run<double>(const PtrStepSzb src, PtrStep<unsigned int> buf);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
144
modules/gpuarithm/src/cuda/div_inv.cu
Normal file
144
modules/gpuarithm/src/cuda/div_inv.cu
Normal file
@@ -0,0 +1,144 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D> struct DivInv : unary_function<T, D>
|
||||
{
|
||||
S val;
|
||||
|
||||
explicit DivInv(S val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ D operator ()(T a) const
|
||||
{
|
||||
return a != 0 ? saturate_cast<D>(val / a) : 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivInv<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D>
|
||||
void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
DivInv<T, S, D> op(static_cast<S>(val));
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void divInv<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void divInv<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divInv<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divInv<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divInv<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divInv<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divInv<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divInv<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divInv<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divInv<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divInv<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divInv<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divInv<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divInv<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divInv<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divInv<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divInv<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divInv<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divInv<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divInv<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divInv<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divInv<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
230
modules/gpuarithm/src/cuda/div_mat.cu
Normal file
230
modules/gpuarithm/src/cuda/div_mat.cu
Normal file
@@ -0,0 +1,230 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
struct Div_8uc4_32f : binary_function<uint, float, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, float b) const
|
||||
{
|
||||
uint res = 0;
|
||||
|
||||
if (b != 0)
|
||||
{
|
||||
b = 1.0f / b;
|
||||
res |= (saturate_cast<uchar>((0xffu & (a )) * b) );
|
||||
res |= (saturate_cast<uchar>((0xffu & (a >> 8)) * b) << 8);
|
||||
res |= (saturate_cast<uchar>((0xffu & (a >> 16)) * b) << 16);
|
||||
res |= (saturate_cast<uchar>((0xffu & (a >> 24)) * b) << 24);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
struct Div_16sc4_32f : binary_function<short4, float, short4>
|
||||
{
|
||||
__device__ __forceinline__ short4 operator ()(short4 a, float b) const
|
||||
{
|
||||
return b != 0 ? make_short4(saturate_cast<short>(a.x / b), saturate_cast<short>(a.y / b),
|
||||
saturate_cast<short>(a.z / b), saturate_cast<short>(a.w / b))
|
||||
: make_short4(0,0,0,0);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, typename D> struct Div : binary_function<T, T, D>
|
||||
{
|
||||
__device__ __forceinline__ D operator ()(T a, T b) const
|
||||
{
|
||||
return b != 0 ? saturate_cast<D>(a / b) : 0;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Div() {}
|
||||
__device__ __forceinline__ Div(const Div& other) {}
|
||||
};
|
||||
template <typename T> struct Div<T, float> : binary_function<T, T, float>
|
||||
{
|
||||
__device__ __forceinline__ float operator ()(T a, T b) const
|
||||
{
|
||||
return b != 0 ? static_cast<float>(a) / b : 0;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Div() {}
|
||||
__device__ __forceinline__ Div(const Div& other) {}
|
||||
};
|
||||
template <typename T> struct Div<T, double> : binary_function<T, T, double>
|
||||
{
|
||||
__device__ __forceinline__ double operator ()(T a, T b) const
|
||||
{
|
||||
return b != 0 ? static_cast<double>(a) / b : 0;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Div() {}
|
||||
__device__ __forceinline__ Div(const Div& other) {}
|
||||
};
|
||||
|
||||
template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
|
||||
{
|
||||
S scale;
|
||||
|
||||
explicit DivScale(S scale_) : scale(scale_) {}
|
||||
|
||||
__device__ __forceinline__ D operator ()(T a, T b) const
|
||||
{
|
||||
return b != 0 ? saturate_cast<D>(scale * a / b) : 0;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <> struct TransformFunctorTraits<arithm::Div_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::Div<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScale<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
void divMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, Div_8uc4_32f(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
void divMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, Div_16sc4_32f(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T, typename S, typename D>
|
||||
void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream)
|
||||
{
|
||||
if (scale == 1)
|
||||
{
|
||||
Div<T, D> op;
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
DivScale<T, S, D> op(static_cast<S>(scale));
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
|
||||
}
|
||||
}
|
||||
|
||||
template void divMat<uchar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<uchar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<uchar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<uchar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<uchar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<uchar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<uchar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
template void divMat<schar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<schar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<schar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<schar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<schar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<schar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<schar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void divMat<ushort, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<ushort, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<ushort, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<ushort, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<ushort, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<ushort, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<ushort, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void divMat<short, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<short, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<short, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<short, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<short, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<short, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<short, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void divMat<int, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<int, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<int, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<int, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<int, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<int, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<int, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void divMat<float, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<float, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<float, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<float, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<float, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<float, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<float, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void divMat<double, double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<double, double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<double, double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<double, double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<double, double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void divMat<double, double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void divMat<double, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
144
modules/gpuarithm/src/cuda/div_scalar.cu
Normal file
144
modules/gpuarithm/src/cuda/div_scalar.cu
Normal file
@@ -0,0 +1,144 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D> struct DivScalar : unary_function<T, D>
|
||||
{
|
||||
S val;
|
||||
|
||||
explicit DivScalar(S val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ D operator ()(T a) const
|
||||
{
|
||||
return saturate_cast<D>(a / val);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D>
|
||||
void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
DivScalar<T, S, D> op(static_cast<S>(val));
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void divScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void divScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void divScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void divScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void divScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
302
modules/gpuarithm/src/cuda/math.cu
Normal file
302
modules/gpuarithm/src/cuda/math.cu
Normal file
@@ -0,0 +1,302 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
#include "opencv2/core/cuda/limits.hpp"
|
||||
#include "opencv2/core/cuda/type_traits.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// absMat
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< abs_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T>
|
||||
void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, abs_func<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void absMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void absMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// sqrMat
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T> struct Sqr : unary_function<T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(T x) const
|
||||
{
|
||||
return saturate_cast<T>(x * x);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Sqr() {}
|
||||
__device__ __forceinline__ Sqr(const Sqr& other) {}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< arithm::Sqr<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T>
|
||||
void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Sqr<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void sqrMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void sqrMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void sqrMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void sqrMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void sqrMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void sqrMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void sqrMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// sqrtMat
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< sqrt_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T>
|
||||
void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, sqrt_func<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void sqrtMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void sqrtMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void sqrtMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void sqrtMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void sqrtMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void sqrtMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void sqrtMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// logMat
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< log_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T>
|
||||
void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, log_func<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void logMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void logMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void logMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void logMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void logMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void logMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void logMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// expMat
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T> struct Exp : unary_function<T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(T x) const
|
||||
{
|
||||
exp_func<T> f;
|
||||
return saturate_cast<T>(f(x));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Exp() {}
|
||||
__device__ __forceinline__ Exp(const Exp& other) {}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< arithm::Exp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T>
|
||||
void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, Exp<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void expMat<uchar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void expMat<schar>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void expMat<ushort>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void expMat<short>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void expMat<int>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void expMat<float>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void expMat<double>(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// pow
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template<typename T, bool Signed = numeric_limits<T>::is_signed> struct PowOp : unary_function<T, T>
|
||||
{
|
||||
float power;
|
||||
|
||||
PowOp(double power_) : power(static_cast<float>(power_)) {}
|
||||
|
||||
__device__ __forceinline__ T operator()(T e) const
|
||||
{
|
||||
return saturate_cast<T>(__powf((float)e, power));
|
||||
}
|
||||
};
|
||||
template<typename T> struct PowOp<T, true> : unary_function<T, T>
|
||||
{
|
||||
float power;
|
||||
|
||||
PowOp(double power_) : power(static_cast<float>(power_)) {}
|
||||
|
||||
__device__ __forceinline__ T operator()(T e) const
|
||||
{
|
||||
T res = saturate_cast<T>(__powf((float)e, power));
|
||||
|
||||
if ((e < 0) && (1 & static_cast<int>(power)))
|
||||
res *= -1;
|
||||
|
||||
return res;
|
||||
}
|
||||
};
|
||||
template<> struct PowOp<float> : unary_function<float, float>
|
||||
{
|
||||
const float power;
|
||||
|
||||
PowOp(double power_) : power(static_cast<float>(power_)) {}
|
||||
|
||||
__device__ __forceinline__ float operator()(float e) const
|
||||
{
|
||||
return __powf(::fabs(e), power);
|
||||
}
|
||||
};
|
||||
template<> struct PowOp<double> : unary_function<double, double>
|
||||
{
|
||||
double power;
|
||||
|
||||
PowOp(double power_) : power(power_) {}
|
||||
|
||||
__device__ __forceinline__ double operator()(double e) const
|
||||
{
|
||||
return ::pow(::fabs(e), power);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< arithm::PowOp<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template<typename T>
|
||||
void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform((PtrStepSz<T>) src, (PtrStepSz<T>) dst, PowOp<T>(power), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void pow<uchar>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pow<schar>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pow<short>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pow<ushort>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pow<int>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pow<float>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void pow<double>(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
246
modules/gpuarithm/src/cuda/minmax.cu
Normal file
246
modules/gpuarithm/src/cuda/minmax.cu
Normal file
@@ -0,0 +1,246 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/vec_traits.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
#include "opencv2/core/cuda/reduce.hpp"
|
||||
#include "opencv2/core/cuda/emulation.hpp"
|
||||
#include "opencv2/core/cuda/limits.hpp"
|
||||
#include "opencv2/core/cuda/utility.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace minMax
|
||||
{
|
||||
__device__ unsigned int blocks_finished = 0;
|
||||
|
||||
// To avoid shared bank conflicts we convert each value into value of
|
||||
// appropriate type (32 bits minimum)
|
||||
template <typename T> struct MinMaxTypeTraits;
|
||||
template <> struct MinMaxTypeTraits<uchar> { typedef int best_type; };
|
||||
template <> struct MinMaxTypeTraits<schar> { typedef int best_type; };
|
||||
template <> struct MinMaxTypeTraits<ushort> { typedef int best_type; };
|
||||
template <> struct MinMaxTypeTraits<short> { typedef int best_type; };
|
||||
template <> struct MinMaxTypeTraits<int> { typedef int best_type; };
|
||||
template <> struct MinMaxTypeTraits<float> { typedef float best_type; };
|
||||
template <> struct MinMaxTypeTraits<double> { typedef double best_type; };
|
||||
|
||||
template <int BLOCK_SIZE, typename R>
|
||||
struct GlobalReduce
|
||||
{
|
||||
static __device__ void run(R& mymin, R& mymax, R* minval, R* maxval, int tid, int bid, R* sminval, R* smaxval)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 200
|
||||
if (tid == 0)
|
||||
{
|
||||
Emulation::glob::atomicMin(minval, mymin);
|
||||
Emulation::glob::atomicMax(maxval, mymax);
|
||||
}
|
||||
#else
|
||||
__shared__ bool is_last;
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
minval[bid] = mymin;
|
||||
maxval[bid] = mymax;
|
||||
|
||||
__threadfence();
|
||||
|
||||
unsigned int ticket = ::atomicAdd(&blocks_finished, 1);
|
||||
is_last = (ticket == gridDim.x * gridDim.y - 1);
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (is_last)
|
||||
{
|
||||
int idx = ::min(tid, gridDim.x * gridDim.y - 1);
|
||||
|
||||
mymin = minval[idx];
|
||||
mymax = maxval[idx];
|
||||
|
||||
const minimum<R> minOp;
|
||||
const maximum<R> maxOp;
|
||||
cudev::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
minval[0] = mymin;
|
||||
maxval[0] = mymax;
|
||||
|
||||
blocks_finished = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template <int BLOCK_SIZE, typename T, typename R, class Mask>
|
||||
__global__ void kernel(const PtrStepSz<T> src, const Mask mask, R* minval, R* maxval, const int twidth, const int theight)
|
||||
{
|
||||
__shared__ R sminval[BLOCK_SIZE];
|
||||
__shared__ R smaxval[BLOCK_SIZE];
|
||||
|
||||
const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
|
||||
const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
|
||||
|
||||
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||
const int bid = blockIdx.y * gridDim.x + blockIdx.x;
|
||||
|
||||
R mymin = numeric_limits<R>::max();
|
||||
R mymax = -numeric_limits<R>::max();
|
||||
|
||||
const minimum<R> minOp;
|
||||
const maximum<R> maxOp;
|
||||
|
||||
for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y)
|
||||
{
|
||||
const T* ptr = src.ptr(y);
|
||||
|
||||
for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x)
|
||||
{
|
||||
if (mask(y, x))
|
||||
{
|
||||
const R srcVal = ptr[x];
|
||||
|
||||
mymin = minOp(mymin, srcVal);
|
||||
mymax = maxOp(mymax, srcVal);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cudev::reduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax), tid, thrust::make_tuple(minOp, maxOp));
|
||||
|
||||
GlobalReduce<BLOCK_SIZE, R>::run(mymin, mymax, minval, maxval, tid, bid, sminval, smaxval);
|
||||
}
|
||||
|
||||
const int threads_x = 32;
|
||||
const int threads_y = 8;
|
||||
|
||||
void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
|
||||
{
|
||||
block = dim3(threads_x, threads_y);
|
||||
|
||||
grid = dim3(divUp(cols, block.x * block.y),
|
||||
divUp(rows, block.y * block.x));
|
||||
|
||||
grid.x = ::min(grid.x, block.x);
|
||||
grid.y = ::min(grid.y, block.y);
|
||||
}
|
||||
|
||||
void getBufSize(int cols, int rows, int& bufcols, int& bufrows)
|
||||
{
|
||||
dim3 block, grid;
|
||||
getLaunchCfg(cols, rows, block, grid);
|
||||
|
||||
bufcols = grid.x * grid.y * sizeof(double);
|
||||
bufrows = 2;
|
||||
}
|
||||
|
||||
__global__ void setDefaultKernel(int* minval_buf, int* maxval_buf)
|
||||
{
|
||||
*minval_buf = numeric_limits<int>::max();
|
||||
*maxval_buf = numeric_limits<int>::min();
|
||||
}
|
||||
__global__ void setDefaultKernel(float* minval_buf, float* maxval_buf)
|
||||
{
|
||||
*minval_buf = numeric_limits<float>::max();
|
||||
*maxval_buf = -numeric_limits<float>::max();
|
||||
}
|
||||
__global__ void setDefaultKernel(double* minval_buf, double* maxval_buf)
|
||||
{
|
||||
*minval_buf = numeric_limits<double>::max();
|
||||
*maxval_buf = -numeric_limits<double>::max();
|
||||
}
|
||||
|
||||
template <typename R>
|
||||
void setDefault(R* minval_buf, R* maxval_buf)
|
||||
{
|
||||
setDefaultKernel<<<1, 1>>>(minval_buf, maxval_buf);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf)
|
||||
{
|
||||
typedef typename MinMaxTypeTraits<T>::best_type R;
|
||||
|
||||
dim3 block, grid;
|
||||
getLaunchCfg(src.cols, src.rows, block, grid);
|
||||
|
||||
const int twidth = divUp(divUp(src.cols, grid.x), block.x);
|
||||
const int theight = divUp(divUp(src.rows, grid.y), block.y);
|
||||
|
||||
R* minval_buf = (R*) buf.ptr(0);
|
||||
R* maxval_buf = (R*) buf.ptr(1);
|
||||
|
||||
setDefault(minval_buf, maxval_buf);
|
||||
|
||||
if (mask.data)
|
||||
kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, SingleMask(mask), minval_buf, maxval_buf, twidth, theight);
|
||||
else
|
||||
kernel<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, twidth, theight);
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
R minval_, maxval_;
|
||||
cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(R), cudaMemcpyDeviceToHost) );
|
||||
*minval = minval_;
|
||||
*maxval = maxval_;
|
||||
}
|
||||
|
||||
template void run<uchar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
||||
template void run<schar >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
||||
template void run<ushort>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
||||
template void run<short >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
||||
template void run<int >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
||||
template void run<float >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
||||
template void run<double>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
228
modules/gpuarithm/src/cuda/minmax_mat.cu
Normal file
228
modules/gpuarithm/src/cuda/minmax_mat.cu
Normal file
@@ -0,0 +1,228 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// min
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
struct VMin4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vmin4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VMin4() {}
|
||||
__device__ __forceinline__ VMin4(const VMin4& other) {}
|
||||
};
|
||||
|
||||
struct VMin2 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vmin2(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VMin2() {}
|
||||
__device__ __forceinline__ VMin2(const VMin2& other) {}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <> struct TransformFunctorTraits< arithm::VMin4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <> struct TransformFunctorTraits< arithm::VMin2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< minimum<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< minimum<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
void minMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, VMin4(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
void minMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, VMin2(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T> void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, minimum<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void minMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void minMat<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void minMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void minMat<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void minMat<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void minMat<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void minMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template <typename T> void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void minScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void minScalar<schar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void minScalar<ushort>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void minScalar<short >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void minScalar<int >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void minScalar<float >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void minScalar<double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// max
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
struct VMax4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vmax4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VMax4() {}
|
||||
__device__ __forceinline__ VMax4(const VMax4& other) {}
|
||||
};
|
||||
|
||||
struct VMax2 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vmax2(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VMax2() {}
|
||||
__device__ __forceinline__ VMax2(const VMax2& other) {}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <> struct TransformFunctorTraits< arithm::VMax4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <> struct TransformFunctorTraits< arithm::VMax2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< maximum<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< binder2nd< maximum<T> > > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
void maxMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, VMax4(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
void maxMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, VMax2(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T> void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<T>) dst, maximum<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void maxMat<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void maxMat<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void maxMat<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void maxMat<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void maxMat<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void maxMat<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void maxMat<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template <typename T> void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) dst, cv::gpu::cudev::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void maxScalar<uchar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void maxScalar<schar >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void maxScalar<ushort>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void maxScalar<short >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void maxScalar<int >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void maxScalar<float >(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void maxScalar<double>(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
235
modules/gpuarithm/src/cuda/minmaxloc.cu
Normal file
235
modules/gpuarithm/src/cuda/minmaxloc.cu
Normal file
@@ -0,0 +1,235 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/vec_traits.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
#include "opencv2/core/cuda/reduce.hpp"
|
||||
#include "opencv2/core/cuda/emulation.hpp"
|
||||
#include "opencv2/core/cuda/limits.hpp"
|
||||
#include "opencv2/core/cuda/utility.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace minMaxLoc
|
||||
{
|
||||
// To avoid shared bank conflicts we convert each value into value of
|
||||
// appropriate type (32 bits minimum)
|
||||
template <typename T> struct MinMaxTypeTraits;
|
||||
template <> struct MinMaxTypeTraits<unsigned char> { typedef int best_type; };
|
||||
template <> struct MinMaxTypeTraits<signed char> { typedef int best_type; };
|
||||
template <> struct MinMaxTypeTraits<unsigned short> { typedef int best_type; };
|
||||
template <> struct MinMaxTypeTraits<short> { typedef int best_type; };
|
||||
template <> struct MinMaxTypeTraits<int> { typedef int best_type; };
|
||||
template <> struct MinMaxTypeTraits<float> { typedef float best_type; };
|
||||
template <> struct MinMaxTypeTraits<double> { typedef double best_type; };
|
||||
|
||||
template <int BLOCK_SIZE, typename T, class Mask>
|
||||
__global__ void kernel_pass_1(const PtrStepSz<T> src, const Mask mask, T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, const int twidth, const int theight)
|
||||
{
|
||||
typedef typename MinMaxTypeTraits<T>::best_type work_type;
|
||||
|
||||
__shared__ work_type sminval[BLOCK_SIZE];
|
||||
__shared__ work_type smaxval[BLOCK_SIZE];
|
||||
__shared__ unsigned int sminloc[BLOCK_SIZE];
|
||||
__shared__ unsigned int smaxloc[BLOCK_SIZE];
|
||||
|
||||
const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
|
||||
const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
|
||||
|
||||
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||
const int bid = blockIdx.y * gridDim.x + blockIdx.x;
|
||||
|
||||
work_type mymin = numeric_limits<work_type>::max();
|
||||
work_type mymax = -numeric_limits<work_type>::max();
|
||||
unsigned int myminloc = 0;
|
||||
unsigned int mymaxloc = 0;
|
||||
|
||||
for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y)
|
||||
{
|
||||
const T* ptr = src.ptr(y);
|
||||
|
||||
for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x)
|
||||
{
|
||||
if (mask(y, x))
|
||||
{
|
||||
const work_type srcVal = ptr[x];
|
||||
|
||||
if (srcVal < mymin)
|
||||
{
|
||||
mymin = srcVal;
|
||||
myminloc = y * src.cols + x;
|
||||
}
|
||||
|
||||
if (srcVal > mymax)
|
||||
{
|
||||
mymax = srcVal;
|
||||
mymaxloc = y * src.cols + x;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
reduceKeyVal<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax),
|
||||
smem_tuple(sminloc, smaxloc), thrust::tie(myminloc, mymaxloc),
|
||||
tid,
|
||||
thrust::make_tuple(less<work_type>(), greater<work_type>()));
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
minval[bid] = (T) mymin;
|
||||
maxval[bid] = (T) mymax;
|
||||
minloc[bid] = myminloc;
|
||||
maxloc[bid] = mymaxloc;
|
||||
}
|
||||
}
|
||||
template <int BLOCK_SIZE, typename T>
|
||||
__global__ void kernel_pass_2(T* minval, T* maxval, unsigned int* minloc, unsigned int* maxloc, int count)
|
||||
{
|
||||
typedef typename MinMaxTypeTraits<T>::best_type work_type;
|
||||
|
||||
__shared__ work_type sminval[BLOCK_SIZE];
|
||||
__shared__ work_type smaxval[BLOCK_SIZE];
|
||||
__shared__ unsigned int sminloc[BLOCK_SIZE];
|
||||
__shared__ unsigned int smaxloc[BLOCK_SIZE];
|
||||
|
||||
unsigned int idx = ::min(threadIdx.x, count - 1);
|
||||
|
||||
work_type mymin = minval[idx];
|
||||
work_type mymax = maxval[idx];
|
||||
unsigned int myminloc = minloc[idx];
|
||||
unsigned int mymaxloc = maxloc[idx];
|
||||
|
||||
reduceKeyVal<BLOCK_SIZE>(smem_tuple(sminval, smaxval), thrust::tie(mymin, mymax),
|
||||
smem_tuple(sminloc, smaxloc), thrust::tie(myminloc, mymaxloc),
|
||||
threadIdx.x,
|
||||
thrust::make_tuple(less<work_type>(), greater<work_type>()));
|
||||
|
||||
if (threadIdx.x == 0)
|
||||
{
|
||||
minval[0] = (T) mymin;
|
||||
maxval[0] = (T) mymax;
|
||||
minloc[0] = myminloc;
|
||||
maxloc[0] = mymaxloc;
|
||||
}
|
||||
}
|
||||
|
||||
const int threads_x = 32;
|
||||
const int threads_y = 8;
|
||||
|
||||
void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
|
||||
{
|
||||
block = dim3(threads_x, threads_y);
|
||||
|
||||
grid = dim3(divUp(cols, block.x * block.y),
|
||||
divUp(rows, block.y * block.x));
|
||||
|
||||
grid.x = ::min(grid.x, block.x);
|
||||
grid.y = ::min(grid.y, block.y);
|
||||
}
|
||||
|
||||
void getBufSize(int cols, int rows, size_t elem_size, int& b1cols, int& b1rows, int& b2cols, int& b2rows)
|
||||
{
|
||||
dim3 block, grid;
|
||||
getLaunchCfg(cols, rows, block, grid);
|
||||
|
||||
// For values
|
||||
b1cols = (int)(grid.x * grid.y * elem_size);
|
||||
b1rows = 2;
|
||||
|
||||
// For locations
|
||||
b2cols = grid.x * grid.y * sizeof(int);
|
||||
b2rows = 2;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf)
|
||||
{
|
||||
dim3 block, grid;
|
||||
getLaunchCfg(src.cols, src.rows, block, grid);
|
||||
|
||||
const int twidth = divUp(divUp(src.cols, grid.x), block.x);
|
||||
const int theight = divUp(divUp(src.rows, grid.y), block.y);
|
||||
|
||||
T* minval_buf = (T*) valbuf.ptr(0);
|
||||
T* maxval_buf = (T*) valbuf.ptr(1);
|
||||
unsigned int* minloc_buf = locbuf.ptr(0);
|
||||
unsigned int* maxloc_buf = locbuf.ptr(1);
|
||||
|
||||
if (mask.data)
|
||||
kernel_pass_1<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, SingleMask(mask), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
|
||||
else
|
||||
kernel_pass_1<threads_x * threads_y><<<grid, block>>>((PtrStepSz<T>) src, WithOutMask(), minval_buf, maxval_buf, minloc_buf, maxloc_buf, twidth, theight);
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
kernel_pass_2<threads_x * threads_y><<<1, threads_x * threads_y>>>(minval_buf, maxval_buf, minloc_buf, maxloc_buf, grid.x * grid.y);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
T minval_, maxval_;
|
||||
cudaSafeCall( cudaMemcpy(&minval_, minval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpy(&maxval_, maxval_buf, sizeof(T), cudaMemcpyDeviceToHost) );
|
||||
*minval = minval_;
|
||||
*maxval = maxval_;
|
||||
|
||||
unsigned int minloc_, maxloc_;
|
||||
cudaSafeCall( cudaMemcpy(&minloc_, minloc_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpy(&maxloc_, maxloc_buf, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
|
||||
minloc[1] = minloc_ / src.cols; minloc[0] = minloc_ - minloc[1] * src.cols;
|
||||
maxloc[1] = maxloc_ / src.cols; maxloc[0] = maxloc_ - maxloc[1] * src.cols;
|
||||
}
|
||||
|
||||
template void run<unsigned char >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
|
||||
template void run<signed char >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
|
||||
template void run<unsigned short>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
|
||||
template void run<short >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
|
||||
template void run<int >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
|
||||
template void run<float >(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
|
||||
template void run<double>(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep<unsigned int> locbuf);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
211
modules/gpuarithm/src/cuda/mul_mat.cu
Normal file
211
modules/gpuarithm/src/cuda/mul_mat.cu
Normal file
@@ -0,0 +1,211 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
struct Mul_8uc4_32f : binary_function<uint, float, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, float b) const
|
||||
{
|
||||
uint res = 0;
|
||||
|
||||
res |= (saturate_cast<uchar>((0xffu & (a )) * b) );
|
||||
res |= (saturate_cast<uchar>((0xffu & (a >> 8)) * b) << 8);
|
||||
res |= (saturate_cast<uchar>((0xffu & (a >> 16)) * b) << 16);
|
||||
res |= (saturate_cast<uchar>((0xffu & (a >> 24)) * b) << 24);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Mul_8uc4_32f() {}
|
||||
__device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {}
|
||||
};
|
||||
|
||||
struct Mul_16sc4_32f : binary_function<short4, float, short4>
|
||||
{
|
||||
__device__ __forceinline__ short4 operator ()(short4 a, float b) const
|
||||
{
|
||||
return make_short4(saturate_cast<short>(a.x * b), saturate_cast<short>(a.y * b),
|
||||
saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b));
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Mul_16sc4_32f() {}
|
||||
__device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {}
|
||||
};
|
||||
|
||||
template <typename T, typename D> struct Mul : binary_function<T, T, D>
|
||||
{
|
||||
__device__ __forceinline__ D operator ()(T a, T b) const
|
||||
{
|
||||
return saturate_cast<D>(a * b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Mul() {}
|
||||
__device__ __forceinline__ Mul(const Mul& other) {}
|
||||
};
|
||||
|
||||
template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
|
||||
{
|
||||
S scale;
|
||||
|
||||
explicit MulScale(S scale_) : scale(scale_) {}
|
||||
|
||||
__device__ __forceinline__ D operator ()(T a, T b) const
|
||||
{
|
||||
return saturate_cast<D>(scale * a * b);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <> struct TransformFunctorTraits<arithm::Mul_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::Mul<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScale<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
void mulMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, Mul_8uc4_32f(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
void mulMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, Mul_16sc4_32f(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T, typename S, typename D>
|
||||
void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream)
|
||||
{
|
||||
if (scale == 1)
|
||||
{
|
||||
Mul<T, D> op;
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
MulScale<T, S, D> op(static_cast<S>(scale));
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
|
||||
}
|
||||
}
|
||||
|
||||
template void mulMat<uchar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<uchar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<uchar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<uchar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<uchar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<uchar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<uchar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
template void mulMat<schar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<schar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<schar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<schar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<schar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<schar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<schar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void mulMat<ushort, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void mulMat<ushort, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<ushort, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<ushort, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<ushort, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<ushort, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<ushort, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void mulMat<short, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void mulMat<short, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<short, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<short, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<short, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<short, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<short, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void mulMat<int, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void mulMat<int, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void mulMat<int, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void mulMat<int, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<int, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<int, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<int, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void mulMat<float, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void mulMat<float, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void mulMat<float, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void mulMat<float, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void mulMat<float, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<float, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<float, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
|
||||
//template void mulMat<double, double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void mulMat<double, double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void mulMat<double, double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void mulMat<double, double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void mulMat<double, double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
//template void mulMat<double, double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
template void mulMat<double, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
144
modules/gpuarithm/src/cuda/mul_scalar.cu
Normal file
144
modules/gpuarithm/src/cuda/mul_scalar.cu
Normal file
@@ -0,0 +1,144 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D> struct MulScalar : unary_function<T, D>
|
||||
{
|
||||
S val;
|
||||
|
||||
explicit MulScalar(S val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ D operator ()(T a) const
|
||||
{
|
||||
return saturate_cast<D>(a * val);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D>
|
||||
void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
MulScalar<T, S, D> op(static_cast<S>(val));
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void mulScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
template void mulScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void mulScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void mulScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void mulScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void mulScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void mulScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void mulScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void mulScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void mulScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void mulScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void mulScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void mulScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void mulScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void mulScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
|
||||
//template void mulScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void mulScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void mulScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void mulScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void mulScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
//template void mulScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
template void mulScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
217
modules/gpuarithm/src/cuda/polar_cart.cu
Normal file
217
modules/gpuarithm/src/cuda/polar_cart.cu
Normal file
@@ -0,0 +1,217 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace mathfunc
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
// Cart <-> Polar
|
||||
|
||||
struct Nothing
|
||||
{
|
||||
static __device__ __forceinline__ void calc(int, int, float, float, float*, size_t, float)
|
||||
{
|
||||
}
|
||||
};
|
||||
struct Magnitude
|
||||
{
|
||||
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
|
||||
{
|
||||
dst[y * dst_step + x] = ::sqrtf(x_data * x_data + y_data * y_data);
|
||||
}
|
||||
};
|
||||
struct MagnitudeSqr
|
||||
{
|
||||
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
|
||||
{
|
||||
dst[y * dst_step + x] = x_data * x_data + y_data * y_data;
|
||||
}
|
||||
};
|
||||
struct Atan2
|
||||
{
|
||||
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
|
||||
{
|
||||
float angle = ::atan2f(y_data, x_data);
|
||||
angle += (angle < 0) * 2.0f * CV_PI_F;
|
||||
dst[y * dst_step + x] = scale * angle;
|
||||
}
|
||||
};
|
||||
template <typename Mag, typename Angle>
|
||||
__global__ void cartToPolar(const float* xptr, size_t x_step, const float* yptr, size_t y_step,
|
||||
float* mag, size_t mag_step, float* angle, size_t angle_step, float scale, int width, int height)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < width && y < height)
|
||||
{
|
||||
float x_data = xptr[y * x_step + x];
|
||||
float y_data = yptr[y * y_step + x];
|
||||
|
||||
Mag::calc(x, y, x_data, y_data, mag, mag_step, scale);
|
||||
Angle::calc(x, y, x_data, y_data, angle, angle_step, scale);
|
||||
}
|
||||
}
|
||||
|
||||
struct NonEmptyMag
|
||||
{
|
||||
static __device__ __forceinline__ float get(const float* mag, size_t mag_step, int x, int y)
|
||||
{
|
||||
return mag[y * mag_step + x];
|
||||
}
|
||||
};
|
||||
struct EmptyMag
|
||||
{
|
||||
static __device__ __forceinline__ float get(const float*, size_t, int, int)
|
||||
{
|
||||
return 1.0f;
|
||||
}
|
||||
};
|
||||
template <typename Mag>
|
||||
__global__ void polarToCart(const float* mag, size_t mag_step, const float* angle, size_t angle_step, float scale,
|
||||
float* xptr, size_t x_step, float* yptr, size_t y_step, int width, int height)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < width && y < height)
|
||||
{
|
||||
float mag_data = Mag::get(mag, mag_step, x, y);
|
||||
float angle_data = angle[y * angle_step + x];
|
||||
float sin_a, cos_a;
|
||||
|
||||
::sincosf(scale * angle_data, &sin_a, &cos_a);
|
||||
|
||||
xptr[y * x_step + x] = mag_data * cos_a;
|
||||
yptr[y * y_step + x] = mag_data * sin_a;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Mag, typename Angle>
|
||||
void cartToPolar_caller(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(x.cols, threads.x);
|
||||
grid.y = divUp(x.rows, threads.y);
|
||||
|
||||
const float scale = angleInDegrees ? (180.0f / CV_PI_F) : 1.f;
|
||||
|
||||
cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
|
||||
x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(),
|
||||
mag.data, mag.step/mag.elemSize(), angle.data, angle.step/angle.elemSize(), scale, x.cols, x.rows);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*caller_t)(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream);
|
||||
static const caller_t callers[2][2][2] =
|
||||
{
|
||||
{
|
||||
{
|
||||
cartToPolar_caller<Magnitude, Atan2>,
|
||||
cartToPolar_caller<Magnitude, Nothing>
|
||||
},
|
||||
{
|
||||
cartToPolar_caller<MagnitudeSqr, Atan2>,
|
||||
cartToPolar_caller<MagnitudeSqr, Nothing>,
|
||||
}
|
||||
},
|
||||
{
|
||||
{
|
||||
cartToPolar_caller<Nothing, Atan2>,
|
||||
cartToPolar_caller<Nothing, Nothing>
|
||||
},
|
||||
{
|
||||
cartToPolar_caller<Nothing, Atan2>,
|
||||
cartToPolar_caller<Nothing, Nothing>,
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
callers[mag.data == 0][magSqr][angle.data == 0](x, y, mag, angle, angleInDegrees, stream);
|
||||
}
|
||||
|
||||
template <typename Mag>
|
||||
void polarToCart_caller(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(mag.cols, threads.x);
|
||||
grid.y = divUp(mag.rows, threads.y);
|
||||
|
||||
const float scale = angleInDegrees ? (CV_PI_F / 180.0f) : 1.0f;
|
||||
|
||||
polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.data, mag.step/mag.elemSize(),
|
||||
angle.data, angle.step/angle.elemSize(), scale, x.data, x.step/x.elemSize(), y.data, y.step/y.elemSize(), mag.cols, mag.rows);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*caller_t)(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream);
|
||||
static const caller_t callers[2] =
|
||||
{
|
||||
polarToCart_caller<NonEmptyMag>,
|
||||
polarToCart_caller<EmptyMag>
|
||||
};
|
||||
|
||||
callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream);
|
||||
}
|
||||
} // namespace mathfunc
|
||||
}}} // namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
330
modules/gpuarithm/src/cuda/reduce.cu
Normal file
330
modules/gpuarithm/src/cuda/reduce.cu
Normal file
@@ -0,0 +1,330 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/vec_traits.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
#include "opencv2/core/cuda/reduce.hpp"
|
||||
#include "opencv2/core/cuda/limits.hpp"
|
||||
|
||||
#include "unroll_detail.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace reduce
|
||||
{
|
||||
struct Sum
|
||||
{
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T startValue() const
|
||||
{
|
||||
return VecTraits<T>::all(0);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T operator ()(T a, T b) const
|
||||
{
|
||||
return a + b;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T result(T r, double) const
|
||||
{
|
||||
return r;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Sum() {}
|
||||
__device__ __forceinline__ Sum(const Sum&) {}
|
||||
};
|
||||
|
||||
struct Avg
|
||||
{
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T startValue() const
|
||||
{
|
||||
return VecTraits<T>::all(0);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T operator ()(T a, T b) const
|
||||
{
|
||||
return a + b;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ typename TypeVec<double, VecTraits<T>::cn>::vec_type result(T r, double sz) const
|
||||
{
|
||||
return r / sz;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Avg() {}
|
||||
__device__ __forceinline__ Avg(const Avg&) {}
|
||||
};
|
||||
|
||||
struct Min
|
||||
{
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T startValue() const
|
||||
{
|
||||
return VecTraits<T>::all(numeric_limits<typename VecTraits<T>::elem_type>::max());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T operator ()(T a, T b) const
|
||||
{
|
||||
minimum<T> minOp;
|
||||
return minOp(a, b);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T result(T r, double) const
|
||||
{
|
||||
return r;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Min() {}
|
||||
__device__ __forceinline__ Min(const Min&) {}
|
||||
};
|
||||
|
||||
struct Max
|
||||
{
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T startValue() const
|
||||
{
|
||||
return VecTraits<T>::all(-numeric_limits<typename VecTraits<T>::elem_type>::max());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T operator ()(T a, T b) const
|
||||
{
|
||||
maximum<T> maxOp;
|
||||
return maxOp(a, b);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T result(T r, double) const
|
||||
{
|
||||
return r;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ Max() {}
|
||||
__device__ __forceinline__ Max(const Max&) {}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
|
||||
template <typename T, typename S, typename D, class Op>
|
||||
__global__ void rowsKernel(const PtrStepSz<T> src, D* dst, const Op op)
|
||||
{
|
||||
__shared__ S smem[16 * 16];
|
||||
|
||||
const int x = blockIdx.x * 16 + threadIdx.x;
|
||||
|
||||
S myVal = op.template startValue<S>();
|
||||
|
||||
if (x < src.cols)
|
||||
{
|
||||
for (int y = threadIdx.y; y < src.rows; y += 16)
|
||||
{
|
||||
S srcVal = src(y, x);
|
||||
myVal = op(myVal, srcVal);
|
||||
}
|
||||
}
|
||||
|
||||
smem[threadIdx.x * 16 + threadIdx.y] = myVal;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
volatile S* srow = smem + threadIdx.y * 16;
|
||||
|
||||
myVal = srow[threadIdx.x];
|
||||
cudev::reduce<16>(srow, myVal, threadIdx.x, op);
|
||||
|
||||
if (threadIdx.x == 0)
|
||||
srow[0] = myVal;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (threadIdx.y == 0 && x < src.cols)
|
||||
dst[x] = (D) op.result(smem[threadIdx.x * 16], src.rows);
|
||||
}
|
||||
|
||||
template <typename T, typename S, typename D, class Op>
|
||||
void rowsCaller(PtrStepSz<T> src, D* dst, cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(16, 16);
|
||||
const dim3 grid(divUp(src.cols, block.x));
|
||||
|
||||
Op op;
|
||||
rowsKernel<T, S, D, Op><<<grid, block, 0, stream>>>(src, dst, op);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template <typename T, typename S, typename D>
|
||||
void rows(PtrStepSzb src, void* dst, int op, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSz<T> src, D* dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
rowsCaller<T, S, D, Sum>,
|
||||
rowsCaller<T, S, D, Avg>,
|
||||
rowsCaller<T, S, D, Max>,
|
||||
rowsCaller<T, S, D, Min>
|
||||
};
|
||||
|
||||
funcs[op]((PtrStepSz<T>) src, (D*) dst, stream);
|
||||
}
|
||||
|
||||
template void rows<unsigned char, int, unsigned char>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
template void rows<unsigned char, int, int>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
template void rows<unsigned char, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
template void rows<unsigned char, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
|
||||
template void rows<unsigned short, int, unsigned short>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
template void rows<unsigned short, int, int>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
template void rows<unsigned short, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
template void rows<unsigned short, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
|
||||
template void rows<short, int, short>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
template void rows<short, int, int>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
template void rows<short, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
template void rows<short, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
|
||||
template void rows<int, int, int>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
template void rows<int, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
template void rows<int, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
|
||||
template void rows<float, float, float>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
template void rows<float, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
|
||||
template void rows<double, double, double>(PtrStepSzb src, void* dst, int op, cudaStream_t stream);
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
|
||||
template <int BLOCK_SIZE, typename T, typename S, typename D, int cn, class Op>
|
||||
__global__ void colsKernel(const PtrStepSz<typename TypeVec<T, cn>::vec_type> src, typename TypeVec<D, cn>::vec_type* dst, const Op op)
|
||||
{
|
||||
typedef typename TypeVec<T, cn>::vec_type src_type;
|
||||
typedef typename TypeVec<S, cn>::vec_type work_type;
|
||||
typedef typename TypeVec<D, cn>::vec_type dst_type;
|
||||
|
||||
__shared__ S smem[BLOCK_SIZE * cn];
|
||||
|
||||
const int y = blockIdx.x;
|
||||
|
||||
const src_type* srcRow = src.ptr(y);
|
||||
|
||||
work_type myVal = op.template startValue<work_type>();
|
||||
|
||||
for (int x = threadIdx.x; x < src.cols; x += BLOCK_SIZE)
|
||||
myVal = op(myVal, saturate_cast<work_type>(srcRow[x]));
|
||||
|
||||
cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(myVal), threadIdx.x, detail::Unroll<cn>::op(op));
|
||||
|
||||
if (threadIdx.x == 0)
|
||||
dst[y] = saturate_cast<dst_type>(op.result(myVal, src.cols));
|
||||
}
|
||||
|
||||
template <typename T, typename S, typename D, int cn, class Op> void colsCaller(PtrStepSzb src, void* dst, cudaStream_t stream)
|
||||
{
|
||||
const int BLOCK_SIZE = 256;
|
||||
|
||||
const dim3 block(BLOCK_SIZE);
|
||||
const dim3 grid(src.rows);
|
||||
|
||||
Op op;
|
||||
colsKernel<BLOCK_SIZE, T, S, D, cn, Op><<<grid, block, 0, stream>>>((PtrStepSz<typename TypeVec<T, cn>::vec_type>) src, (typename TypeVec<D, cn>::vec_type*) dst, op);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
}
|
||||
|
||||
template <typename T, typename S, typename D> void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*func_t)(PtrStepSzb src, void* dst, cudaStream_t stream);
|
||||
static const func_t funcs[5][4] =
|
||||
{
|
||||
{0,0,0,0},
|
||||
{colsCaller<T, S, D, 1, Sum>, colsCaller<T, S, D, 1, Avg>, colsCaller<T, S, D, 1, Max>, colsCaller<T, S, D, 1, Min>},
|
||||
{colsCaller<T, S, D, 2, Sum>, colsCaller<T, S, D, 2, Avg>, colsCaller<T, S, D, 2, Max>, colsCaller<T, S, D, 2, Min>},
|
||||
{colsCaller<T, S, D, 3, Sum>, colsCaller<T, S, D, 3, Avg>, colsCaller<T, S, D, 3, Max>, colsCaller<T, S, D, 3, Min>},
|
||||
{colsCaller<T, S, D, 4, Sum>, colsCaller<T, S, D, 4, Avg>, colsCaller<T, S, D, 4, Max>, colsCaller<T, S, D, 4, Min>},
|
||||
};
|
||||
|
||||
funcs[cn][op](src, dst, stream);
|
||||
}
|
||||
|
||||
template void cols<unsigned char, int, unsigned char>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
template void cols<unsigned char, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
template void cols<unsigned char, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
template void cols<unsigned char, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
|
||||
template void cols<unsigned short, int, unsigned short>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
template void cols<unsigned short, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
template void cols<unsigned short, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
template void cols<unsigned short, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
|
||||
template void cols<short, int, short>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
template void cols<short, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
template void cols<short, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
template void cols<short, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
|
||||
template void cols<int, int, int>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
template void cols<int, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
template void cols<int, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
|
||||
template void cols<float, float, float>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
template void cols<float, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
|
||||
template void cols<double, double, double>(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
511
modules/gpuarithm/src/cuda/split_merge.cu
Normal file
511
modules/gpuarithm/src/cuda/split_merge.cu
Normal file
@@ -0,0 +1,511 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace split_merge
|
||||
{
|
||||
template <typename T, size_t elem_size = sizeof(T)>
|
||||
struct TypeTraits
|
||||
{
|
||||
typedef T type;
|
||||
typedef T type2;
|
||||
typedef T type3;
|
||||
typedef T type4;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct TypeTraits<T, 1>
|
||||
{
|
||||
typedef char type;
|
||||
typedef char2 type2;
|
||||
typedef char3 type3;
|
||||
typedef char4 type4;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct TypeTraits<T, 2>
|
||||
{
|
||||
typedef short type;
|
||||
typedef short2 type2;
|
||||
typedef short3 type3;
|
||||
typedef short4 type4;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct TypeTraits<T, 4>
|
||||
{
|
||||
typedef int type;
|
||||
typedef int2 type2;
|
||||
typedef int3 type3;
|
||||
typedef int4 type4;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct TypeTraits<T, 8>
|
||||
{
|
||||
typedef double type;
|
||||
typedef double2 type2;
|
||||
//typedef double3 type3;
|
||||
//typedef double4 type3;
|
||||
};
|
||||
|
||||
typedef void (*MergeFunction)(const PtrStepSzb* src, PtrStepSzb& dst, const cudaStream_t& stream);
|
||||
typedef void (*SplitFunction)(const PtrStepSzb& src, PtrStepSzb* dst, const cudaStream_t& stream);
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Merge
|
||||
|
||||
template <typename T>
|
||||
__global__ void mergeC2_(const uchar* src0, size_t src0_step,
|
||||
const uchar* src1, size_t src1_step,
|
||||
int rows, int cols, uchar* dst, size_t dst_step)
|
||||
{
|
||||
typedef typename TypeTraits<T>::type2 dst_type;
|
||||
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
const T* src0_y = (const T*)(src0 + y * src0_step);
|
||||
const T* src1_y = (const T*)(src1 + y * src1_step);
|
||||
dst_type* dst_y = (dst_type*)(dst + y * dst_step);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
dst_type dst_elem;
|
||||
dst_elem.x = src0_y[x];
|
||||
dst_elem.y = src1_y[x];
|
||||
dst_y[x] = dst_elem;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void mergeC3_(const uchar* src0, size_t src0_step,
|
||||
const uchar* src1, size_t src1_step,
|
||||
const uchar* src2, size_t src2_step,
|
||||
int rows, int cols, uchar* dst, size_t dst_step)
|
||||
{
|
||||
typedef typename TypeTraits<T>::type3 dst_type;
|
||||
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
const T* src0_y = (const T*)(src0 + y * src0_step);
|
||||
const T* src1_y = (const T*)(src1 + y * src1_step);
|
||||
const T* src2_y = (const T*)(src2 + y * src2_step);
|
||||
dst_type* dst_y = (dst_type*)(dst + y * dst_step);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
dst_type dst_elem;
|
||||
dst_elem.x = src0_y[x];
|
||||
dst_elem.y = src1_y[x];
|
||||
dst_elem.z = src2_y[x];
|
||||
dst_y[x] = dst_elem;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
__global__ void mergeC3_<double>(const uchar* src0, size_t src0_step,
|
||||
const uchar* src1, size_t src1_step,
|
||||
const uchar* src2, size_t src2_step,
|
||||
int rows, int cols, uchar* dst, size_t dst_step)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
const double* src0_y = (const double*)(src0 + y * src0_step);
|
||||
const double* src1_y = (const double*)(src1 + y * src1_step);
|
||||
const double* src2_y = (const double*)(src2 + y * src2_step);
|
||||
double* dst_y = (double*)(dst + y * dst_step);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
dst_y[3 * x] = src0_y[x];
|
||||
dst_y[3 * x + 1] = src1_y[x];
|
||||
dst_y[3 * x + 2] = src2_y[x];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void mergeC4_(const uchar* src0, size_t src0_step,
|
||||
const uchar* src1, size_t src1_step,
|
||||
const uchar* src2, size_t src2_step,
|
||||
const uchar* src3, size_t src3_step,
|
||||
int rows, int cols, uchar* dst, size_t dst_step)
|
||||
{
|
||||
typedef typename TypeTraits<T>::type4 dst_type;
|
||||
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
const T* src0_y = (const T*)(src0 + y * src0_step);
|
||||
const T* src1_y = (const T*)(src1 + y * src1_step);
|
||||
const T* src2_y = (const T*)(src2 + y * src2_step);
|
||||
const T* src3_y = (const T*)(src3 + y * src3_step);
|
||||
dst_type* dst_y = (dst_type*)(dst + y * dst_step);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
dst_type dst_elem;
|
||||
dst_elem.x = src0_y[x];
|
||||
dst_elem.y = src1_y[x];
|
||||
dst_elem.z = src2_y[x];
|
||||
dst_elem.w = src3_y[x];
|
||||
dst_y[x] = dst_elem;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
__global__ void mergeC4_<double>(const uchar* src0, size_t src0_step,
|
||||
const uchar* src1, size_t src1_step,
|
||||
const uchar* src2, size_t src2_step,
|
||||
const uchar* src3, size_t src3_step,
|
||||
int rows, int cols, uchar* dst, size_t dst_step)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
const double* src0_y = (const double*)(src0 + y * src0_step);
|
||||
const double* src1_y = (const double*)(src1 + y * src1_step);
|
||||
const double* src2_y = (const double*)(src2 + y * src2_step);
|
||||
const double* src3_y = (const double*)(src3 + y * src3_step);
|
||||
double2* dst_y = (double2*)(dst + y * dst_step);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
dst_y[2 * x] = make_double2(src0_y[x], src1_y[x]);
|
||||
dst_y[2 * x + 1] = make_double2(src2_y[x], src3_y[x]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static void mergeC2_(const PtrStepSzb* src, PtrStepSzb& dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
mergeC2_<T><<<grid, block, 0, stream>>>(
|
||||
src[0].data, src[0].step,
|
||||
src[1].data, src[1].step,
|
||||
dst.rows, dst.cols, dst.data, dst.step);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static void mergeC3_(const PtrStepSzb* src, PtrStepSzb& dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
mergeC3_<T><<<grid, block, 0, stream>>>(
|
||||
src[0].data, src[0].step,
|
||||
src[1].data, src[1].step,
|
||||
src[2].data, src[2].step,
|
||||
dst.rows, dst.cols, dst.data, dst.step);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static void mergeC4_(const PtrStepSzb* src, PtrStepSzb& dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
mergeC4_<T><<<grid, block, 0, stream>>>(
|
||||
src[0].data, src[0].step,
|
||||
src[1].data, src[1].step,
|
||||
src[2].data, src[2].step,
|
||||
src[3].data, src[3].step,
|
||||
dst.rows, dst.cols, dst.data, dst.step);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
void merge_caller(const PtrStepSzb* src, PtrStepSzb& dst,
|
||||
int total_channels, size_t elem_size,
|
||||
const cudaStream_t& stream)
|
||||
{
|
||||
static MergeFunction merge_func_tbl[] =
|
||||
{
|
||||
mergeC2_<char>, mergeC2_<short>, mergeC2_<int>, 0, mergeC2_<double>,
|
||||
mergeC3_<char>, mergeC3_<short>, mergeC3_<int>, 0, mergeC3_<double>,
|
||||
mergeC4_<char>, mergeC4_<short>, mergeC4_<int>, 0, mergeC4_<double>,
|
||||
};
|
||||
|
||||
size_t merge_func_id = (total_channels - 2) * 5 + (elem_size >> 1);
|
||||
MergeFunction merge_func = merge_func_tbl[merge_func_id];
|
||||
|
||||
if (merge_func == 0)
|
||||
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported channel count or data type");
|
||||
|
||||
merge_func(src, dst, stream);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//------------------------------------------------------------
|
||||
// Split
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void splitC2_(const uchar* src, size_t src_step,
|
||||
int rows, int cols,
|
||||
uchar* dst0, size_t dst0_step,
|
||||
uchar* dst1, size_t dst1_step)
|
||||
{
|
||||
typedef typename TypeTraits<T>::type2 src_type;
|
||||
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
const src_type* src_y = (const src_type*)(src + y * src_step);
|
||||
T* dst0_y = (T*)(dst0 + y * dst0_step);
|
||||
T* dst1_y = (T*)(dst1 + y * dst1_step);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
src_type src_elem = src_y[x];
|
||||
dst0_y[x] = src_elem.x;
|
||||
dst1_y[x] = src_elem.y;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void splitC3_(const uchar* src, size_t src_step,
|
||||
int rows, int cols,
|
||||
uchar* dst0, size_t dst0_step,
|
||||
uchar* dst1, size_t dst1_step,
|
||||
uchar* dst2, size_t dst2_step)
|
||||
{
|
||||
typedef typename TypeTraits<T>::type3 src_type;
|
||||
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
const src_type* src_y = (const src_type*)(src + y * src_step);
|
||||
T* dst0_y = (T*)(dst0 + y * dst0_step);
|
||||
T* dst1_y = (T*)(dst1 + y * dst1_step);
|
||||
T* dst2_y = (T*)(dst2 + y * dst2_step);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
src_type src_elem = src_y[x];
|
||||
dst0_y[x] = src_elem.x;
|
||||
dst1_y[x] = src_elem.y;
|
||||
dst2_y[x] = src_elem.z;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
__global__ void splitC3_<double>(
|
||||
const uchar* src, size_t src_step, int rows, int cols,
|
||||
uchar* dst0, size_t dst0_step,
|
||||
uchar* dst1, size_t dst1_step,
|
||||
uchar* dst2, size_t dst2_step)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
const double* src_y = (const double*)(src + y * src_step);
|
||||
double* dst0_y = (double*)(dst0 + y * dst0_step);
|
||||
double* dst1_y = (double*)(dst1 + y * dst1_step);
|
||||
double* dst2_y = (double*)(dst2 + y * dst2_step);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
dst0_y[x] = src_y[3 * x];
|
||||
dst1_y[x] = src_y[3 * x + 1];
|
||||
dst2_y[x] = src_y[3 * x + 2];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void splitC4_(const uchar* src, size_t src_step, int rows, int cols,
|
||||
uchar* dst0, size_t dst0_step,
|
||||
uchar* dst1, size_t dst1_step,
|
||||
uchar* dst2, size_t dst2_step,
|
||||
uchar* dst3, size_t dst3_step)
|
||||
{
|
||||
typedef typename TypeTraits<T>::type4 src_type;
|
||||
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
const src_type* src_y = (const src_type*)(src + y * src_step);
|
||||
T* dst0_y = (T*)(dst0 + y * dst0_step);
|
||||
T* dst1_y = (T*)(dst1 + y * dst1_step);
|
||||
T* dst2_y = (T*)(dst2 + y * dst2_step);
|
||||
T* dst3_y = (T*)(dst3 + y * dst3_step);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
src_type src_elem = src_y[x];
|
||||
dst0_y[x] = src_elem.x;
|
||||
dst1_y[x] = src_elem.y;
|
||||
dst2_y[x] = src_elem.z;
|
||||
dst3_y[x] = src_elem.w;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
__global__ void splitC4_<double>(
|
||||
const uchar* src, size_t src_step, int rows, int cols,
|
||||
uchar* dst0, size_t dst0_step,
|
||||
uchar* dst1, size_t dst1_step,
|
||||
uchar* dst2, size_t dst2_step,
|
||||
uchar* dst3, size_t dst3_step)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
const double2* src_y = (const double2*)(src + y * src_step);
|
||||
double* dst0_y = (double*)(dst0 + y * dst0_step);
|
||||
double* dst1_y = (double*)(dst1 + y * dst1_step);
|
||||
double* dst2_y = (double*)(dst2 + y * dst2_step);
|
||||
double* dst3_y = (double*)(dst3 + y * dst3_step);
|
||||
|
||||
if (x < cols && y < rows)
|
||||
{
|
||||
double2 src_elem1 = src_y[2 * x];
|
||||
double2 src_elem2 = src_y[2 * x + 1];
|
||||
dst0_y[x] = src_elem1.x;
|
||||
dst1_y[x] = src_elem1.y;
|
||||
dst2_y[x] = src_elem2.x;
|
||||
dst3_y[x] = src_elem2.y;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static void splitC2_(const PtrStepSzb& src, PtrStepSzb* dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||
splitC2_<T><<<grid, block, 0, stream>>>(
|
||||
src.data, src.step, src.rows, src.cols,
|
||||
dst[0].data, dst[0].step,
|
||||
dst[1].data, dst[1].step);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static void splitC3_(const PtrStepSzb& src, PtrStepSzb* dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||
splitC3_<T><<<grid, block, 0, stream>>>(
|
||||
src.data, src.step, src.rows, src.cols,
|
||||
dst[0].data, dst[0].step,
|
||||
dst[1].data, dst[1].step,
|
||||
dst[2].data, dst[2].step);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
static void splitC4_(const PtrStepSzb& src, PtrStepSzb* dst, const cudaStream_t& stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||
splitC4_<T><<<grid, block, 0, stream>>>(
|
||||
src.data, src.step, src.rows, src.cols,
|
||||
dst[0].data, dst[0].step,
|
||||
dst[1].data, dst[1].step,
|
||||
dst[2].data, dst[2].step,
|
||||
dst[3].data, dst[3].step);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
void split_caller(const PtrStepSzb& src, PtrStepSzb* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream)
|
||||
{
|
||||
static SplitFunction split_func_tbl[] =
|
||||
{
|
||||
splitC2_<char>, splitC2_<short>, splitC2_<int>, 0, splitC2_<double>,
|
||||
splitC3_<char>, splitC3_<short>, splitC3_<int>, 0, splitC3_<double>,
|
||||
splitC4_<char>, splitC4_<short>, splitC4_<int>, 0, splitC4_<double>,
|
||||
};
|
||||
|
||||
size_t split_func_id = (num_channels - 2) * 5 + (elem_size1 >> 1);
|
||||
SplitFunction split_func = split_func_tbl[split_func_id];
|
||||
|
||||
if (split_func == 0)
|
||||
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported channel count or data type");
|
||||
|
||||
split_func(src, dst, stream);
|
||||
}
|
||||
} // namespace split_merge
|
||||
}}} // namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
185
modules/gpuarithm/src/cuda/sub_mat.cu
Normal file
185
modules/gpuarithm/src/cuda/sub_mat.cu
Normal file
@@ -0,0 +1,185 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
struct VSub4 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vsub4(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VSub4() {}
|
||||
__device__ __forceinline__ VSub4(const VSub4& other) {}
|
||||
};
|
||||
|
||||
struct VSub2 : binary_function<uint, uint, uint>
|
||||
{
|
||||
__device__ __forceinline__ uint operator ()(uint a, uint b) const
|
||||
{
|
||||
return vsub2(a, b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ VSub2() {}
|
||||
__device__ __forceinline__ VSub2(const VSub2& other) {}
|
||||
};
|
||||
|
||||
template <typename T, typename D> struct SubMat : binary_function<T, T, D>
|
||||
{
|
||||
__device__ __forceinline__ D operator ()(T a, T b) const
|
||||
{
|
||||
return saturate_cast<D>(a - b);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ SubMat() {}
|
||||
__device__ __forceinline__ SubMat(const SubMat& other) {}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <> struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <> struct TransformFunctorTraits< arithm::VSub2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T, typename D> struct TransformFunctorTraits< arithm::SubMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
void subMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, VSub4(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
void subMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
|
||||
{
|
||||
cudev::transform(src1, src2, dst, VSub2(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T, typename D>
|
||||
void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
|
||||
{
|
||||
if (mask.data)
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), mask, stream);
|
||||
else
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void subMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<uchar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<uchar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<uchar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<uchar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<uchar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<uchar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
template void subMat<schar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<schar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<schar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<schar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<schar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<schar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<schar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void subMat<ushort, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subMat<ushort, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<ushort, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<ushort, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<ushort, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<ushort, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<ushort, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void subMat<short, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subMat<short, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<short, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<short, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<short, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<short, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<short, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void subMat<int, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subMat<int, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subMat<int, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subMat<int, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<int, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<int, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void subMat<float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subMat<float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subMat<float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subMat<float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subMat<float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<float, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void subMat<double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subMat<double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subMat<double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subMat<double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subMat<double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subMat<double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subMat<double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
148
modules/gpuarithm/src/cuda/sub_scalar.cu
Normal file
148
modules/gpuarithm/src/cuda/sub_scalar.cu
Normal file
@@ -0,0 +1,148 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D> struct SubScalar : unary_function<T, D>
|
||||
{
|
||||
S val;
|
||||
|
||||
explicit SubScalar(S val_) : val(val_) {}
|
||||
|
||||
__device__ __forceinline__ D operator ()(T a) const
|
||||
{
|
||||
return saturate_cast<D>(a - val);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::SubScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <typename T, typename S, typename D>
|
||||
void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
|
||||
{
|
||||
SubScalar<T, S, D> op(static_cast<S>(val));
|
||||
|
||||
if (mask.data)
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream);
|
||||
else
|
||||
cudev::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void subScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
template void subScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void subScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void subScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void subScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void subScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
|
||||
//template void subScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
//template void subScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
template void subScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
380
modules/gpuarithm/src/cuda/sum.cu
Normal file
380
modules/gpuarithm/src/cuda/sum.cu
Normal file
@@ -0,0 +1,380 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/vec_traits.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
#include "opencv2/core/cuda/reduce.hpp"
|
||||
#include "opencv2/core/cuda/emulation.hpp"
|
||||
#include "opencv2/core/cuda/utility.hpp"
|
||||
|
||||
#include "unroll_detail.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace sum
|
||||
{
|
||||
__device__ unsigned int blocks_finished = 0;
|
||||
|
||||
template <typename R, int cn> struct AtomicAdd;
|
||||
template <typename R> struct AtomicAdd<R, 1>
|
||||
{
|
||||
static __device__ void run(R* ptr, R val)
|
||||
{
|
||||
Emulation::glob::atomicAdd(ptr, val);
|
||||
}
|
||||
};
|
||||
template <typename R> struct AtomicAdd<R, 2>
|
||||
{
|
||||
typedef typename TypeVec<R, 2>::vec_type val_type;
|
||||
|
||||
static __device__ void run(R* ptr, val_type val)
|
||||
{
|
||||
Emulation::glob::atomicAdd(ptr, val.x);
|
||||
Emulation::glob::atomicAdd(ptr + 1, val.y);
|
||||
}
|
||||
};
|
||||
template <typename R> struct AtomicAdd<R, 3>
|
||||
{
|
||||
typedef typename TypeVec<R, 3>::vec_type val_type;
|
||||
|
||||
static __device__ void run(R* ptr, val_type val)
|
||||
{
|
||||
Emulation::glob::atomicAdd(ptr, val.x);
|
||||
Emulation::glob::atomicAdd(ptr + 1, val.y);
|
||||
Emulation::glob::atomicAdd(ptr + 2, val.z);
|
||||
}
|
||||
};
|
||||
template <typename R> struct AtomicAdd<R, 4>
|
||||
{
|
||||
typedef typename TypeVec<R, 4>::vec_type val_type;
|
||||
|
||||
static __device__ void run(R* ptr, val_type val)
|
||||
{
|
||||
Emulation::glob::atomicAdd(ptr, val.x);
|
||||
Emulation::glob::atomicAdd(ptr + 1, val.y);
|
||||
Emulation::glob::atomicAdd(ptr + 2, val.z);
|
||||
Emulation::glob::atomicAdd(ptr + 3, val.w);
|
||||
}
|
||||
};
|
||||
|
||||
template <int BLOCK_SIZE, typename R, int cn>
|
||||
struct GlobalReduce
|
||||
{
|
||||
typedef typename TypeVec<R, cn>::vec_type result_type;
|
||||
|
||||
static __device__ void run(result_type& sum, result_type* result, int tid, int bid, R* smem)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 200
|
||||
if (tid == 0)
|
||||
AtomicAdd<R, cn>::run((R*) result, sum);
|
||||
#else
|
||||
__shared__ bool is_last;
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
result[bid] = sum;
|
||||
|
||||
__threadfence();
|
||||
|
||||
unsigned int ticket = ::atomicAdd(&blocks_finished, 1);
|
||||
is_last = (ticket == gridDim.x * gridDim.y - 1);
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (is_last)
|
||||
{
|
||||
sum = tid < gridDim.x * gridDim.y ? result[tid] : VecTraits<result_type>::all(0);
|
||||
|
||||
cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
result[0] = sum;
|
||||
blocks_finished = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template <int BLOCK_SIZE, typename src_type, typename result_type, class Mask, class Op>
|
||||
__global__ void kernel(const PtrStepSz<src_type> src, result_type* result, const Mask mask, const Op op, const int twidth, const int theight)
|
||||
{
|
||||
typedef typename VecTraits<src_type>::elem_type T;
|
||||
typedef typename VecTraits<result_type>::elem_type R;
|
||||
const int cn = VecTraits<src_type>::cn;
|
||||
|
||||
__shared__ R smem[BLOCK_SIZE * cn];
|
||||
|
||||
const int x0 = blockIdx.x * blockDim.x * twidth + threadIdx.x;
|
||||
const int y0 = blockIdx.y * blockDim.y * theight + threadIdx.y;
|
||||
|
||||
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||
const int bid = blockIdx.y * gridDim.x + blockIdx.x;
|
||||
|
||||
result_type sum = VecTraits<result_type>::all(0);
|
||||
|
||||
for (int i = 0, y = y0; i < theight && y < src.rows; ++i, y += blockDim.y)
|
||||
{
|
||||
const src_type* ptr = src.ptr(y);
|
||||
|
||||
for (int j = 0, x = x0; j < twidth && x < src.cols; ++j, x += blockDim.x)
|
||||
{
|
||||
if (mask(y, x))
|
||||
{
|
||||
const src_type srcVal = ptr[x];
|
||||
sum = sum + op(saturate_cast<result_type>(srcVal));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cudev::reduce<BLOCK_SIZE>(detail::Unroll<cn>::template smem_tuple<BLOCK_SIZE>(smem), detail::Unroll<cn>::tie(sum), tid, detail::Unroll<cn>::op(plus<R>()));
|
||||
|
||||
GlobalReduce<BLOCK_SIZE, R, cn>::run(sum, result, tid, bid, smem);
|
||||
}
|
||||
|
||||
const int threads_x = 32;
|
||||
const int threads_y = 8;
|
||||
|
||||
void getLaunchCfg(int cols, int rows, dim3& block, dim3& grid)
|
||||
{
|
||||
block = dim3(threads_x, threads_y);
|
||||
|
||||
grid = dim3(divUp(cols, block.x * block.y),
|
||||
divUp(rows, block.y * block.x));
|
||||
|
||||
grid.x = ::min(grid.x, block.x);
|
||||
grid.y = ::min(grid.y, block.y);
|
||||
}
|
||||
|
||||
void getBufSize(int cols, int rows, int cn, int& bufcols, int& bufrows)
|
||||
{
|
||||
dim3 block, grid;
|
||||
getLaunchCfg(cols, rows, block, grid);
|
||||
|
||||
bufcols = grid.x * grid.y * sizeof(double) * cn;
|
||||
bufrows = 1;
|
||||
}
|
||||
|
||||
template <typename T, typename R, int cn, template <typename> class Op>
|
||||
void caller(PtrStepSzb src_, void* buf_, double* out, PtrStepSzb mask)
|
||||
{
|
||||
typedef typename TypeVec<T, cn>::vec_type src_type;
|
||||
typedef typename TypeVec<R, cn>::vec_type result_type;
|
||||
|
||||
PtrStepSz<src_type> src(src_);
|
||||
result_type* buf = (result_type*) buf_;
|
||||
|
||||
dim3 block, grid;
|
||||
getLaunchCfg(src.cols, src.rows, block, grid);
|
||||
|
||||
const int twidth = divUp(divUp(src.cols, grid.x), block.x);
|
||||
const int theight = divUp(divUp(src.rows, grid.y), block.y);
|
||||
|
||||
Op<result_type> op;
|
||||
|
||||
if (mask.data)
|
||||
kernel<threads_x * threads_y><<<grid, block>>>(src, buf, SingleMask(mask), op, twidth, theight);
|
||||
else
|
||||
kernel<threads_x * threads_y><<<grid, block>>>(src, buf, WithOutMask(), op, twidth, theight);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
R result[4] = {0, 0, 0, 0};
|
||||
cudaSafeCall( cudaMemcpy(&result, buf, sizeof(result_type), cudaMemcpyDeviceToHost) );
|
||||
|
||||
out[0] = result[0];
|
||||
out[1] = result[1];
|
||||
out[2] = result[2];
|
||||
out[3] = result[3];
|
||||
}
|
||||
|
||||
template <typename T> struct SumType;
|
||||
template <> struct SumType<uchar> { typedef unsigned int R; };
|
||||
template <> struct SumType<schar> { typedef int R; };
|
||||
template <> struct SumType<ushort> { typedef unsigned int R; };
|
||||
template <> struct SumType<short> { typedef int R; };
|
||||
template <> struct SumType<int> { typedef int R; };
|
||||
template <> struct SumType<float> { typedef float R; };
|
||||
template <> struct SumType<double> { typedef double R; };
|
||||
|
||||
template <typename T, int cn>
|
||||
void run(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask)
|
||||
{
|
||||
typedef typename SumType<T>::R R;
|
||||
caller<T, R, cn, identity>(src, buf, out, mask);
|
||||
}
|
||||
|
||||
template void run<uchar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<uchar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<uchar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<uchar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void run<schar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<schar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<schar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<schar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void run<ushort, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<ushort, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<ushort, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<ushort, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void run<short, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<short, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<short, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<short, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void run<int, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<int, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<int, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<int, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void run<float, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<float, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<float, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<float, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void run<double, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<double, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<double, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void run<double, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template <typename T, int cn>
|
||||
void runAbs(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask)
|
||||
{
|
||||
typedef typename SumType<T>::R R;
|
||||
caller<T, R, cn, abs_func>(src, buf, out, mask);
|
||||
}
|
||||
|
||||
template void runAbs<uchar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<uchar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<uchar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<uchar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void runAbs<schar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<schar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<schar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<schar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void runAbs<ushort, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<ushort, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<ushort, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<ushort, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void runAbs<short, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<short, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<short, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<short, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void runAbs<int, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<int, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<int, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<int, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void runAbs<float, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<float, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<float, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<float, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void runAbs<double, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<double, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<double, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runAbs<double, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template <typename T> struct Sqr : unary_function<T, T>
|
||||
{
|
||||
__device__ __forceinline__ T operator ()(T x) const
|
||||
{
|
||||
return x * x;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T, int cn>
|
||||
void runSqr(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask)
|
||||
{
|
||||
caller<T, double, cn, Sqr>(src, buf, out, mask);
|
||||
}
|
||||
|
||||
template void runSqr<uchar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<uchar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<uchar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<uchar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void runSqr<schar, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<schar, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<schar, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<schar, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void runSqr<ushort, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<ushort, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<ushort, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<ushort, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void runSqr<short, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<short, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<short, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<short, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void runSqr<int, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<int, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<int, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<int, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void runSqr<float, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<float, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<float, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<float, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
|
||||
template void runSqr<double, 1>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<double, 2>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<double, 3>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
template void runSqr<double, 4>(PtrStepSzb src, void* buf, double* out, PtrStepSzb mask);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
114
modules/gpuarithm/src/cuda/threshold.cu
Normal file
114
modules/gpuarithm/src/cuda/threshold.cu
Normal file
@@ -0,0 +1,114 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/simd_functions.hpp"
|
||||
|
||||
#include "arithm_func_traits.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
template <typename T> struct TransformFunctorTraits< thresh_binary_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< thresh_binary_inv_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< thresh_trunc_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< thresh_to_zero_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
|
||||
template <typename T> struct TransformFunctorTraits< thresh_to_zero_inv_func<T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(T)>
|
||||
{
|
||||
};
|
||||
}}}
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
template <template <typename> class Op, typename T>
|
||||
void threshold_caller(PtrStepSz<T> src, PtrStepSz<T> dst, T thresh, T maxVal, cudaStream_t stream)
|
||||
{
|
||||
Op<T> op(thresh, maxVal);
|
||||
cudev::transform(src, dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> dst, T thresh, T maxVal, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[] =
|
||||
{
|
||||
threshold_caller<thresh_binary_func, T>,
|
||||
threshold_caller<thresh_binary_inv_func, T>,
|
||||
threshold_caller<thresh_trunc_func, T>,
|
||||
threshold_caller<thresh_to_zero_func, T>,
|
||||
threshold_caller<thresh_to_zero_inv_func, T>
|
||||
};
|
||||
|
||||
callers[type]((PtrStepSz<T>) src, (PtrStepSz<T>) dst, static_cast<T>(thresh), static_cast<T>(maxVal), stream);
|
||||
}
|
||||
|
||||
template void threshold<uchar>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
|
||||
template void threshold<schar>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
|
||||
template void threshold<ushort>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
|
||||
template void threshold<short>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
|
||||
template void threshold<int>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
|
||||
template void threshold<float>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
|
||||
template void threshold<double>(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
122
modules/gpuarithm/src/cuda/transpose.cu
Normal file
122
modules/gpuarithm/src/cuda/transpose.cu
Normal file
@@ -0,0 +1,122 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace arithm
|
||||
{
|
||||
const int TRANSPOSE_TILE_DIM = 16;
|
||||
const int TRANSPOSE_BLOCK_ROWS = 16;
|
||||
|
||||
template <typename T>
|
||||
__global__ void transposeKernel(const PtrStepSz<T> src, PtrStep<T> dst)
|
||||
{
|
||||
__shared__ T tile[TRANSPOSE_TILE_DIM][TRANSPOSE_TILE_DIM + 1];
|
||||
|
||||
int blockIdx_x, blockIdx_y;
|
||||
|
||||
// do diagonal reordering
|
||||
if (gridDim.x == gridDim.y)
|
||||
{
|
||||
blockIdx_y = blockIdx.x;
|
||||
blockIdx_x = (blockIdx.x + blockIdx.y) % gridDim.x;
|
||||
}
|
||||
else
|
||||
{
|
||||
int bid = blockIdx.x + gridDim.x * blockIdx.y;
|
||||
blockIdx_y = bid % gridDim.y;
|
||||
blockIdx_x = ((bid / gridDim.y) + blockIdx_y) % gridDim.x;
|
||||
}
|
||||
|
||||
int xIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.x;
|
||||
int yIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.y;
|
||||
|
||||
if (xIndex < src.cols)
|
||||
{
|
||||
for (int i = 0; i < TRANSPOSE_TILE_DIM; i += TRANSPOSE_BLOCK_ROWS)
|
||||
{
|
||||
if (yIndex + i < src.rows)
|
||||
{
|
||||
tile[threadIdx.y + i][threadIdx.x] = src(yIndex + i, xIndex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
xIndex = blockIdx_y * TRANSPOSE_TILE_DIM + threadIdx.x;
|
||||
yIndex = blockIdx_x * TRANSPOSE_TILE_DIM + threadIdx.y;
|
||||
|
||||
if (xIndex < src.rows)
|
||||
{
|
||||
for (int i = 0; i < TRANSPOSE_TILE_DIM; i += TRANSPOSE_BLOCK_ROWS)
|
||||
{
|
||||
if (yIndex + i < src.cols)
|
||||
{
|
||||
dst(yIndex + i, xIndex) = tile[threadIdx.x][threadIdx.y + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T> void transpose(PtrStepSz<T> src, PtrStepSz<T> dst, cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(TRANSPOSE_TILE_DIM, TRANSPOSE_TILE_DIM);
|
||||
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||
|
||||
transposeKernel<<<grid, block, 0, stream>>>(src, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void transpose<int>(PtrStepSz<int> src, PtrStepSz<int> dst, cudaStream_t stream);
|
||||
template void transpose<double>(PtrStepSz<double> src, PtrStepSz<double> dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
#endif // CUDA_DISABLER
|
135
modules/gpuarithm/src/cuda/unroll_detail.hpp
Normal file
135
modules/gpuarithm/src/cuda/unroll_detail.hpp
Normal file
@@ -0,0 +1,135 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __UNROLL_DETAIL_HPP__
|
||||
#define __UNROLL_DETAIL_HPP__
|
||||
|
||||
#include <thrust/tuple.h>
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/vec_traits.hpp"
|
||||
|
||||
namespace detail
|
||||
{
|
||||
template <int cn> struct Unroll;
|
||||
template <> struct Unroll<1>
|
||||
{
|
||||
template <int BLOCK_SIZE, typename R>
|
||||
static __device__ __forceinline__ volatile R* smem_tuple(R* smem)
|
||||
{
|
||||
return smem;
|
||||
}
|
||||
|
||||
template <typename R>
|
||||
static __device__ __forceinline__ R& tie(R& val)
|
||||
{
|
||||
return val;
|
||||
}
|
||||
|
||||
template <class Op>
|
||||
static __device__ __forceinline__ const Op& op(const Op& op)
|
||||
{
|
||||
return op;
|
||||
}
|
||||
};
|
||||
template <> struct Unroll<2>
|
||||
{
|
||||
template <int BLOCK_SIZE, typename R>
|
||||
static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*> smem_tuple(R* smem)
|
||||
{
|
||||
return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE);
|
||||
}
|
||||
|
||||
template <typename R>
|
||||
static __device__ __forceinline__ thrust::tuple<typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&> tie(R& val)
|
||||
{
|
||||
return thrust::tie(val.x, val.y);
|
||||
}
|
||||
|
||||
template <class Op>
|
||||
static __device__ __forceinline__ const thrust::tuple<Op, Op> op(const Op& op)
|
||||
{
|
||||
return thrust::make_tuple(op, op);
|
||||
}
|
||||
};
|
||||
template <> struct Unroll<3>
|
||||
{
|
||||
template <int BLOCK_SIZE, typename R>
|
||||
static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*, volatile R*> smem_tuple(R* smem)
|
||||
{
|
||||
return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE);
|
||||
}
|
||||
|
||||
template <typename R>
|
||||
static __device__ __forceinline__ thrust::tuple<typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&> tie(R& val)
|
||||
{
|
||||
return thrust::tie(val.x, val.y, val.z);
|
||||
}
|
||||
|
||||
template <class Op>
|
||||
static __device__ __forceinline__ const thrust::tuple<Op, Op, Op> op(const Op& op)
|
||||
{
|
||||
return thrust::make_tuple(op, op, op);
|
||||
}
|
||||
};
|
||||
template <> struct Unroll<4>
|
||||
{
|
||||
template <int BLOCK_SIZE, typename R>
|
||||
static __device__ __forceinline__ thrust::tuple<volatile R*, volatile R*, volatile R*, volatile R*> smem_tuple(R* smem)
|
||||
{
|
||||
return cv::gpu::cudev::smem_tuple(smem, smem + BLOCK_SIZE, smem + 2 * BLOCK_SIZE, smem + 3 * BLOCK_SIZE);
|
||||
}
|
||||
|
||||
template <typename R>
|
||||
static __device__ __forceinline__ thrust::tuple<typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&, typename cv::gpu::cudev::VecTraits<R>::elem_type&> tie(R& val)
|
||||
{
|
||||
return thrust::tie(val.x, val.y, val.z, val.w);
|
||||
}
|
||||
|
||||
template <class Op>
|
||||
static __device__ __forceinline__ const thrust::tuple<Op, Op, Op, Op> op(const Op& op)
|
||||
{
|
||||
return thrust::make_tuple(op, op, op, op);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#endif // __UNROLL_DETAIL_HPP__
|
Reference in New Issue
Block a user