Fixed several bugs in the GPU arithmetic (arithm) functions.
Refactored the tests for them.
This commit is contained in:
parent
f58c40bfab
commit
844bdea5ac
@ -638,11 +638,11 @@ CV_EXPORTS void bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, S
|
||||
|
||||
//! pixel by pixel right shift of an image by a constant value
|
||||
//! supports 1-, 3- and 4-channel images with integer elements
|
||||
CV_EXPORTS void rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
CV_EXPORTS void rshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
//! pixel by pixel left shift of an image by a constant value
|
||||
//! supports 1-, 3- and 4-channel images with CV_8U, CV_16U or CV_32S depth
|
||||
CV_EXPORTS void lshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
CV_EXPORTS void lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
//! computes per-element minimum of two arrays (dst = min(src1, src2))
|
||||
CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
@ -47,7 +47,7 @@
|
||||
#include "opencv2/gpu/device/limits.hpp"
|
||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// add
|
||||
@ -684,7 +684,7 @@ namespace cv { namespace gpu { namespace device
|
||||
__device__ __forceinline__ uchar4 operator ()(uchar4 a, float b) const
|
||||
{
|
||||
return b != 0 ? make_uchar4(saturate_cast<uchar>(a.x / b), saturate_cast<uchar>(a.y / b),
|
||||
saturate_cast<uchar>(a.z / b), saturate_cast<uchar>(a.w / b))
|
||||
saturate_cast<uchar>(a.z / b), saturate_cast<uchar>(a.w / b))
|
||||
: make_uchar4(0,0,0,0);
|
||||
}
|
||||
};
|
||||
@ -706,8 +706,8 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
__device__ __forceinline__ short4 operator ()(short4 a, float b) const
|
||||
{
|
||||
return b != 0 ? make_short4(saturate_cast<short>(a.x / b), saturate_cast<uchar>(a.y / b),
|
||||
saturate_cast<short>(a.z / b), saturate_cast<uchar>(a.w / b))
|
||||
return b != 0 ? make_short4(saturate_cast<short>(a.x / b), saturate_cast<short>(a.y / b),
|
||||
saturate_cast<short>(a.z / b), saturate_cast<short>(a.w / b))
|
||||
: make_short4(0,0,0,0);
|
||||
}
|
||||
};
|
||||
@ -1106,10 +1106,10 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
//template void absdiff_gpu<uchar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
template void absdiff_gpu<schar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
//template void absdiff_gpu<ushort>(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
//template void absdiff_gpu<ushort>(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
template void absdiff_gpu<short >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
template void absdiff_gpu<int >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
//template void absdiff_gpu<float >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
template void absdiff_gpu<int >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
//template void absdiff_gpu<float >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
template void absdiff_gpu<double>(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -1251,7 +1251,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
template <typename T>
|
||||
struct UnOp<T, UN_OP_NOT>
|
||||
{
|
||||
{
|
||||
static __device__ __forceinline__ T call(T v) { return ~v; }
|
||||
};
|
||||
|
||||
@ -1262,7 +1262,7 @@ namespace cv { namespace gpu { namespace device
|
||||
const int x = (blockDim.x * blockIdx.x + threadIdx.x) * 4;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (y < rows)
|
||||
if (y < rows)
|
||||
{
|
||||
uchar* dst_ptr = dst.ptr(y) + x;
|
||||
const uchar* src_ptr = src.ptr(y) + x;
|
||||
@ -1283,29 +1283,29 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
|
||||
template <int opid>
|
||||
void bitwiseUnOp(int rows, int width, const PtrStepb src, PtrStepb dst,
|
||||
void bitwiseUnOp(int rows, int width, const PtrStepb src, PtrStepb dst,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(16, 16);
|
||||
dim3 grid(divUp(width, threads.x * sizeof(uint)),
|
||||
dim3 grid(divUp(width, threads.x * sizeof(uint)),
|
||||
divUp(rows, threads.y));
|
||||
|
||||
bitwiseUnOpKernel<opid><<<grid, threads>>>(rows, width, src, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
|
||||
template <typename T, int opid>
|
||||
__global__ void bitwiseUnOpKernel(int rows, int cols, int cn, const PtrStepb src,
|
||||
__global__ void bitwiseUnOpKernel(int rows, int cols, int cn, const PtrStepb src,
|
||||
const PtrStepb mask, PtrStepb dst)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < cols && y < rows && mask.ptr(y)[x / cn])
|
||||
if (x < cols && y < rows && mask.ptr(y)[x / cn])
|
||||
{
|
||||
T* dst_row = (T*)dst.ptr(y);
|
||||
const T* src_row = (const T*)src.ptr(y);
|
||||
@ -1316,21 +1316,21 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
|
||||
template <typename T, int opid>
|
||||
void bitwiseUnOp(int rows, int cols, int cn, const PtrStepb src,
|
||||
void bitwiseUnOp(int rows, int cols, int cn, const PtrStepb src,
|
||||
const PtrStepb mask, PtrStepb dst, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(16, 16);
|
||||
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
|
||||
|
||||
bitwiseUnOpKernel<T, opid><<<grid, threads>>>(rows, cols, cn, src, mask, dst);
|
||||
bitwiseUnOpKernel<T, opid><<<grid, threads>>>(rows, cols, cn, src, mask, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
|
||||
void bitwiseNotCaller(int rows, int cols, size_t elem_size1, int cn,
|
||||
void bitwiseNotCaller(int rows, int cols, size_t elem_size1, int cn,
|
||||
const PtrStepb src, PtrStepb dst, cudaStream_t stream)
|
||||
{
|
||||
bitwiseUnOp<UN_OP_NOT>(rows, static_cast<int>(cols * elem_size1 * cn), src, dst, stream);
|
||||
@ -1338,7 +1338,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
|
||||
template <typename T>
|
||||
void bitwiseMaskNotCaller(int rows, int cols, int cn, const PtrStepb src,
|
||||
void bitwiseMaskNotCaller(int rows, int cols, int cn, const PtrStepb src,
|
||||
const PtrStepb mask, PtrStepb dst, cudaStream_t stream)
|
||||
{
|
||||
bitwiseUnOp<T, UN_OP_NOT>(rows, cols * cn, cn, src, mask, dst, stream);
|
||||
@ -1359,32 +1359,32 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
template <typename T>
|
||||
struct BinOp<T, BIN_OP_OR>
|
||||
{
|
||||
static __device__ __forceinline__ T call(T a, T b) { return a | b; }
|
||||
{
|
||||
static __device__ __forceinline__ T call(T a, T b) { return a | b; }
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
struct BinOp<T, BIN_OP_AND>
|
||||
{
|
||||
static __device__ __forceinline__ T call(T a, T b) { return a & b; }
|
||||
{
|
||||
static __device__ __forceinline__ T call(T a, T b) { return a & b; }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct BinOp<T, BIN_OP_XOR>
|
||||
{
|
||||
static __device__ __forceinline__ T call(T a, T b) { return a ^ b; }
|
||||
{
|
||||
static __device__ __forceinline__ T call(T a, T b) { return a ^ b; }
|
||||
};
|
||||
|
||||
|
||||
template <int opid>
|
||||
__global__ void bitwiseBinOpKernel(int rows, int width, const PtrStepb src1,
|
||||
__global__ void bitwiseBinOpKernel(int rows, int width, const PtrStepb src1,
|
||||
const PtrStepb src2, PtrStepb dst)
|
||||
{
|
||||
const int x = (blockDim.x * blockIdx.x + threadIdx.x) * 4;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (y < rows)
|
||||
if (y < rows)
|
||||
{
|
||||
uchar* dst_ptr = dst.ptr(y) + x;
|
||||
const uchar* src1_ptr = src1.ptr(y) + x;
|
||||
@ -1407,7 +1407,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
|
||||
template <int opid>
|
||||
void bitwiseBinOp(int rows, int width, const PtrStepb src1, const PtrStepb src2,
|
||||
void bitwiseBinOp(int rows, int width, const PtrStepb src1, const PtrStepb src2,
|
||||
PtrStepb dst, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(16, 16);
|
||||
@ -1416,20 +1416,20 @@ namespace cv { namespace gpu { namespace device
|
||||
bitwiseBinOpKernel<opid><<<grid, threads>>>(rows, width, src1, src2, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
|
||||
template <typename T, int opid>
|
||||
__global__ void bitwiseBinOpKernel(
|
||||
int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2,
|
||||
int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2,
|
||||
const PtrStepb mask, PtrStepb dst)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < cols && y < rows && mask.ptr(y)[x / cn])
|
||||
if (x < cols && y < rows && mask.ptr(y)[x / cn])
|
||||
{
|
||||
T* dst_row = (T*)dst.ptr(y);
|
||||
const T* src1_row = (const T*)src1.ptr(y);
|
||||
@ -1441,7 +1441,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
|
||||
template <typename T, int opid>
|
||||
void bitwiseBinOp(int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2,
|
||||
void bitwiseBinOp(int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2,
|
||||
const PtrStepb mask, PtrStepb dst, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(16, 16);
|
||||
@ -1450,12 +1450,12 @@ namespace cv { namespace gpu { namespace device
|
||||
bitwiseBinOpKernel<T, opid><<<grid, threads>>>(rows, cols, cn, src1, src2, mask, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
|
||||
void bitwiseOrCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src1,
|
||||
void bitwiseOrCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src1,
|
||||
const PtrStepb src2, PtrStepb dst, cudaStream_t stream)
|
||||
{
|
||||
bitwiseBinOp<BIN_OP_OR>(rows, static_cast<int>(cols * elem_size1 * cn), src1, src2, dst, stream);
|
||||
@ -1463,7 +1463,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
|
||||
template <typename T>
|
||||
void bitwiseMaskOrCaller(int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2,
|
||||
void bitwiseMaskOrCaller(int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2,
|
||||
const PtrStepb mask, PtrStepb dst, cudaStream_t stream)
|
||||
{
|
||||
bitwiseBinOp<T, BIN_OP_OR>(rows, cols * cn, cn, src1, src2, mask, dst, stream);
|
||||
@ -1474,7 +1474,7 @@ namespace cv { namespace gpu { namespace device
|
||||
template void bitwiseMaskOrCaller<uint>(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
|
||||
|
||||
|
||||
void bitwiseAndCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src1,
|
||||
void bitwiseAndCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src1,
|
||||
const PtrStepb src2, PtrStepb dst, cudaStream_t stream)
|
||||
{
|
||||
bitwiseBinOp<BIN_OP_AND>(rows, static_cast<int>(cols * elem_size1 * cn), src1, src2, dst, stream);
|
||||
@ -1482,7 +1482,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
|
||||
template <typename T>
|
||||
void bitwiseMaskAndCaller(int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2,
|
||||
void bitwiseMaskAndCaller(int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2,
|
||||
const PtrStepb mask, PtrStepb dst, cudaStream_t stream)
|
||||
{
|
||||
bitwiseBinOp<T, BIN_OP_AND>(rows, cols * cn, cn, src1, src2, mask, dst, stream);
|
||||
@ -1493,7 +1493,7 @@ namespace cv { namespace gpu { namespace device
|
||||
template void bitwiseMaskAndCaller<uint>(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
|
||||
|
||||
|
||||
void bitwiseXorCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src1,
|
||||
void bitwiseXorCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src1,
|
||||
const PtrStepb src2, PtrStepb dst, cudaStream_t stream)
|
||||
{
|
||||
bitwiseBinOp<BIN_OP_XOR>(rows, static_cast<int>(cols * elem_size1 * cn), src1, src2, dst, stream);
|
||||
@ -1501,7 +1501,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
|
||||
template <typename T>
|
||||
void bitwiseMaskXorCaller(int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2,
|
||||
void bitwiseMaskXorCaller(int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2,
|
||||
const PtrStepb mask, PtrStepb dst, cudaStream_t stream)
|
||||
{
|
||||
bitwiseBinOp<T, BIN_OP_XOR>(rows, cols * cn, cn, src1, src2, mask, dst, stream);
|
||||
@ -1546,7 +1546,7 @@ namespace cv { namespace gpu { namespace device
|
||||
template <typename T>
|
||||
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
{
|
||||
cv::gpu::device::transform(src1, src2, dst, minimum<T>(), WithOutMask(), stream);
|
||||
cv::gpu::device::transform(src1, src2, dst, minimum<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void min_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
@ -1560,7 +1560,7 @@ namespace cv { namespace gpu { namespace device
|
||||
template <typename T>
|
||||
void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
{
|
||||
cv::gpu::device::transform(src1, src2, dst, maximum<T>(), WithOutMask(), stream);
|
||||
cv::gpu::device::transform(src1, src2, dst, maximum<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void max_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
@ -1574,7 +1574,7 @@ namespace cv { namespace gpu { namespace device
|
||||
template <typename T>
|
||||
void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
{
|
||||
cv::gpu::device::transform(src1, dst, device::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
|
||||
cv::gpu::device::transform(src1, dst, device::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void min_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
@ -1588,7 +1588,7 @@ namespace cv { namespace gpu { namespace device
|
||||
template <typename T>
|
||||
void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
{
|
||||
cv::gpu::device::transform(src1, dst, device::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
|
||||
cv::gpu::device::transform(src1, dst, device::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void max_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
@ -1647,12 +1647,12 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[] =
|
||||
static const caller_t callers[] =
|
||||
{
|
||||
threshold_caller<thresh_binary_func, T>,
|
||||
threshold_caller<thresh_binary_inv_func, T>,
|
||||
threshold_caller<thresh_trunc_func, T>,
|
||||
threshold_caller<thresh_to_zero_func, T>,
|
||||
threshold_caller<thresh_binary_func, T>,
|
||||
threshold_caller<thresh_binary_inv_func, T>,
|
||||
threshold_caller<thresh_trunc_func, T>,
|
||||
threshold_caller<thresh_to_zero_func, T>,
|
||||
threshold_caller<thresh_to_zero_inv_func, T>
|
||||
};
|
||||
|
||||
@ -1671,14 +1671,14 @@ namespace cv { namespace gpu { namespace device
|
||||
// pow
|
||||
|
||||
template<typename T, bool Signed = device::numeric_limits<T>::is_signed> struct PowOp : unary_function<T, T>
|
||||
{
|
||||
{
|
||||
float power;
|
||||
PowOp(float power_) : power(power_) {}
|
||||
|
||||
|
||||
__device__ __forceinline__ T operator()(const T& e) const
|
||||
{
|
||||
{
|
||||
return saturate_cast<T>(__powf((float)e, power));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T> struct PowOp<T, true> : unary_function<T, T>
|
||||
@ -1688,11 +1688,11 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
__device__ __forceinline__ float operator()(const T& e) const
|
||||
{
|
||||
T res = saturate_cast<T>(__powf((float)e, power));
|
||||
|
||||
T res = saturate_cast<T>(__powf((float)e, power));
|
||||
|
||||
if ( (e < 0) && (1 & (int)power) )
|
||||
res *= -1;
|
||||
return res;
|
||||
res *= -1;
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
@ -1736,7 +1736,7 @@ namespace cv { namespace gpu { namespace device
|
||||
void pow_caller(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream)
|
||||
{
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, PowOp<T>(power), WithOutMask(), stream);
|
||||
}
|
||||
}
|
||||
|
||||
template void pow_caller<uchar>(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
|
||||
template void pow_caller<schar>(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
|
||||
|
@ -71,8 +71,8 @@ void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&,
|
||||
void cv::gpu::bitwise_and(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::bitwise_xor(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::rshift(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::lshift(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::rshift(const GpuMat&, Scalar_<int>, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::lshift(const GpuMat&, Scalar_<int>, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
@ -101,11 +101,11 @@ namespace
|
||||
template <int DEPTH> struct NppArithmFunc
|
||||
{
|
||||
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
|
||||
|
||||
|
||||
typedef NppStatus (*func_t)(const npp_t* pSrc1, int nSrc1Step, const npp_t* pSrc2, int nSrc2Step, npp_t* pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
|
||||
};
|
||||
template <> struct NppArithmFunc<CV_32F>
|
||||
{
|
||||
{
|
||||
typedef NppTypeTraits<CV_32F>::npp_t npp_t;
|
||||
|
||||
typedef NppStatus (*func_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
|
||||
@ -123,7 +123,7 @@ namespace
|
||||
sz.width = src1.cols;
|
||||
sz.height = src1.rows;
|
||||
|
||||
nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (const npp_t*)src2.data, static_cast<int>(src2.step),
|
||||
nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (const npp_t*)src2.data, static_cast<int>(src2.step),
|
||||
(npp_t*)dst.data, static_cast<int>(dst.step), sz, 0) );
|
||||
|
||||
if (stream == 0)
|
||||
@ -145,8 +145,8 @@ namespace
|
||||
NppiSize sz;
|
||||
sz.width = src1.cols;
|
||||
sz.height = src1.rows;
|
||||
|
||||
nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (const npp_t*)src2.data, static_cast<int>(src2.step),
|
||||
|
||||
nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (const npp_t*)src2.data, static_cast<int>(src2.step),
|
||||
(npp_t*)dst.data, static_cast<int>(dst.step), sz) );
|
||||
|
||||
if (stream == 0)
|
||||
@ -162,12 +162,12 @@ namespace
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// add
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T, typename D>
|
||||
template <typename T, typename D>
|
||||
void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||
|
||||
template <typename T, typename D>
|
||||
template <typename T, typename D>
|
||||
void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||
}}}
|
||||
|
||||
@ -177,7 +177,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu
|
||||
|
||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[7][7] =
|
||||
static const func_t funcs[7][7] =
|
||||
{
|
||||
{add_gpu<unsigned char, unsigned char>, 0/*add_gpu<unsigned char, signed char>*/, add_gpu<unsigned char, unsigned short>, add_gpu<unsigned char, short>, add_gpu<unsigned char, int>, add_gpu<unsigned char, float>, add_gpu<unsigned char, double>},
|
||||
{0/*add_gpu<signed char, unsigned char>*/, 0/*add_gpu<signed char, signed char>*/, 0/*add_gpu<signed char, unsigned short>*/, 0/*add_gpu<signed char, short>*/, 0/*add_gpu<signed char, int>*/, 0/*add_gpu<signed char, float>*/, 0/*add_gpu<signed char, double>*/},
|
||||
@ -188,7 +188,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu
|
||||
{0/*add_gpu<double, unsigned char>*/, 0/*add_gpu<double, signed char>*/, 0/*add_gpu<double, unsigned short>*/, 0/*add_gpu<double, short>*/, 0/*add_gpu<double, int>*/, 0/*add_gpu<double, float>*/, add_gpu<double, double>}
|
||||
};
|
||||
|
||||
static const func_t npp_funcs[7] =
|
||||
static const func_t npp_funcs[7] =
|
||||
{
|
||||
NppArithm<CV_8U, nppiAdd_8u_C1RSfs>::call,
|
||||
0,
|
||||
@ -228,21 +228,21 @@ namespace
|
||||
{
|
||||
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
|
||||
|
||||
typedef NppStatus (*func_ptr)(const npp_t* pSrc1, int nSrc1Step, const npp_t* pConstants,
|
||||
typedef NppStatus (*func_ptr)(const npp_t* pSrc1, int nSrc1Step, const npp_t* pConstants,
|
||||
npp_t* pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
|
||||
};
|
||||
template<int DEPTH> struct NppArithmScalarFunc<DEPTH, 1>
|
||||
{
|
||||
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
|
||||
|
||||
typedef NppStatus (*func_ptr)(const npp_t* pSrc1, int nSrc1Step, const npp_t pConstants,
|
||||
typedef NppStatus (*func_ptr)(const npp_t* pSrc1, int nSrc1Step, const npp_t pConstants,
|
||||
npp_t* pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
|
||||
};
|
||||
template<int DEPTH> struct NppArithmScalarFunc<DEPTH, 2>
|
||||
{
|
||||
typedef typename NppTypeTraits<DEPTH>::npp_complex_type npp_complex_type;
|
||||
|
||||
typedef NppStatus (*func_ptr)(const npp_complex_type* pSrc1, int nSrc1Step, const npp_complex_type pConstants,
|
||||
typedef NppStatus (*func_ptr)(const npp_complex_type* pSrc1, int nSrc1Step, const npp_complex_type pConstants,
|
||||
npp_complex_type* pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
|
||||
};
|
||||
template<int cn> struct NppArithmScalarFunc<CV_32F, cn>
|
||||
@ -313,7 +313,7 @@ namespace
|
||||
nConstant.re = saturate_cast<npp_t>(sc.val[0]);
|
||||
nConstant.im = saturate_cast<npp_t>(sc.val[1]);
|
||||
|
||||
nppSafeCall( func(src.ptr<npp_complex_type>(), static_cast<int>(src.step), nConstant,
|
||||
nppSafeCall( func(src.ptr<npp_complex_type>(), static_cast<int>(src.step), nConstant,
|
||||
dst.ptr<npp_complex_type>(), static_cast<int>(dst.step), sz, 0) );
|
||||
|
||||
if (stream == 0)
|
||||
@ -382,7 +382,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat
|
||||
|
||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[7][7] =
|
||||
static const func_t funcs[7][7] =
|
||||
{
|
||||
{add_gpu<unsigned char, unsigned char>, 0/*add_gpu<unsigned char, signed char>*/, add_gpu<unsigned char, unsigned short>, add_gpu<unsigned char, short>, add_gpu<unsigned char, int>, add_gpu<unsigned char, float>, add_gpu<unsigned char, double>},
|
||||
{0/*add_gpu<signed char, unsigned char>*/, 0/*add_gpu<signed char, signed char>*/, 0/*add_gpu<signed char, unsigned short>*/, 0/*add_gpu<signed char, short>*/, 0/*add_gpu<signed char, int>*/, 0/*add_gpu<signed char, float>*/, 0/*add_gpu<signed char, double>*/},
|
||||
@ -394,7 +394,7 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat
|
||||
};
|
||||
|
||||
typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
|
||||
static const npp_func_t npp_funcs[7][4] =
|
||||
static const npp_func_t npp_funcs[7][4] =
|
||||
{
|
||||
{NppArithmScalar<CV_8U, 1, nppiAddC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiAddC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiAddC_8u_C4RSfs>::call},
|
||||
{0,0,0,0},
|
||||
@ -436,12 +436,12 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// subtract
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T, typename D>
|
||||
template <typename T, typename D>
|
||||
void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||
|
||||
template <typename T, typename D>
|
||||
template <typename T, typename D>
|
||||
void subtract_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||
}}}
|
||||
|
||||
@ -451,7 +451,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
|
||||
|
||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[7][7] =
|
||||
static const func_t funcs[7][7] =
|
||||
{
|
||||
{subtract_gpu<unsigned char, unsigned char>, 0/*subtract_gpu<unsigned char, signed char>*/, subtract_gpu<unsigned char, unsigned short>, subtract_gpu<unsigned char, short>, subtract_gpu<unsigned char, int>, subtract_gpu<unsigned char, float>, subtract_gpu<unsigned char, double>},
|
||||
{0/*subtract_gpu<signed char, unsigned char>*/, 0/*subtract_gpu<signed char, signed char>*/, 0/*subtract_gpu<signed char, unsigned short>*/, 0/*subtract_gpu<signed char, short>*/, 0/*subtract_gpu<signed char, int>*/, 0/*subtract_gpu<signed char, float>*/, 0/*subtract_gpu<signed char, double>*/},
|
||||
@ -462,15 +462,14 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
|
||||
{0/*subtract_gpu<double, unsigned char>*/, 0/*subtract_gpu<double, signed char>*/, 0/*subtract_gpu<double, unsigned short>*/, 0/*subtract_gpu<double, short>*/, 0/*subtract_gpu<double, int>*/, 0/*subtract_gpu<double, float>*/, subtract_gpu<double, double>}
|
||||
};
|
||||
|
||||
static const func_t npp_funcs[7] =
|
||||
static const func_t npp_funcs[6] =
|
||||
{
|
||||
NppArithm<CV_8U, nppiSub_8u_C1RSfs>::call,
|
||||
0,
|
||||
NppArithm<CV_16U, nppiSub_16u_C1RSfs>::call,
|
||||
NppArithm<CV_16S, nppiSub_16s_C1RSfs>::call,
|
||||
NppArithm<CV_32S, nppiSub_32s_C1RSfs>::call,
|
||||
NppArithm<CV_32F, nppiSub_32f_C1R>::call,
|
||||
subtract_gpu<double, double>
|
||||
NppArithm<CV_32F, nppiSub_32f_C1R>::call
|
||||
};
|
||||
|
||||
CV_Assert(src1.type() != CV_8S);
|
||||
@ -484,7 +483,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
if (mask.empty() && dst.type() == src1.type())
|
||||
if (mask.empty() && dst.type() == src1.type() && src1.depth() <= CV_32F)
|
||||
{
|
||||
npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), PtrStepb(), stream);
|
||||
return;
|
||||
@ -502,7 +501,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G
|
||||
|
||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[7][7] =
|
||||
static const func_t funcs[7][7] =
|
||||
{
|
||||
{subtract_gpu<unsigned char, unsigned char>, 0/*subtract_gpu<unsigned char, signed char>*/, subtract_gpu<unsigned char, unsigned short>, subtract_gpu<unsigned char, short>, subtract_gpu<unsigned char, int>, subtract_gpu<unsigned char, float>, subtract_gpu<unsigned char, double>},
|
||||
{0/*subtract_gpu<signed char, unsigned char>*/, 0/*subtract_gpu<signed char, signed char>*/, 0/*subtract_gpu<signed char, unsigned short>*/, 0/*subtract_gpu<signed char, short>*/, 0/*subtract_gpu<signed char, int>*/, 0/*subtract_gpu<signed char, float>*/, 0/*subtract_gpu<signed char, double>*/},
|
||||
@ -514,7 +513,7 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G
|
||||
};
|
||||
|
||||
typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
|
||||
static const npp_func_t npp_funcs[7][4] =
|
||||
static const npp_func_t npp_funcs[7][4] =
|
||||
{
|
||||
{NppArithmScalar<CV_8U, 1, nppiSubC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiSubC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiSubC_8u_C4RSfs>::call},
|
||||
{0,0,0,0},
|
||||
@ -556,15 +555,15 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// multiply
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
void multiply_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
|
||||
void multiply_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);
|
||||
|
||||
template <typename T, typename D>
|
||||
template <typename T, typename D>
|
||||
void multiply_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||
|
||||
template <typename T, typename D>
|
||||
template <typename T, typename D>
|
||||
void multiply_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||
}}}
|
||||
|
||||
@ -574,7 +573,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub
|
||||
|
||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[7][7] =
|
||||
static const func_t funcs[7][7] =
|
||||
{
|
||||
{multiply_gpu<unsigned char, unsigned char>, 0/*multiply_gpu<unsigned char, signed char>*/, multiply_gpu<unsigned char, unsigned short>, multiply_gpu<unsigned char, short>, multiply_gpu<unsigned char, int>, multiply_gpu<unsigned char, float>, multiply_gpu<unsigned char, double>},
|
||||
{0/*multiply_gpu<signed char, unsigned char>*/, 0/*multiply_gpu<signed char, signed char>*/, 0/*multiply_gpu<signed char, unsigned short>*/, 0/*multiply_gpu<signed char, short>*/, 0/*multiply_gpu<signed char, int>*/, 0/*multiply_gpu<signed char, float>*/, 0/*multiply_gpu<signed char, double>*/},
|
||||
@ -585,7 +584,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub
|
||||
{0/*multiply_gpu<double, unsigned char>*/, 0/*multiply_gpu<double, signed char>*/, 0/*multiply_gpu<double, unsigned short>*/, 0/*multiply_gpu<double, short>*/, 0/*multiply_gpu<double, int>*/, 0/*multiply_gpu<double, float>*/, multiply_gpu<double, double>}
|
||||
};
|
||||
|
||||
static const func_t npp_funcs[7] =
|
||||
static const func_t npp_funcs[7] =
|
||||
{
|
||||
NppArithm<CV_8U, nppiMul_8u_C1RSfs>::call,
|
||||
0,
|
||||
@ -651,7 +650,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double
|
||||
|
||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[7][7] =
|
||||
static const func_t funcs[7][7] =
|
||||
{
|
||||
{multiply_gpu<unsigned char, unsigned char>, 0/*multiply_gpu<unsigned char, signed char>*/, multiply_gpu<unsigned char, unsigned short>, multiply_gpu<unsigned char, short>, multiply_gpu<unsigned char, int>, multiply_gpu<unsigned char, float>, multiply_gpu<unsigned char, double>},
|
||||
{0/*multiply_gpu<signed char, unsigned char>*/, 0/*multiply_gpu<signed char, signed char>*/, 0/*multiply_gpu<signed char, unsigned short>*/, 0/*multiply_gpu<signed char, short>*/, 0/*multiply_gpu<signed char, int>*/, 0/*multiply_gpu<signed char, float>*/, 0/*multiply_gpu<signed char, double>*/},
|
||||
@ -663,7 +662,7 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double
|
||||
};
|
||||
|
||||
typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
|
||||
static const npp_func_t npp_funcs[7][4] =
|
||||
static const npp_func_t npp_funcs[7][4] =
|
||||
{
|
||||
{NppArithmScalar<CV_8U, 1, nppiMulC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiMulC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiMulC_8u_C4RSfs>::call},
|
||||
{0,0,0,0},
|
||||
@ -702,18 +701,18 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// divide
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
void divide_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
|
||||
void divide_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);
|
||||
|
||||
template <typename T, typename D>
|
||||
template <typename T, typename D>
|
||||
void divide_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||
|
||||
template <typename T, typename D>
|
||||
template <typename T, typename D>
|
||||
void divide_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||
|
||||
template <typename T, typename D>
|
||||
template <typename T, typename D>
|
||||
void divide_gpu(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
}}}
|
||||
|
||||
@ -723,7 +722,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
|
||||
|
||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[7][7] =
|
||||
static const func_t funcs[7][7] =
|
||||
{
|
||||
{divide_gpu<unsigned char, unsigned char>, 0/*divide_gpu<unsigned char, signed char>*/, divide_gpu<unsigned char, unsigned short>, divide_gpu<unsigned char, short>, divide_gpu<unsigned char, int>, divide_gpu<unsigned char, float>, divide_gpu<unsigned char, double>},
|
||||
{0/*divide_gpu<signed char, unsigned char>*/, 0/*divide_gpu<signed char, signed char>*/, 0/*divide_gpu<signed char, unsigned short>*/, 0/*divide_gpu<signed char, short>*/, 0/*divide_gpu<signed char, int>*/, 0/*divide_gpu<signed char, float>*/, 0/*divide_gpu<signed char, double>*/},
|
||||
@ -734,15 +733,14 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
|
||||
{0/*divide_gpu<double, unsigned char>*/, 0/*divide_gpu<double, signed char>*/, 0/*divide_gpu<double, unsigned short>*/, 0/*divide_gpu<double, short>*/, 0/*divide_gpu<double, int>*/, 0/*divide_gpu<double, float>*/, divide_gpu<double, double>}
|
||||
};
|
||||
|
||||
static const func_t npp_funcs[7] =
|
||||
static const func_t npp_funcs[6] =
|
||||
{
|
||||
NppArithm<CV_8U, nppiDiv_8u_C1RSfs>::call,
|
||||
0,
|
||||
NppArithm<CV_16U, nppiDiv_16u_C1RSfs>::call,
|
||||
NppArithm<CV_16S, nppiDiv_16s_C1RSfs>::call,
|
||||
NppArithm<CV_32S, nppiDiv_32s_C1RSfs>::call,
|
||||
NppArithm<CV_32F, nppiDiv_32f_C1R>::call,
|
||||
divide_gpu<double, double>
|
||||
NppArithm<CV_32F, nppiDiv_32f_C1R>::call
|
||||
};
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
@ -753,7 +751,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
|
||||
|
||||
dst.create(src1.size(), src1.type());
|
||||
|
||||
multiply_gpu(static_cast<DevMem2D_<uchar4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<uchar4> >(dst), stream);
|
||||
divide_gpu(static_cast<DevMem2D_<uchar4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<uchar4> >(dst), stream);
|
||||
}
|
||||
else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1)
|
||||
{
|
||||
@ -761,10 +759,10 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
|
||||
|
||||
dst.create(src1.size(), src1.type());
|
||||
|
||||
multiply_gpu(static_cast<DevMem2D_<short4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<short4> >(dst), stream);
|
||||
divide_gpu(static_cast<DevMem2D_<short4> >(src1), static_cast<DevMem2Df>(src2), static_cast<DevMem2D_<short4> >(dst), stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
{
|
||||
CV_Assert(src1.type() != CV_8S);
|
||||
CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());
|
||||
|
||||
@ -773,7 +771,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
|
||||
|
||||
dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
|
||||
|
||||
if (scale == 1 && dst.type() == src1.type())
|
||||
if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F)
|
||||
{
|
||||
npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), 1, stream);
|
||||
return;
|
||||
@ -792,7 +790,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc
|
||||
|
||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[7][7] =
|
||||
static const func_t funcs[7][7] =
|
||||
{
|
||||
{divide_gpu<unsigned char, unsigned char>, 0/*divide_gpu<unsigned char, signed char>*/, divide_gpu<unsigned char, unsigned short>, divide_gpu<unsigned char, short>, divide_gpu<unsigned char, int>, divide_gpu<unsigned char, float>, divide_gpu<unsigned char, double>},
|
||||
{0/*divide_gpu<signed char, unsigned char>*/, 0/*divide_gpu<signed char, signed char>*/, 0/*divide_gpu<signed char, unsigned short>*/, 0/*divide_gpu<signed char, short>*/, 0/*divide_gpu<signed char, int>*/, 0/*divide_gpu<signed char, float>*/, 0/*divide_gpu<signed char, double>*/},
|
||||
@ -804,7 +802,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc
|
||||
};
|
||||
|
||||
typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
|
||||
static const npp_func_t npp_funcs[7][4] =
|
||||
static const npp_func_t npp_funcs[7][4] =
|
||||
{
|
||||
{NppArithmScalar<CV_8U, 1, nppiDivC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiDivC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiDivC_8u_C4RSfs>::call},
|
||||
{0,0,0,0},
|
||||
@ -846,7 +844,7 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St
|
||||
|
||||
typedef void (*func_t)(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[7][7] =
|
||||
static const func_t funcs[7][7] =
|
||||
{
|
||||
{divide_gpu<unsigned char, unsigned char>, 0/*divide_gpu<unsigned char, signed char>*/, divide_gpu<unsigned char, unsigned short>, divide_gpu<unsigned char, short>, divide_gpu<unsigned char, int>, divide_gpu<unsigned char, float>, divide_gpu<unsigned char, double>},
|
||||
{0/*divide_gpu<signed char, unsigned char>*/, 0/*divide_gpu<signed char, signed char>*/, 0/*divide_gpu<signed char, unsigned short>*/, 0/*divide_gpu<signed char, short>*/, 0/*divide_gpu<signed char, int>*/, 0/*divide_gpu<signed char, float>*/, 0/*divide_gpu<signed char, double>*/},
|
||||
@ -875,12 +873,12 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// absdiff
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T>
|
||||
void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
|
||||
template <typename T>
|
||||
template <typename T>
|
||||
void absdiff_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);
|
||||
}}}
|
||||
|
||||
@ -890,7 +888,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
|
||||
|
||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[] =
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
absdiff_gpu<unsigned char>, absdiff_gpu<signed char>, absdiff_gpu<unsigned short>, absdiff_gpu<short>, absdiff_gpu<int>, absdiff_gpu<float>, absdiff_gpu<double>
|
||||
};
|
||||
@ -909,7 +907,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
|
||||
{
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step),
|
||||
nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step),
|
||||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
|
||||
|
||||
if (stream == 0)
|
||||
@ -919,7 +917,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
|
||||
{
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
nppSafeCall( nppiAbsDiff_16u_C1R(src1.ptr<Npp16u>(), static_cast<int>(src1.step), src2.ptr<Npp16u>(), static_cast<int>(src2.step),
|
||||
nppSafeCall( nppiAbsDiff_16u_C1R(src1.ptr<Npp16u>(), static_cast<int>(src1.step), src2.ptr<Npp16u>(), static_cast<int>(src2.step),
|
||||
dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) );
|
||||
|
||||
if (stream == 0)
|
||||
@ -929,7 +927,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
|
||||
{
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), static_cast<int>(src1.step), src2.ptr<Npp32f>(), static_cast<int>(src2.step),
|
||||
nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), static_cast<int>(src1.step), src2.ptr<Npp32f>(), static_cast<int>(src2.step),
|
||||
dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
|
||||
|
||||
if (stream == 0)
|
||||
@ -969,7 +967,7 @@ namespace
|
||||
sz.width = src1.cols;
|
||||
sz.height = src1.rows;
|
||||
|
||||
nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (npp_t*)dst.data, static_cast<int>(dst.step),
|
||||
nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (npp_t*)dst.data, static_cast<int>(dst.step),
|
||||
sz, static_cast<npp_t>(val)) );
|
||||
|
||||
if (stream == 0)
|
||||
@ -984,14 +982,14 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea
|
||||
|
||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[] =
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
NppAbsDiffC<CV_8U, nppiAbsDiffC_8u_C1R>::call,
|
||||
absdiff_gpu<signed char>,
|
||||
NppAbsDiffC<CV_16U, nppiAbsDiffC_16u_C1R>::call,
|
||||
NppAbsDiffC<CV_8U, nppiAbsDiffC_8u_C1R>::call,
|
||||
absdiff_gpu<signed char>,
|
||||
NppAbsDiffC<CV_16U, nppiAbsDiffC_16u_C1R>::call,
|
||||
absdiff_gpu<short>,
|
||||
absdiff_gpu<int>,
|
||||
NppAbsDiffC<CV_32F, nppiAbsDiffC_32f_C1R>::call,
|
||||
absdiff_gpu<int>,
|
||||
NppAbsDiffC<CV_32F, nppiAbsDiffC_32f_C1R>::call,
|
||||
absdiff_gpu<double>
|
||||
};
|
||||
|
||||
@ -1132,7 +1130,7 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[] =
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
NppSqr<CV_8U, nppiSqr_8u_C1RSfs, nppiSqr_8u_C4RSfs>::call,
|
||||
0,
|
||||
@ -1209,7 +1207,7 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[] =
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
NppOneSource<CV_8U, nppiSqrt_8u_C1RSfs>::call,
|
||||
0,
|
||||
@ -1233,7 +1231,7 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[] =
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
NppOneSource<CV_8U, nppiLn_8u_C1RSfs>::call,
|
||||
0,
|
||||
@ -1257,7 +1255,7 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[] =
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
NppOneSource<CV_8U, nppiExp_8u_C1RSfs>::call,
|
||||
0,
|
||||
@ -1277,7 +1275,7 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Comparison of two matrices
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T> void compare_eq(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
template <typename T> void compare_ne(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
@ -1291,7 +1289,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
|
||||
|
||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[7][4] =
|
||||
static const func_t funcs[7][4] =
|
||||
{
|
||||
{compare_eq<unsigned char>, compare_ne<unsigned char>, compare_lt<unsigned char>, compare_le<unsigned char>},
|
||||
{compare_eq<signed char>, compare_ne<signed char>, compare_lt<signed char>, compare_le<signed char>},
|
||||
@ -1353,7 +1351,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Unary bitwise logical operations
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
void bitwiseNotCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src, PtrStepb dst, cudaStream_t stream);
|
||||
|
||||
@ -1377,9 +1375,9 @@ namespace
|
||||
|
||||
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
|
||||
|
||||
static Caller callers[] =
|
||||
static Caller callers[] =
|
||||
{
|
||||
bitwiseMaskNotCaller<unsigned char>, bitwiseMaskNotCaller<unsigned char>,
|
||||
bitwiseMaskNotCaller<unsigned char>, bitwiseMaskNotCaller<unsigned char>,
|
||||
bitwiseMaskNotCaller<unsigned short>, bitwiseMaskNotCaller<unsigned short>,
|
||||
bitwiseMaskNotCaller<unsigned int>, bitwiseMaskNotCaller<unsigned int>,
|
||||
bitwiseMaskNotCaller<unsigned int>
|
||||
@ -1410,7 +1408,7 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Binary bitwise logical operations
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
void bitwiseOrCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
|
||||
|
||||
@ -1444,9 +1442,9 @@ namespace
|
||||
|
||||
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
|
||||
|
||||
static Caller callers[] =
|
||||
static Caller callers[] =
|
||||
{
|
||||
bitwiseMaskOrCaller<unsigned char>, bitwiseMaskOrCaller<unsigned char>,
|
||||
bitwiseMaskOrCaller<unsigned char>, bitwiseMaskOrCaller<unsigned char>,
|
||||
bitwiseMaskOrCaller<unsigned short>, bitwiseMaskOrCaller<unsigned short>,
|
||||
bitwiseMaskOrCaller<unsigned int>, bitwiseMaskOrCaller<unsigned int>,
|
||||
bitwiseMaskOrCaller<unsigned int>
|
||||
@ -1478,9 +1476,9 @@ namespace
|
||||
|
||||
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
|
||||
|
||||
static Caller callers[] =
|
||||
static Caller callers[] =
|
||||
{
|
||||
bitwiseMaskAndCaller<unsigned char>, bitwiseMaskAndCaller<unsigned char>,
|
||||
bitwiseMaskAndCaller<unsigned char>, bitwiseMaskAndCaller<unsigned char>,
|
||||
bitwiseMaskAndCaller<unsigned short>, bitwiseMaskAndCaller<unsigned short>,
|
||||
bitwiseMaskAndCaller<unsigned int>, bitwiseMaskAndCaller<unsigned int>,
|
||||
bitwiseMaskAndCaller<unsigned int>
|
||||
@ -1512,9 +1510,9 @@ namespace
|
||||
|
||||
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
|
||||
|
||||
static Caller callers[] =
|
||||
static Caller callers[] =
|
||||
{
|
||||
bitwiseMaskXorCaller<unsigned char>, bitwiseMaskXorCaller<unsigned char>,
|
||||
bitwiseMaskXorCaller<unsigned char>, bitwiseMaskXorCaller<unsigned char>,
|
||||
bitwiseMaskXorCaller<unsigned short>, bitwiseMaskXorCaller<unsigned short>,
|
||||
bitwiseMaskXorCaller<unsigned int>, bitwiseMaskXorCaller<unsigned int>,
|
||||
bitwiseMaskXorCaller<unsigned int>
|
||||
@ -1584,7 +1582,7 @@ namespace
|
||||
|
||||
const npp_t pConstants[] = {static_cast<npp_t>(sc.val[0]), static_cast<npp_t>(sc.val[1]), static_cast<npp_t>(sc.val[2]), static_cast<npp_t>(sc.val[3])};
|
||||
|
||||
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
|
||||
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
@ -1602,7 +1600,7 @@ namespace
|
||||
oSizeROI.width = src.cols;
|
||||
oSizeROI.height = src.rows;
|
||||
|
||||
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), static_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
|
||||
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), static_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
@ -1614,7 +1612,7 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[5][4] =
|
||||
static const func_t funcs[5][4] =
|
||||
{
|
||||
{NppBitwiseC<CV_8U, 1, nppiOrC_8u_C1R>::call, 0, NppBitwiseC<CV_8U, 3, nppiOrC_8u_C3R>::call, NppBitwiseC<CV_8U, 4, nppiOrC_8u_C4R>::call},
|
||||
{0,0,0,0},
|
||||
@ -1635,7 +1633,7 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[5][4] =
|
||||
static const func_t funcs[5][4] =
|
||||
{
|
||||
{NppBitwiseC<CV_8U, 1, nppiAndC_8u_C1R>::call, 0, NppBitwiseC<CV_8U, 3, nppiAndC_8u_C3R>::call, NppBitwiseC<CV_8U, 4, nppiAndC_8u_C4R>::call},
|
||||
{0,0,0,0},
|
||||
@ -1656,7 +1654,7 @@ void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[5][4] =
|
||||
static const func_t funcs[5][4] =
|
||||
{
|
||||
{NppBitwiseC<CV_8U, 1, nppiXorC_8u_C1R>::call, 0, NppBitwiseC<CV_8U, 3, nppiXorC_8u_C3R>::call, NppBitwiseC<CV_8U, 4, nppiXorC_8u_C4R>::call},
|
||||
{0,0,0,0},
|
||||
@ -1704,7 +1702,7 @@ namespace
|
||||
oSizeROI.height = src.rows;
|
||||
|
||||
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), sc.val, dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
|
||||
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
@ -1722,17 +1720,17 @@ namespace
|
||||
oSizeROI.height = src.rows;
|
||||
|
||||
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), sc.val[0], dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI) );
|
||||
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
void cv::gpu::rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
|
||||
void cv::gpu::rshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[5][4] =
|
||||
static const func_t funcs[5][4] =
|
||||
{
|
||||
{NppShift<CV_8U , 1, nppiRShiftC_8u_C1R >::call, 0, NppShift<CV_8U , 3, nppiRShiftC_8u_C3R >::call, NppShift<CV_8U , 4, nppiRShiftC_8u_C4R>::call },
|
||||
{NppShift<CV_8S , 1, nppiRShiftC_8s_C1R >::call, 0, NppShift<CV_8S , 3, nppiRShiftC_8s_C3R >::call, NppShift<CV_8S , 4, nppiRShiftC_8s_C4R>::call },
|
||||
@ -1749,10 +1747,10 @@ void cv::gpu::rshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& s
|
||||
funcs[src.depth()][src.channels() - 1](src, sc, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
void cv::gpu::lshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
|
||||
void cv::gpu::lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, Scalar_<Npp32u> sc, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[5][4] =
|
||||
static const func_t funcs[5][4] =
|
||||
{
|
||||
{NppShift<CV_8U , 1, nppiLShiftC_8u_C1R>::call , 0, NppShift<CV_8U , 3, nppiLShiftC_8u_C3R>::call , NppShift<CV_8U , 4, nppiLShiftC_8u_C4R>::call },
|
||||
{0 , 0, 0 , 0 },
|
||||
@ -1772,7 +1770,7 @@ void cv::gpu::lshift(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& s
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Minimum and maximum operations
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T>
|
||||
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);
|
||||
@ -1803,7 +1801,7 @@ namespace
|
||||
dst.create(src1.size(), src1.type());
|
||||
::cv::gpu::device::min_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void max_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
|
||||
{
|
||||
@ -1820,58 +1818,58 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||
CV_Assert((src1.depth() != CV_64F) ||
|
||||
CV_Assert((src1.depth() != CV_64F) ||
|
||||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
|
||||
|
||||
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
min_caller<unsigned char>, min_caller<signed char>, min_caller<unsigned short>, min_caller<short>, min_caller<int>,
|
||||
min_caller<unsigned char>, min_caller<signed char>, min_caller<unsigned short>, min_caller<short>, min_caller<int>,
|
||||
min_caller<float>, min_caller<double>
|
||||
};
|
||||
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
|
||||
void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
CV_Assert((src1.depth() != CV_64F) ||
|
||||
CV_Assert((src1.depth() != CV_64F) ||
|
||||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
|
||||
|
||||
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
min_caller<unsigned char>, min_caller<signed char>, min_caller<unsigned short>, min_caller<short>, min_caller<int>,
|
||||
min_caller<unsigned char>, min_caller<signed char>, min_caller<unsigned short>, min_caller<short>, min_caller<int>,
|
||||
min_caller<float>, min_caller<double>
|
||||
};
|
||||
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||
CV_Assert((src1.depth() != CV_64F) ||
|
||||
CV_Assert((src1.depth() != CV_64F) ||
|
||||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
|
||||
|
||||
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
max_caller<unsigned char>, max_caller<signed char>, max_caller<unsigned short>, max_caller<short>, max_caller<int>,
|
||||
max_caller<unsigned char>, max_caller<signed char>, max_caller<unsigned short>, max_caller<short>, max_caller<int>,
|
||||
max_caller<float>, max_caller<double>
|
||||
};
|
||||
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
|
||||
void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
CV_Assert((src1.depth() != CV_64F) ||
|
||||
CV_Assert((src1.depth() != CV_64F) ||
|
||||
(TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
|
||||
|
||||
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
max_caller<unsigned char>, max_caller<signed char>, max_caller<unsigned short>, max_caller<short>, max_caller<int>,
|
||||
max_caller<unsigned char>, max_caller<signed char>, max_caller<unsigned short>, max_caller<short>, max_caller<int>,
|
||||
max_caller<float>, max_caller<double>
|
||||
};
|
||||
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
|
||||
@ -1880,7 +1878,7 @@ void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// threshold
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T>
|
||||
void threshold_gpu(const DevMem2Db& src, const DevMem2Db& dst, T thresh, T maxVal, int type, cudaStream_t stream);
|
||||
@ -1921,10 +1919,10 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
|
||||
{
|
||||
typedef void (*caller_t)(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[] =
|
||||
static const caller_t callers[] =
|
||||
{
|
||||
threshold_caller<unsigned char>, threshold_caller<signed char>,
|
||||
threshold_caller<unsigned short>, threshold_caller<short>,
|
||||
threshold_caller<unsigned char>, threshold_caller<signed char>,
|
||||
threshold_caller<unsigned short>, threshold_caller<short>,
|
||||
threshold_caller<int>, threshold_caller<float>, threshold_caller<double>
|
||||
};
|
||||
|
||||
@ -1943,7 +1941,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// pow
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template<typename T>
|
||||
void pow_caller(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
|
||||
@ -1958,10 +1956,10 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
|
||||
|
||||
typedef void (*caller_t)(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[] =
|
||||
static const caller_t callers[] =
|
||||
{
|
||||
pow_caller<unsigned char>, pow_caller<signed char>,
|
||||
pow_caller<unsigned short>, pow_caller<short>,
|
||||
pow_caller<unsigned char>, pow_caller<signed char>,
|
||||
pow_caller<unsigned short>, pow_caller<short>,
|
||||
pow_caller<int>, pow_caller<float>
|
||||
};
|
||||
|
||||
@ -1992,7 +1990,7 @@ namespace
|
||||
oSizeROI.width = img1.cols;
|
||||
oSizeROI.height = img2.rows;
|
||||
|
||||
nppSafeCall( func(img1.ptr<npp_t>(), static_cast<int>(img1.step), img2.ptr<npp_t>(), static_cast<int>(img2.step),
|
||||
nppSafeCall( func(img1.ptr<npp_t>(), static_cast<int>(img1.step), img2.ptr<npp_t>(), static_cast<int>(img2.step),
|
||||
dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI, eAlphaOp) );
|
||||
|
||||
if (stream == 0)
|
||||
@ -2021,7 +2019,7 @@ void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int
|
||||
|
||||
typedef void (*func_t)(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, NppiAlphaOp eAlphaOp, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[] =
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
NppAlphaComp<CV_8U, nppiAlphaComp_8u_AC4R>::call,
|
||||
0,
|
||||
@ -2046,7 +2044,7 @@ void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// addWeighted
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
template <typename T1, typename T2, typename D>
|
||||
void addWeighted_gpu(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -162,10 +162,37 @@ CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX
|
||||
#define ALL_DEVICES testing::ValuesIn(devices())
|
||||
#define DEVICES(feature) testing::ValuesIn(devices(feature))
|
||||
|
||||
#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113))
|
||||
|
||||
#define ALL_DEPTH testing::Values(MatDepth(CV_8U), MatDepth(CV_8S), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32S), MatDepth(CV_32F), MatDepth(CV_64F))
|
||||
#define ALL_TYPES testing::ValuesIn(all_types())
|
||||
#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end))
|
||||
|
||||
#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113))
|
||||
#define DEPTH_PAIRS testing::Values(std::make_pair(MatDepth(CV_8U), MatDepth(CV_8U)), \
|
||||
std::make_pair(MatDepth(CV_8U), MatDepth(CV_16U)), \
|
||||
std::make_pair(MatDepth(CV_8U), MatDepth(CV_16S)), \
|
||||
std::make_pair(MatDepth(CV_8U), MatDepth(CV_32S)), \
|
||||
std::make_pair(MatDepth(CV_8U), MatDepth(CV_32F)), \
|
||||
std::make_pair(MatDepth(CV_8U), MatDepth(CV_64F)), \
|
||||
\
|
||||
std::make_pair(MatDepth(CV_16U), MatDepth(CV_16U)), \
|
||||
std::make_pair(MatDepth(CV_16U), MatDepth(CV_32S)), \
|
||||
std::make_pair(MatDepth(CV_16U), MatDepth(CV_32F)), \
|
||||
std::make_pair(MatDepth(CV_16U), MatDepth(CV_64F)), \
|
||||
\
|
||||
std::make_pair(MatDepth(CV_16S), MatDepth(CV_16S)), \
|
||||
std::make_pair(MatDepth(CV_16S), MatDepth(CV_32S)), \
|
||||
std::make_pair(MatDepth(CV_16S), MatDepth(CV_32F)), \
|
||||
std::make_pair(MatDepth(CV_16S), MatDepth(CV_64F)), \
|
||||
\
|
||||
std::make_pair(MatDepth(CV_32S), MatDepth(CV_32S)), \
|
||||
std::make_pair(MatDepth(CV_32S), MatDepth(CV_32F)), \
|
||||
std::make_pair(MatDepth(CV_32S), MatDepth(CV_64F)), \
|
||||
\
|
||||
std::make_pair(MatDepth(CV_32F), MatDepth(CV_32F)), \
|
||||
std::make_pair(MatDepth(CV_32F), MatDepth(CV_64F)), \
|
||||
\
|
||||
std::make_pair(MatDepth(CV_64F), MatDepth(CV_64F)))
|
||||
|
||||
#define WHOLE testing::Values(UseRoi(false))
|
||||
#define SUBMAT testing::Values(UseRoi(true))
|
||||
@ -173,4 +200,6 @@ CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX
|
||||
|
||||
#define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true))
|
||||
|
||||
#define ALL_CMP_CODES testing::Values(CmpCode(cv::CMP_EQ), CmpCode(cv::CMP_NE), CmpCode(cv::CMP_GT), CmpCode(cv::CMP_GE), CmpCode(cv::CMP_LT), CmpCode(cv::CMP_LE))
|
||||
|
||||
#endif // __OPENCV_TEST_UTILITY_HPP__
|
||||
|
Loading…
x
Reference in New Issue
Block a user