added per-element min/max to gpu module.
fixed compile error in transform.
This commit is contained in:
parent
d96c5ebb7d
commit
17d9014373
@ -546,6 +546,26 @@ namespace cv
|
||||
CV_EXPORTS GpuMat operator & (const GpuMat& src1, const GpuMat& src2);
|
||||
CV_EXPORTS GpuMat operator ^ (const GpuMat& src1, const GpuMat& src2);
|
||||
|
||||
//! computes per-element minimum of two arrays (dst = min(src1, src2))
|
||||
CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst);
|
||||
//! Async version
|
||||
CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream);
|
||||
|
||||
//! computes per-element minimum of array and scalar (dst = min(src1, src2))
|
||||
CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst);
|
||||
//! Async version
|
||||
CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream);
|
||||
|
||||
//! computes per-element maximum of two arrays (dst = max(src1, src2))
|
||||
CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst);
|
||||
//! Async version
|
||||
CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream);
|
||||
|
||||
//! computes per-element maximum of array and scalar (dst = max(src1, src2))
|
||||
CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst);
|
||||
//! Async version
|
||||
CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream);
|
||||
|
||||
|
||||
////////////////////////////// Image processing //////////////////////////////
|
||||
|
||||
|
@ -98,6 +98,14 @@ cv::gpu::GpuMat cv::gpu::operator ~ (const GpuMat&) { throw_nogpu(); return GpuM
|
||||
cv::gpu::GpuMat cv::gpu::operator | (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
|
||||
cv::gpu::GpuMat cv::gpu::operator & (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
|
||||
cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
|
||||
void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
||||
void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::min(const GpuMat&, double, GpuMat&) { throw_nogpu(); }
|
||||
void cv::gpu::min(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
||||
void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::max(const GpuMat&, double, GpuMat&) { throw_nogpu(); }
|
||||
void cv::gpu::max(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
@ -1119,5 +1127,144 @@ cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat& src1, const GpuMat& src2)
|
||||
return dst;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// min/max
|
||||
|
||||
namespace cv { namespace gpu { namespace mathfunc
|
||||
{
|
||||
template <typename T>
|
||||
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);
|
||||
|
||||
template <typename T>
|
||||
void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);
|
||||
|
||||
template <typename T>
|
||||
void min_gpu(const DevMem2D_<T>& src1, double src2, const DevMem2D_<T>& dst, cudaStream_t stream);
|
||||
|
||||
template <typename T>
|
||||
void max_gpu(const DevMem2D_<T>& src1, double src2, const DevMem2D_<T>& dst, cudaStream_t stream);
|
||||
}}}
|
||||
|
||||
namespace
|
||||
{
|
||||
template <typename T>
|
||||
void min_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
|
||||
{
|
||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||
dst.create(src1.size(), src1.type());
|
||||
mathfunc::min_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void min_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
|
||||
{
|
||||
dst.create(src1.size(), src1.type());
|
||||
mathfunc::min_gpu<T>(src1.reshape(1), src2, dst.reshape(1), stream);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void max_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
|
||||
{
|
||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||
dst.create(src1.size(), src1.type());
|
||||
mathfunc::max_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void max_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
|
||||
{
|
||||
dst.create(src1.size(), src1.type());
|
||||
mathfunc::max_gpu<T>(src1.reshape(1), src2, dst.reshape(1), stream);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
min_caller<uchar>, min_caller<char>, min_caller<ushort>, min_caller<short>, min_caller<int>,
|
||||
min_caller<float>, min_caller<double>
|
||||
};
|
||||
funcs[src1.depth()](src1, src2, dst, 0);
|
||||
}
|
||||
|
||||
void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
min_caller<uchar>, min_caller<char>, min_caller<ushort>, min_caller<short>, min_caller<int>,
|
||||
min_caller<float>, min_caller<double>
|
||||
};
|
||||
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
min_caller<uchar>, min_caller<char>, min_caller<ushort>, min_caller<short>, min_caller<int>,
|
||||
min_caller<float>, min_caller<double>
|
||||
};
|
||||
funcs[src1.depth()](src1, src2, dst, 0);
|
||||
}
|
||||
|
||||
void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
min_caller<uchar>, min_caller<char>, min_caller<ushort>, min_caller<short>, min_caller<int>,
|
||||
min_caller<float>, min_caller<double>
|
||||
};
|
||||
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
max_caller<uchar>, max_caller<char>, max_caller<ushort>, max_caller<short>, max_caller<int>,
|
||||
max_caller<float>, max_caller<double>
|
||||
};
|
||||
funcs[src1.depth()](src1, src2, dst, 0);
|
||||
}
|
||||
|
||||
void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
max_caller<uchar>, max_caller<char>, max_caller<ushort>, max_caller<short>, max_caller<int>,
|
||||
max_caller<float>, max_caller<double>
|
||||
};
|
||||
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
max_caller<uchar>, max_caller<char>, max_caller<ushort>, max_caller<short>, max_caller<int>,
|
||||
max_caller<float>, max_caller<double>
|
||||
};
|
||||
funcs[src1.depth()](src1, src2, dst, 0);
|
||||
}
|
||||
|
||||
void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
max_caller<uchar>, max_caller<char>, max_caller<ushort>, max_caller<short>, max_caller<int>,
|
||||
max_caller<float>, max_caller<double>
|
||||
};
|
||||
funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
||||
|
@ -43,6 +43,7 @@
|
||||
#include "cuda_shared.hpp"
|
||||
#include "transform.hpp"
|
||||
#include "limits_gpu.hpp"
|
||||
#include "saturate_cast.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::device;
|
||||
@ -1295,4 +1296,127 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
template void transpose_gpu<short2 >(const DevMem2D& src, const DevMem2D& dst);
|
||||
template void transpose_gpu<int >(const DevMem2D& src, const DevMem2D& dst);
|
||||
template void transpose_gpu<float >(const DevMem2D& src, const DevMem2D& dst);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// min/max
|
||||
|
||||
struct MinOp
|
||||
{
|
||||
template <typename T>
|
||||
__device__ T operator()(T a, T b)
|
||||
{
|
||||
return min(a, b);
|
||||
}
|
||||
__device__ float operator()(float a, float b)
|
||||
{
|
||||
return fmin(a, b);
|
||||
}
|
||||
__device__ double operator()(double a, double b)
|
||||
{
|
||||
return fmin(a, b);
|
||||
}
|
||||
};
|
||||
|
||||
struct MaxOp
|
||||
{
|
||||
template <typename T>
|
||||
__device__ T operator()(T a, T b)
|
||||
{
|
||||
return max(a, b);
|
||||
}
|
||||
__device__ float operator()(float a, float b)
|
||||
{
|
||||
return fmax(a, b);
|
||||
}
|
||||
__device__ double operator()(double a, double b)
|
||||
{
|
||||
return fmax(a, b);
|
||||
}
|
||||
};
|
||||
|
||||
struct ScalarMinOp
|
||||
{
|
||||
double s;
|
||||
|
||||
explicit ScalarMinOp(double s_) : s(s_) {}
|
||||
|
||||
template <typename T>
|
||||
__device__ T operator()(T a)
|
||||
{
|
||||
return saturate_cast<T>(fmin((double)a, s));
|
||||
}
|
||||
};
|
||||
|
||||
struct ScalarMaxOp
|
||||
{
|
||||
double s;
|
||||
|
||||
explicit ScalarMaxOp(double s_) : s(s_) {}
|
||||
|
||||
template <typename T>
|
||||
__device__ T operator()(T a)
|
||||
{
|
||||
return saturate_cast<T>(fmax((double)a, s));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
{
|
||||
MinOp op;
|
||||
transform(src1, src2, dst, op, stream);
|
||||
}
|
||||
|
||||
template void min_gpu<uchar >(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void min_gpu<char >(const DevMem2D_<char>& src1, const DevMem2D_<char>& src2, const DevMem2D_<char>& dst, cudaStream_t stream);
|
||||
template void min_gpu<ushort>(const DevMem2D_<ushort>& src1, const DevMem2D_<ushort>& src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
|
||||
template void min_gpu<short >(const DevMem2D_<short>& src1, const DevMem2D_<short>& src2, const DevMem2D_<short>& dst, cudaStream_t stream);
|
||||
template void min_gpu<int >(const DevMem2D_<int>& src1, const DevMem2D_<int>& src2, const DevMem2D_<int>& dst, cudaStream_t stream);
|
||||
template void min_gpu<float >(const DevMem2D_<float>& src1, const DevMem2D_<float>& src2, const DevMem2D_<float>& dst, cudaStream_t stream);
|
||||
template void min_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<double>& src2, const DevMem2D_<double>& dst, cudaStream_t stream);
|
||||
|
||||
template <typename T>
|
||||
void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
{
|
||||
MaxOp op;
|
||||
transform(src1, src2, dst, op, stream);
|
||||
}
|
||||
|
||||
template void max_gpu<uchar >(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void max_gpu<char >(const DevMem2D_<char>& src1, const DevMem2D_<char>& src2, const DevMem2D_<char>& dst, cudaStream_t stream);
|
||||
template void max_gpu<ushort>(const DevMem2D_<ushort>& src1, const DevMem2D_<ushort>& src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
|
||||
template void max_gpu<short >(const DevMem2D_<short>& src1, const DevMem2D_<short>& src2, const DevMem2D_<short>& dst, cudaStream_t stream);
|
||||
template void max_gpu<int >(const DevMem2D_<int>& src1, const DevMem2D_<int>& src2, const DevMem2D_<int>& dst, cudaStream_t stream);
|
||||
template void max_gpu<float >(const DevMem2D_<float>& src1, const DevMem2D_<float>& src2, const DevMem2D_<float>& dst, cudaStream_t stream);
|
||||
template void max_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<double>& src2, const DevMem2D_<double>& dst, cudaStream_t stream);
|
||||
|
||||
template <typename T>
|
||||
void min_gpu(const DevMem2D_<T>& src1, double src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
{
|
||||
ScalarMinOp op(src2);
|
||||
transform(src1, dst, op, stream);
|
||||
}
|
||||
|
||||
template void min_gpu<uchar >(const DevMem2D& src1, double src2, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void min_gpu<char >(const DevMem2D_<char>& src1, double src2, const DevMem2D_<char>& dst, cudaStream_t stream);
|
||||
template void min_gpu<ushort>(const DevMem2D_<ushort>& src1, double src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
|
||||
template void min_gpu<short >(const DevMem2D_<short>& src1, double src2, const DevMem2D_<short>& dst, cudaStream_t stream);
|
||||
template void min_gpu<int >(const DevMem2D_<int>& src1, double src2, const DevMem2D_<int>& dst, cudaStream_t stream);
|
||||
template void min_gpu<float >(const DevMem2D_<float>& src1, double src2, const DevMem2D_<float>& dst, cudaStream_t stream);
|
||||
template void min_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
|
||||
|
||||
template <typename T>
|
||||
void max_gpu(const DevMem2D_<T>& src1, double src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
{
|
||||
ScalarMaxOp op(src2);
|
||||
transform(src1, dst, op, stream);
|
||||
}
|
||||
|
||||
template void max_gpu<uchar >(const DevMem2D& src1, double src2, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void max_gpu<char >(const DevMem2D_<char>& src1, double src2, const DevMem2D_<char>& dst, cudaStream_t stream);
|
||||
template void max_gpu<ushort>(const DevMem2D_<ushort>& src1, double src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
|
||||
template void max_gpu<short >(const DevMem2D_<short>& src1, double src2, const DevMem2D_<short>& dst, cudaStream_t stream);
|
||||
template void max_gpu<int >(const DevMem2D_<int>& src1, double src2, const DevMem2D_<int>& dst, cudaStream_t stream);
|
||||
template void max_gpu<float >(const DevMem2D_<float>& src1, double src2, const DevMem2D_<float>& dst, cudaStream_t stream);
|
||||
template void max_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
|
||||
}}}
|
||||
|
@ -64,7 +64,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
//! Transform kernels
|
||||
|
||||
template <typename T, typename D, typename Mask, typename UnOp>
|
||||
template <typename T, typename D, typename UnOp, typename Mask>
|
||||
static __global__ void transform(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
@ -77,7 +77,7 @@ namespace cv { namespace gpu { namespace device
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T1, typename T2, typename D, typename Mask, typename BinOp>
|
||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||
static __global__ void transform(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst, const Mask mask, BinOp op)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
@ -105,7 +105,7 @@ namespace cv
|
||||
grid.x = divUp(src.cols, threads.x);
|
||||
grid.y = divUp(src.rows, threads.y);
|
||||
|
||||
device::transform<T, D, UnOp><<<grid, threads, 0, stream>>>(src, dst, device::NoMask(), op);
|
||||
device::transform<T, D><<<grid, threads, 0, stream>>>(src, dst, device::NoMask(), op);
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
|
@ -949,6 +949,30 @@ struct CV_GpuCountNonZeroTest: CvTest
|
||||
}
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// min/max
|
||||
|
||||
struct CV_GpuImageMinMaxTest : public CV_GpuArithmTest
|
||||
{
|
||||
CV_GpuImageMinMaxTest() : CV_GpuArithmTest( "GPU-ImageMinMax", "min/max" ) {}
|
||||
|
||||
int test( const Mat& mat1, const Mat& mat2 )
|
||||
{
|
||||
cv::Mat cpuMinRes, cpuMaxRes;
|
||||
cv::min(mat1, mat2, cpuMinRes);
|
||||
cv::max(mat1, mat2, cpuMaxRes);
|
||||
|
||||
GpuMat gpu1(mat1);
|
||||
GpuMat gpu2(mat2);
|
||||
GpuMat gpuMinRes, gpuMaxRes;
|
||||
cv::gpu::min(gpu1, gpu2, gpuMinRes);
|
||||
cv::gpu::max(gpu1, gpu2, gpuMaxRes);
|
||||
|
||||
return CheckNorm(cpuMinRes, gpuMinRes) == CvTS::OK && CheckNorm(cpuMaxRes, gpuMaxRes) == CvTS::OK ?
|
||||
CvTS::OK : CvTS::FAIL_GENERIC;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////// tests registration /////////////////////////////////////
|
||||
@ -979,3 +1003,4 @@ CV_GpuNppImagePolarToCartTest CV_GpuNppImagePolarToCart_test;
|
||||
CV_GpuMinMaxTest CV_GpuMinMaxTest_test;
|
||||
CV_GpuMinMaxLocTest CV_GpuMinMaxLocTest_test;
|
||||
CV_GpuCountNonZeroTest CV_CountNonZero_test;
|
||||
CV_GpuImageMinMaxTest CV_GpuImageMinMax_test;
|
||||
|
Loading…
x
Reference in New Issue
Block a user