|
|
|
@@ -48,46 +48,30 @@ using namespace cv::gpu;
|
|
|
|
|
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::add (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::add(InputArray, InputArray, OutputArray, InputArray, int, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::subtract (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::subtract(InputArray, InputArray, OutputArray, InputArray, int, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::multiply (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::multiply(InputArray, InputArray, OutputArray, double, int, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::divide (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::divide(InputArray, InputArray, OutputArray, double, int, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::absdiff (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::absdiff(InputArray, InputArray, OutputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::abs (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::abs(InputArray, OutputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::sqr (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::sqr(InputArray, OutputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::sqrt (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::sqrt(InputArray, OutputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::exp (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::exp(InputArray, OutputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::log (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::log(InputArray, OutputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::pow (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::pow(InputArray, double, OutputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::compare (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::compare(InputArray, InputArray, OutputArray, int, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::bitwise_not (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::bitwise_not(InputArray, OutputArray, InputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::bitwise_or (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::bitwise_or(InputArray, InputArray, OutputArray, InputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::bitwise_and (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::bitwise_and(InputArray, InputArray, OutputArray, InputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::bitwise_xor (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::bitwise_xor(InputArray, InputArray, OutputArray, InputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::rshift (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::rshift(InputArray, Scalar_<int>, OutputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::lshift (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::lshift(InputArray, Scalar_<int>, OutputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for the GpuMat/GpuMat overload of cv::gpu::min (the
// `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)` path): every argument is
// ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
// No-CUDA stub for the GpuMat/double overload of cv::gpu::min (the
// `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)` path): every argument is
// ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for the GpuMat/GpuMat overload of cv::gpu::max (the
// `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)` path): every argument is
// ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
// No-CUDA stub for the GpuMat/double overload of cv::gpu::max (the
// `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)` path): every argument is
// ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::max(const GpuMat&, double, GpuMat&, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
// No-CUDA stub for the InputArray overload of cv::gpu::min (the
// `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)` path): every argument is
// ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::min(InputArray, InputArray, OutputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
// No-CUDA stub for the InputArray overload of cv::gpu::max (the
// `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)` path): every argument is
// ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::max(InputArray, InputArray, OutputArray, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
|
// No-CUDA stub for cv::gpu::addWeighted (the `#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)`
// path): every argument is ignored and the call is forwarded to throw_no_cuda().
void cv::gpu::addWeighted(const GpuMat&, double, const GpuMat&, double, double, GpuMat&, int, Stream&)
{
    throw_no_cuda();
}
|
|
|
|
|
|
|
|
|
@@ -2262,6 +2246,15 @@ void cv::gpu::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream
|
|
|
|
|
//////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
// Minimum and maximum operations
|
|
|
|
|
|
|
|
|
|
namespace
{
    // Operation selector shared by the min/max implementations.
    // The enumerators are used directly as the first index into the two-entry
    // dispatch tables (funcs[op][depth], funcs_v4[op], funcs_v2[op]), whose
    // entries are ordered { min, max }. MIN_OP must therefore stay 0 and
    // MAX_OP must stay 1.
    enum
    {
        MIN_OP = 0,
        MAX_OP = 1
    };
}
|
|
|
|
|
|
|
|
|
|
namespace arithm
|
|
|
|
|
{
|
|
|
|
|
void minMat_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
|
|
|
|
@@ -2275,12 +2268,13 @@ namespace arithm
|
|
|
|
|
template <typename T> void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s)
|
|
|
|
|
void minMaxMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& _stream, int op)
|
|
|
|
|
{
|
|
|
|
|
using namespace arithm;
|
|
|
|
|
|
|
|
|
|
typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
|
|
|
|
static const func_t funcs[] =
|
|
|
|
|
static const func_t funcs[2][7] =
|
|
|
|
|
{
|
|
|
|
|
{
|
|
|
|
|
minMat<unsigned char>,
|
|
|
|
|
minMat<signed char>,
|
|
|
|
@@ -2289,77 +2283,7 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
|
|
|
|
|
minMat<int>,
|
|
|
|
|
minMat<float>,
|
|
|
|
|
minMat<double>
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
const int depth = src1.depth();
|
|
|
|
|
const int cn = src1.channels();
|
|
|
|
|
|
|
|
|
|
CV_Assert( depth <= CV_64F );
|
|
|
|
|
CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() );
|
|
|
|
|
|
|
|
|
|
if (depth == CV_64F)
|
|
|
|
|
{
|
|
|
|
|
if (!deviceSupports(NATIVE_DOUBLE))
|
|
|
|
|
CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
dst.create(src1.size(), src1.type());
|
|
|
|
|
|
|
|
|
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
|
|
|
|
|
|
|
|
|
PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step);
|
|
|
|
|
PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step);
|
|
|
|
|
PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
|
|
|
|
|
|
|
|
|
|
if (depth == CV_8U || depth == CV_16U)
|
|
|
|
|
{
|
|
|
|
|
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
|
|
|
|
|
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
|
|
|
|
|
const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
|
|
|
|
|
|
|
|
|
|
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
|
|
|
|
|
|
|
|
|
|
if (isAllAligned)
|
|
|
|
|
{
|
|
|
|
|
if (depth == CV_8U && (src1_.cols & 3) == 0)
|
|
|
|
|
{
|
|
|
|
|
const int vcols = src1_.cols >> 2;
|
|
|
|
|
|
|
|
|
|
minMat_v4(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
|
|
|
|
|
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
|
|
|
|
|
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
|
|
|
|
|
stream);
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
else if (depth == CV_16U && (src1_.cols & 1) == 0)
|
|
|
|
|
{
|
|
|
|
|
const int vcols = src1_.cols >> 1;
|
|
|
|
|
|
|
|
|
|
minMat_v2(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
|
|
|
|
|
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
|
|
|
|
|
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
|
|
|
|
|
stream);
|
|
|
|
|
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const func_t func = funcs[depth];
|
|
|
|
|
|
|
|
|
|
if (!func)
|
|
|
|
|
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
|
|
|
|
|
|
|
|
|
|
func(src1_, src2_, dst_, stream);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s)
|
|
|
|
|
{
|
|
|
|
|
using namespace arithm;
|
|
|
|
|
|
|
|
|
|
typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
|
|
|
|
|
static const func_t funcs[] =
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
maxMat<unsigned char>,
|
|
|
|
|
maxMat<signed char>,
|
|
|
|
@@ -2368,23 +2292,25 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
|
|
|
|
|
maxMat<int>,
|
|
|
|
|
maxMat<float>,
|
|
|
|
|
maxMat<double>
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
typedef void (*opt_func_t)(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
|
|
|
|
|
static const opt_func_t funcs_v4[2] =
|
|
|
|
|
{
|
|
|
|
|
minMat_v4, maxMat_v4
|
|
|
|
|
};
|
|
|
|
|
static const opt_func_t funcs_v2[2] =
|
|
|
|
|
{
|
|
|
|
|
minMat_v2, maxMat_v2
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
const int depth = src1.depth();
|
|
|
|
|
const int cn = src1.channels();
|
|
|
|
|
|
|
|
|
|
CV_Assert( depth <= CV_64F );
|
|
|
|
|
CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() );
|
|
|
|
|
|
|
|
|
|
if (depth == CV_64F)
|
|
|
|
|
{
|
|
|
|
|
if (!deviceSupports(NATIVE_DOUBLE))
|
|
|
|
|
CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
dst.create(src1.size(), src1.type());
|
|
|
|
|
|
|
|
|
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
|
|
|
|
cudaStream_t stream = StreamAccessor::getStream(_stream);
|
|
|
|
|
|
|
|
|
|
PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step);
|
|
|
|
|
PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step);
|
|
|
|
@@ -2404,7 +2330,7 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
|
|
|
|
|
{
|
|
|
|
|
const int vcols = src1_.cols >> 2;
|
|
|
|
|
|
|
|
|
|
maxMat_v4(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
|
|
|
|
|
funcs_v4[op](PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
|
|
|
|
|
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
|
|
|
|
|
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
|
|
|
|
|
stream);
|
|
|
|
@@ -2415,7 +2341,7 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
|
|
|
|
|
{
|
|
|
|
|
const int vcols = src1_.cols >> 1;
|
|
|
|
|
|
|
|
|
|
maxMat_v2(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
|
|
|
|
|
funcs_v2[op](PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
|
|
|
|
|
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
|
|
|
|
|
PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
|
|
|
|
|
stream);
|
|
|
|
@@ -2425,7 +2351,7 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const func_t func = funcs[depth];
|
|
|
|
|
const func_t func = funcs[op][depth];
|
|
|
|
|
|
|
|
|
|
if (!func)
|
|
|
|
|
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
|
|
|
|
@@ -2441,12 +2367,13 @@ namespace
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
|
|
|
|
|
void minMaxScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int op)
|
|
|
|
|
{
|
|
|
|
|
using namespace arithm;
|
|
|
|
|
|
|
|
|
|
typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
|
|
|
|
static const func_t funcs[] =
|
|
|
|
|
static const func_t funcs[2][7] =
|
|
|
|
|
{
|
|
|
|
|
{
|
|
|
|
|
minScalar<unsigned char>,
|
|
|
|
|
minScalar<signed char>,
|
|
|
|
@@ -2455,36 +2382,7 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
|
|
|
|
|
minScalar<int>,
|
|
|
|
|
minScalar<float>,
|
|
|
|
|
minScalar<double>
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
typedef double (*cast_func_t)(double sc);
|
|
|
|
|
static const cast_func_t cast_func[] =
|
|
|
|
|
{
|
|
|
|
|
castScalar<unsigned char>, castScalar<signed char>, castScalar<unsigned short>, castScalar<short>, castScalar<int>, castScalar<float>, castScalar<double>
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
const int depth = src.depth();
|
|
|
|
|
|
|
|
|
|
CV_Assert( depth <= CV_64F );
|
|
|
|
|
CV_Assert( src.channels() == 1 );
|
|
|
|
|
|
|
|
|
|
if (depth == CV_64F)
|
|
|
|
|
{
|
|
|
|
|
if (!deviceSupports(NATIVE_DOUBLE))
|
|
|
|
|
CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
dst.create(src.size(), src.type());
|
|
|
|
|
|
|
|
|
|
funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
|
|
|
|
|
{
|
|
|
|
|
using namespace arithm;
|
|
|
|
|
|
|
|
|
|
typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
|
|
|
|
|
static const func_t funcs[] =
|
|
|
|
|
},
|
|
|
|
|
{
|
|
|
|
|
maxScalar<unsigned char>,
|
|
|
|
|
maxScalar<signed char>,
|
|
|
|
@@ -2493,6 +2391,7 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
|
|
|
|
|
maxScalar<int>,
|
|
|
|
|
maxScalar<float>,
|
|
|
|
|
maxScalar<double>
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
typedef double (*cast_func_t)(double sc);
|
|
|
|
@@ -2506,15 +2405,17 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
|
|
|
|
|
CV_Assert( depth <= CV_64F );
|
|
|
|
|
CV_Assert( src.channels() == 1 );
|
|
|
|
|
|
|
|
|
|
if (depth == CV_64F)
|
|
|
|
|
{
|
|
|
|
|
if (!deviceSupports(NATIVE_DOUBLE))
|
|
|
|
|
CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
|
|
|
|
|
}
|
|
|
|
|
funcs[op][depth](src, cast_func[depth](val[0]), dst, StreamAccessor::getStream(stream));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
dst.create(src.size(), src.type());
|
|
|
|
|
// Minimum of two inputs. Delegates to arithm_op, supplying minMaxMat as the
// matrix/matrix back end and minMaxScalar as the matrix/scalar back end, with
// MIN_OP selecting the "min" row of their dispatch tables.
void cv::gpu::min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
{
    // noArray(), 1.0 and -1 are fixed values handed straight to arithm_op
    // (presumably mask, scale and output depth -- confirm against arithm_op).
    arithm_op(src1, src2, dst,
              noArray(), 1.0, -1,
              stream,
              minMaxMat, minMaxScalar, MIN_OP);
}
|
|
|
|
|
|
|
|
|
|
funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream));
|
|
|
|
|
// Maximum of two inputs. Delegates to arithm_op, supplying minMaxMat as the
// matrix/matrix back end and minMaxScalar as the matrix/scalar back end, with
// MAX_OP selecting the "max" row of their dispatch tables.
void cv::gpu::max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
{
    // noArray(), 1.0 and -1 are fixed values handed straight to arithm_op
    // (presumably mask, scale and output depth -- confirm against arithm_op).
    arithm_op(src1, src2, dst,
              noArray(), 1.0, -1,
              stream,
              minMaxMat, minMaxScalar, MAX_OP);
}
|
|
|
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
|
|
|
|