update docs
minor fixes and refactoring of GPU module
@@ -582,10 +582,10 @@ namespace cv { namespace gpu { namespace bfmatcher
     }
 
     ///////////////////////////////////////////////////////////////////////////////
-    // Match kernel chooser
+    // Match caller
 
     template <typename Dist, typename T, typename Train, typename Mask>
-    void match_chooser(const DevMem2D_<T>& queryDescs, const Train& train,
+    void matchDispatcher(const DevMem2D_<T>& queryDescs, const Train& train,
         const Mask& mask, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
         bool cc_12)
     {
@@ -616,11 +616,11 @@ namespace cv { namespace gpu { namespace bfmatcher
         if (mask.data)
         {
             SingleMask m(mask);
-            match_chooser<L1Dist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12);
         }
         else
         {
-            match_chooser<L1Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
         }
     }
 
@@ -640,11 +640,11 @@ namespace cv { namespace gpu { namespace bfmatcher
         if (mask.data)
         {
             SingleMask m(mask);
-            match_chooser<L2Dist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, m, trainIdx, imgIdx, distance, cc_12);
         }
         else
         {
-            match_chooser<L2Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
         }
     }
 
@@ -664,11 +664,11 @@ namespace cv { namespace gpu { namespace bfmatcher
         if (maskCollection.data)
        {
             MaskCollection mask(maskCollection.data);
-            match_chooser<L1Dist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12);
         }
         else
         {
-            match_chooser<L1Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
         }
     }
 
@@ -688,11 +688,11 @@ namespace cv { namespace gpu { namespace bfmatcher
         if (maskCollection.data)
         {
             MaskCollection mask(maskCollection.data);
-            match_chooser<L2Dist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, mask, trainIdx, imgIdx, distance, cc_12);
         }
         else
         {
-            match_chooser<L2Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
+            matchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, train, WithOutMask(), trainIdx, imgIdx, distance, cc_12);
         }
     }
 
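Note on the hunks above: the rename from match_chooser to matchDispatcher makes the role explicit. The dispatcher is a thin indirection that pins the kernel launch configuration in one place and forwards to the templated caller, so the masked/unmasked and L1/L2 call sites never repeat launch details. A self-contained toy sketch of the idea follows; the 16x16 geometry mirrors the callers in this diff, but every name below is a stand-in, not the real OpenCV GPU API:

    #include <cstdio>

    struct L1DistToy {};  // toy stand-in for the real distance functor

    template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist>
    void match_caller(int numQueries)
    {
        // In the real code this configures and launches a CUDA kernel;
        // here it just reports the launch geometry it was compiled with.
        std::printf("launch %dx%d blocks for %d queries\n",
                    BLOCK_DIM_X, BLOCK_DIM_Y, numQueries);
    }

    template <typename Dist>
    void matchDispatcher(int numQueries)
    {
        // The dispatcher pins the block size in one place, so every call
        // site (masked, unmasked, L1, L2, ...) stays free of launch details.
        match_caller<16, 16, Dist>(numQueries);
    }

    int main()
    {
        matchDispatcher<L1DistToy>(1000);
    }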
@@ -942,22 +942,35 @@ namespace cv { namespace gpu { namespace bfmatcher
     ///////////////////////////////////////////////////////////////////////////////
     // knn match caller
 
+    template <typename Dist, typename T, typename Mask>
+    void calcDistanceDispatcher(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs,
+        const Mask& mask, const DevMem2Df& allDist)
+    {
+        calcDistance_caller<16, 16, Dist>(queryDescs, trainDescs, mask, allDist);
+    }
+
+    void findKnnMatchDispatcher(int knn, const DevMem2Di& trainIdx, const DevMem2Df& distance,
+        const DevMem2Df& allDist)
+    {
+        findKnnMatch_caller<256>(knn, trainIdx, distance, allDist);
+    }
+
     template <typename T>
     void knnMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn,
         const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist)
     {
         if (mask.data)
         {
-            calcDistance_caller<16, 16, L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
+            calcDistanceDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
                 SingleMask(mask), allDist);
         }
         else
         {
-            calcDistance_caller<16, 16, L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
+            calcDistanceDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
                 WithOutMask(), allDist);
         }
 
-        findKnnMatch_caller<256>(knn, trainIdx, distance, allDist);
+        findKnnMatchDispatcher(knn, trainIdx, distance, allDist);
     }
 
     template void knnMatchL1_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
@@ -973,16 +986,16 @@ namespace cv { namespace gpu { namespace bfmatcher
     {
         if (mask.data)
         {
-            calcDistance_caller<16, 16, L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
+            calcDistanceDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
                 SingleMask(mask), allDist);
         }
         else
         {
-            calcDistance_caller<16, 16, L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
+            calcDistanceDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
                 WithOutMask(), allDist);
         }
 
-        findKnnMatch_caller<256>(knn, trainIdx, distance, allDist);
+        findKnnMatchDispatcher(knn, trainIdx, distance, allDist);
     }
 
     template void knnMatchL2_gpu<uchar >(const DevMem2D& queryDescs, const DevMem2D& trainDescs, int knn, const DevMem2D& mask, const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2Df& allDist);
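The if (mask.data) branches above pick between SingleMask and WithOutMask at the call site, so each kernel is instantiated twice and the unmasked instantiation carries no per-element mask test at runtime. A toy model of such compile-time mask policies follows; only the two policy names come from the diff, their bodies are assumptions about how such policies typically look:

    #include <cstdio>

    struct SingleMaskToy
    {
        const unsigned char* data;
        bool operator()(int idx) const { return data[idx] != 0; }
    };
    struct WithOutMaskToy
    {
        bool operator()(int) const { return true; }  // compiles away entirely
    };

    template <typename Mask>
    void matchDispatcherToy(const Mask& mask, int n)
    {
        int accepted = 0;
        for (int i = 0; i < n; ++i)
            if (mask(i))  // constant-true for WithOutMaskToy, so no real check
                ++accepted;
        std::printf("%d of %d descriptors considered\n", accepted, n);
    }

    int main()
    {
        unsigned char m[4] = { 1, 0, 1, 1 };
        matchDispatcherToy(SingleMaskToy{ m }, 4);  // masked instantiation
        matchDispatcherToy(WithOutMaskToy{}, 4);    // unmasked instantiation
    }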
@@ -1061,7 +1074,16 @@ namespace cv { namespace gpu { namespace bfmatcher
     }
 
     ///////////////////////////////////////////////////////////////////////////////
-    // Radius Match kernel chooser
+    // Radius Match caller
 
+    template <typename Dist, typename T, typename Mask>
+    void radiusMatchDispatcher(const DevMem2D_<T>& queryDescs, const DevMem2D_<T>& trainDescs,
+        float maxDistance, const Mask& mask, const DevMem2Di& trainIdx, unsigned int* nMatches,
+        const DevMem2Df& distance)
+    {
+        radiusMatch_caller<16, 16, Dist>(queryDescs, trainDescs, maxDistance, mask,
+            trainIdx, nMatches, distance);
+    }
+
     template <typename T>
     void radiusMatchL1_gpu(const DevMem2D& queryDescs, const DevMem2D& trainDescs, float maxDistance,
@@ -1069,12 +1091,12 @@ namespace cv { namespace gpu { namespace bfmatcher
     {
         if (mask.data)
         {
-            radiusMatch_caller<16, 16, L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
+            radiusMatchDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
                 maxDistance, SingleMask(mask), trainIdx, nMatches, distance);
         }
         else
         {
-            radiusMatch_caller<16, 16, L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
+            radiusMatchDispatcher<L1Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
                 maxDistance, WithOutMask(), trainIdx, nMatches, distance);
         }
     }
@@ -1092,12 +1114,12 @@ namespace cv { namespace gpu { namespace bfmatcher
     {
         if (mask.data)
         {
-            radiusMatch_caller<16, 16, L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
+            radiusMatchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
                 maxDistance, SingleMask(mask), trainIdx, nMatches, distance);
         }
         else
        {
-            radiusMatch_caller<16, 16, L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
+            radiusMatchDispatcher<L2Dist>((DevMem2D_<T>)queryDescs, (DevMem2D_<T>)trainDescs,
                 maxDistance, WithOutMask(), trainIdx, nMatches, distance);
         }
     }
File diff suppressed because it is too large
@@ -190,6 +190,9 @@ void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src,
 
 void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
 {
+    CV_Assert((src.depth() != CV_64F) ||
+        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
+
     typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, cudaStream_t stream);
     static const set_caller_t set_callers[] =
     {
@@ -201,6 +204,11 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
 
 void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
 {
+    CV_Assert((src.depth() != CV_64F) ||
+        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
+
+    CV_Assert(mask.type() == CV_8UC1);
+
     typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, const GpuMat& mask, cudaStream_t stream);
     static const set_caller_t set_callers[] =
     {
@@ -212,6 +220,9 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
 
 void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)
 {
+    CV_Assert((src.depth() != CV_64F && CV_MAT_DEPTH(rtype) != CV_64F) ||
+        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
+
     bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();
 
     if( rtype < 0 )
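The assertion added here, and repeated throughout the rest of this commit, is a two-level capability check: CV_64F data is accepted only when the binary was built with native-double support (TargetArchs::builtWith(NATIVE_DOUBLE)) and the current device also reports it (DeviceInfo().supports(NATIVE_DOUBLE)). On CUDA hardware, native double precision requires compute capability 1.3 or higher. A stand-alone sketch of the same guard, with all names below as toy stand-ins:

    #include <cassert>

    enum Depth { DEPTH_8U, DEPTH_32F, DEPTH_64F };

    // Toy stand-ins: the real checks query the compiled PTX/cubin list and
    // the CUDA device's compute capability.
    bool builtWithNativeDouble()      { return true; }   // compile-time capability
    bool deviceSupportsNativeDouble() { return false; }  // runtime capability

    void checkDoubleSupport(Depth depth)
    {
        // CV_64F is allowed only when BOTH the binary and the device handle doubles.
        assert(depth != DEPTH_64F ||
               (builtWithNativeDouble() && deviceSupportsNativeDouble()));
    }

    int main()
    {
        checkDoubleSupport(DEPTH_32F);     // passes
        // checkDoubleSupport(DEPTH_64F);  // would fire on this toy "device"
    }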
@@ -625,7 +625,11 @@ namespace
 }
 
 void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
 {
+    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+    CV_Assert((src1.depth() != CV_64F) ||
+        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
+
     typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[] =
     {
@@ -637,6 +641,10 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
 
 void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream)
 {
+    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+    CV_Assert((src1.depth() != CV_64F) ||
+        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
+
     typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[] =
     {
@@ -648,6 +656,9 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Str
 
 void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst)
 {
+    CV_Assert((src1.depth() != CV_64F) ||
+        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
+
     typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[] =
     {
@@ -659,6 +670,9 @@ void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst)
 
 void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream)
 {
+    CV_Assert((src1.depth() != CV_64F) ||
+        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
+
     typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[] =
     {
@@ -670,6 +684,10 @@ void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, const Stream& st
 
 void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
 {
+    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+    CV_Assert((src1.depth() != CV_64F) ||
+        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
+
     typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[] =
     {
@@ -681,6 +699,10 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
 
 void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Stream& stream)
 {
+    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+    CV_Assert((src1.depth() != CV_64F) ||
+        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
+
     typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[] =
     {
@@ -692,6 +714,9 @@ void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Str
 
 void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst)
 {
+    CV_Assert((src1.depth() != CV_64F) ||
+        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
+
     typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[] =
     {
@@ -703,6 +728,9 @@ void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst)
 
 void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream)
 {
+    CV_Assert((src1.depth() != CV_64F) ||
+        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
+
     typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
     static const func_t funcs[] =
     {
@@ -749,6 +777,9 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
     }
     else
     {
+        CV_Assert((src.depth() != CV_64F) ||
+            (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
+
         typedef void (*caller_t)(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type,
             cudaStream_t stream);
 
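Each of the min/max/threshold overloads above funnels into a static table of function pointers indexed by the matrix depth, so placing the CV_Assert before the lookup keeps unsupported CV_64F inputs from ever reaching the table. A toy version of that depth-indexed dispatch; the table contents below are invented for illustration:

    #include <cstdio>

    typedef void (*func_t)(int n);

    void min8u(int n)  { std::printf("min<uchar> on %d elems\n", n); }
    void min32f(int n) { std::printf("min<float> on %d elems\n", n); }

    enum Depth { DEPTH_8U = 0, DEPTH_32F = 1 };

    // One entry per supported depth; unsupported depths never reach the table.
    static const func_t funcs[] = { min8u, min32f };

    int main()
    {
        Depth d = DEPTH_32F;  // in OpenCV this would be src1.depth()
        funcs[d](1024);       // one indirect call replaces a switch on depth
    }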
@@ -205,6 +205,9 @@ namespace
 
 void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double beta ) const
 {
+    CV_Assert((depth() != CV_64F && CV_MAT_DEPTH(rtype) != CV_64F) ||
+        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
+
     bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();
 
     if( rtype < 0 )
@@ -428,6 +431,9 @@ GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
 {
     CV_Assert(mask.type() == CV_8UC1);
 
+    CV_Assert((depth() != CV_64F) ||
+        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
+
     CV_DbgAssert(!this->empty());
 
     NppiSize sz;
@@ -393,11 +393,37 @@ namespace cv
            }
        };
 
+        template <typename T, typename D, int scn, int dcn> struct UseSmartUn_
+        {
+            static const bool value = false;
+        };
+        template <typename T, typename D> struct UseSmartUn_<T, D, 1, 1>
+        {
+            static const bool value = device::UnReadWriteTraits<T, D>::shift != 1;
+        };
+        template <typename T, typename D> struct UseSmartUn
+        {
+            static const bool value = UseSmartUn_<T, D, device::VecTraits<T>::cn, device::VecTraits<D>::cn>::value;
+        };
+
+        template <typename T1, typename T2, typename D, int src1cn, int src2cn, int dstcn> struct UseSmartBin_
+        {
+            static const bool value = false;
+        };
+        template <typename T1, typename T2, typename D> struct UseSmartBin_<T1, T2, D, 1, 1, 1>
+        {
+            static const bool value = device::BinReadWriteTraits<T1, T2, D>::shift != 1;
+        };
+        template <typename T1, typename T2, typename D> struct UseSmartBin
+        {
+            static const bool value = UseSmartBin_<T1, T2, D, device::VecTraits<T1>::cn, device::VecTraits<T2>::cn, device::VecTraits<D>::cn>::value;
+        };
+
         template <typename T, typename D, typename UnOp, typename Mask>
         static void transform_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask,
             cudaStream_t stream = 0)
         {
-            TransformDispatcher<device::VecTraits<T>::cn == 1 && device::VecTraits<D>::cn == 1 && device::UnReadWriteTraits<T, D>::shift != 1>::call(src, dst, op, mask, stream);
+            TransformDispatcher< UseSmartUn<T, D>::value >::call(src, dst, op, mask, stream);
         }
 
         template <typename T, typename D, typename UnOp>
@@ -416,7 +442,7 @@ namespace cv
         static void transform_caller(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
             BinOp op, const Mask& mask, cudaStream_t stream = 0)
         {
-            TransformDispatcher<device::VecTraits<T1>::cn == 1 && device::VecTraits<T2>::cn == 1 && device::VecTraits<D>::cn == 1 && device::BinReadWriteTraits<T1, T2, D>::shift != 1>::call(src1, src2, dst, op, mask, stream);
+            TransformDispatcher< UseSmartBin<T1, T2, D>::value >::call(src1, src2, dst, op, mask, stream);
         }
 
         template <typename T1, typename T2, typename D, typename BinOp>
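The last two hunks replace long inline boolean conditions with the named traits UseSmartUn/UseSmartBin: a primary template defaults to false, and a partial specialization enables the "smart" (vectorized) transform path only in the all-single-channel case where the read/write shift differs from 1. A compilable toy re-creation of the unary case; ShiftTraits below is an invented stand-in for device::UnReadWriteTraits:

    #include <iostream>

    // Invented stand-in: how many elements a vectorized load/store covers.
    template <typename T> struct ShiftTraits { static const int shift = 1; };
    template <> struct ShiftTraits<unsigned char> { static const int shift = 4; };

    // Primary template: default to the plain (non-vectorized) transform.
    template <typename T, typename D, int scn, int dcn> struct UseSmartUn_
    {
        static const bool value = false;
    };
    // Partial specialization: only 1-channel -> 1-channel can take the
    // vectorized path, and only when it actually widens the accesses.
    template <typename T, typename D> struct UseSmartUn_<T, D, 1, 1>
    {
        static const bool value = ShiftTraits<T>::shift != 1;
    };

    int main()
    {
        std::cout << UseSmartUn_<unsigned char, unsigned char, 1, 1>::value << "\n"; // 1
        std::cout << UseSmartUn_<float, float, 3, 3>::value << "\n";                 // 0
    }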