added assertion on device features (global atomics) into gpu tests
This commit is contained in:
parent
4a996111ea
commit
bd13e9479b
@ -82,76 +82,76 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, vector< vec
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
namespace bf_match
|
||||
{
|
||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
||||
int cc, cudaStream_t stream);
|
||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
||||
int cc, cudaStream_t stream);
|
||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
||||
int cc, cudaStream_t stream);
|
||||
|
||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
||||
int cc, cudaStream_t stream);
|
||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
||||
int cc, cudaStream_t stream);
|
||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
||||
int cc, cudaStream_t stream);
|
||||
}
|
||||
|
||||
namespace bf_knnmatch
|
||||
{
|
||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||
int cc, cudaStream_t stream);
|
||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||
int cc, cudaStream_t stream);
|
||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||
int cc, cudaStream_t stream);
|
||||
|
||||
template <typename T> void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||
template <typename T> void match2L1_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||
int cc, cudaStream_t stream);
|
||||
template <typename T> void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||
template <typename T> void match2L2_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||
int cc, cudaStream_t stream);
|
||||
template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||
template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||
int cc, cudaStream_t stream);
|
||||
}
|
||||
|
||||
namespace bf_radius_match
|
||||
namespace bf_radius_match
|
||||
{
|
||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
int cc, cudaStream_t stream);
|
||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
int cc, cudaStream_t stream);
|
||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
int cc, cudaStream_t stream);
|
||||
|
||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
int cc, cudaStream_t stream);
|
||||
|
||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
template <typename T> void matchL2_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
int cc, cudaStream_t stream);
|
||||
|
||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
int cc, cudaStream_t stream);
|
||||
}
|
||||
}}}
|
||||
@ -192,7 +192,7 @@ bool cv::gpu::BruteForceMatcher_GPU_base::isMaskSupported() const
|
||||
// Match
|
||||
|
||||
void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const GpuMat& train,
|
||||
GpuMat& trainIdx, GpuMat& distance,
|
||||
GpuMat& trainIdx, GpuMat& distance,
|
||||
const GpuMat& mask, Stream& stream)
|
||||
{
|
||||
if (query.empty() || train.empty())
|
||||
@ -200,25 +200,25 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const
|
||||
|
||||
using namespace ::cv::gpu::device::bf_match;
|
||||
|
||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
||||
int cc, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[3][6] =
|
||||
{
|
||||
{
|
||||
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
|
||||
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
|
||||
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
|
||||
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
|
||||
matchL1_gpu<int>, matchL1_gpu<float>
|
||||
},
|
||||
{
|
||||
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
|
||||
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
|
||||
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
|
||||
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
|
||||
0/*matchL2_gpu<int>*/, matchL2_gpu<float>
|
||||
},
|
||||
{
|
||||
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
|
||||
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
|
||||
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
|
||||
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
|
||||
matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
|
||||
}
|
||||
};
|
||||
@ -334,7 +334,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::makeGpuCollection(GpuMat& trainCollect
|
||||
}
|
||||
|
||||
void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, const GpuMat& trainCollection,
|
||||
GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
|
||||
GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
|
||||
const GpuMat& masks, Stream& stream)
|
||||
{
|
||||
if (query.empty() || trainCollection.empty())
|
||||
@ -342,8 +342,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c
|
||||
|
||||
using namespace ::cv::gpu::device::bf_match;
|
||||
|
||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
||||
int cc, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[3][6] =
|
||||
@ -453,25 +453,25 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co
|
||||
|
||||
using namespace ::cv::gpu::device::bf_knnmatch;
|
||||
|
||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||
int cc, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[3][6] =
|
||||
{
|
||||
{
|
||||
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
|
||||
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
|
||||
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
|
||||
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
|
||||
matchL1_gpu<int>, matchL1_gpu<float>
|
||||
},
|
||||
{
|
||||
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
|
||||
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
|
||||
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
|
||||
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
|
||||
0/*matchL2_gpu<int>*/, matchL2_gpu<float>
|
||||
},
|
||||
{
|
||||
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
|
||||
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
|
||||
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
|
||||
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
|
||||
matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
|
||||
}
|
||||
};
|
||||
@ -501,7 +501,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co
|
||||
|
||||
caller_t func = callers[distType][query.depth()];
|
||||
CV_Assert(func != 0);
|
||||
|
||||
|
||||
DeviceInfo info;
|
||||
int cc = info.majorVersion() * 10 + info.minorVersion();
|
||||
|
||||
@ -520,7 +520,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainId
|
||||
knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult);
|
||||
}
|
||||
|
||||
void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, const Mat& distance,
|
||||
void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, const Mat& distance,
|
||||
vector< vector<DMatch> >& matches, bool compactResult)
|
||||
{
|
||||
if (trainIdx.empty() || distance.empty())
|
||||
@ -536,7 +536,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchConvert(const Mat& trainIdx, c
|
||||
|
||||
matches.clear();
|
||||
matches.reserve(nQuery);
|
||||
|
||||
|
||||
const int* trainIdx_ptr = trainIdx.ptr<int>();
|
||||
const float* distance_ptr = distance.ptr<float>();
|
||||
|
||||
@ -582,25 +582,25 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer
|
||||
|
||||
using namespace ::cv::gpu::device::bf_knnmatch;
|
||||
|
||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||
int cc, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[3][6] =
|
||||
{
|
||||
{
|
||||
match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
|
||||
match2L1_gpu<unsigned short>, match2L1_gpu<short>,
|
||||
match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
|
||||
match2L1_gpu<unsigned short>, match2L1_gpu<short>,
|
||||
match2L1_gpu<int>, match2L1_gpu<float>
|
||||
},
|
||||
{
|
||||
0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
|
||||
0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
|
||||
0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
|
||||
0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
|
||||
0/*match2L2_gpu<int>*/, match2L2_gpu<float>
|
||||
},
|
||||
{
|
||||
match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
|
||||
match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
|
||||
match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
|
||||
match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
|
||||
match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
|
||||
}
|
||||
};
|
||||
@ -620,7 +620,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer
|
||||
|
||||
caller_t func = callers[distType][query.depth()];
|
||||
CV_Assert(func != 0);
|
||||
|
||||
|
||||
DeviceInfo info;
|
||||
int cc = info.majorVersion() * 10 + info.minorVersion();
|
||||
|
||||
@ -654,7 +654,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Convert(const Mat& trainIdx,
|
||||
|
||||
matches.clear();
|
||||
matches.reserve(nQuery);
|
||||
|
||||
|
||||
const int* trainIdx_ptr = trainIdx.ptr<int>();
|
||||
const int* imgIdx_ptr = imgIdx.ptr<int>();
|
||||
const float* distance_ptr = distance.ptr<float>();
|
||||
@ -755,33 +755,33 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch(const GpuMat& query, vector<
|
||||
// RadiusMatch
|
||||
|
||||
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query, const GpuMat& train,
|
||||
GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
|
||||
GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
|
||||
const GpuMat& mask, Stream& stream)
|
||||
{
|
||||
if (query.empty() || train.empty())
|
||||
return;
|
||||
|
||||
using namespace ::cv::gpu::device::bf_radius_match;
|
||||
using namespace cv::gpu::device::bf_radius_match;
|
||||
|
||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
int cc, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[3][6] =
|
||||
{
|
||||
{
|
||||
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
|
||||
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
|
||||
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
|
||||
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
|
||||
matchL1_gpu<int>, matchL1_gpu<float>
|
||||
},
|
||||
{
|
||||
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
|
||||
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
|
||||
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
|
||||
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
|
||||
0/*matchL2_gpu<int>*/, matchL2_gpu<float>
|
||||
},
|
||||
{
|
||||
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
|
||||
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
|
||||
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
|
||||
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
|
||||
matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
|
||||
}
|
||||
};
|
||||
@ -789,7 +789,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
|
||||
DeviceInfo info;
|
||||
int cc = info.majorVersion() * 10 + info.minorVersion();
|
||||
|
||||
CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && info.supports(GLOBAL_ATOMICS));
|
||||
if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
|
||||
CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
|
||||
|
||||
const int nQuery = query.rows;
|
||||
const int nTrain = train.rows;
|
||||
@ -804,19 +805,19 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
|
||||
ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32SC1, trainIdx);
|
||||
ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32FC1, distance);
|
||||
}
|
||||
|
||||
|
||||
if (stream)
|
||||
stream.enqueueMemSet(nMatches, Scalar::all(0));
|
||||
else
|
||||
nMatches.setTo(Scalar::all(0));
|
||||
|
||||
caller_t func = callers[distType][query.depth()];
|
||||
CV_Assert(func != 0);
|
||||
CV_Assert(func != 0);
|
||||
|
||||
func(query, train, maxDistance, mask, trainIdx, distance, nMatches, cc, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
|
||||
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
|
||||
vector< vector<DMatch> >& matches, bool compactResult)
|
||||
{
|
||||
if (trainIdx.empty() || distance.empty() || nMatches.empty())
|
||||
@ -886,33 +887,33 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat& query, const
|
||||
radiusMatchDownload(trainIdx, distance, nMatches, matches, compactResult);
|
||||
}
|
||||
|
||||
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches,
|
||||
void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches,
|
||||
float maxDistance, const vector<GpuMat>& masks, Stream& stream)
|
||||
{
|
||||
if (query.empty() || empty())
|
||||
return;
|
||||
|
||||
using namespace ::cv::gpu::device::bf_radius_match;
|
||||
using namespace cv::gpu::device::bf_radius_match;
|
||||
|
||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||
int cc, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[3][6] =
|
||||
{
|
||||
{
|
||||
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
|
||||
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
|
||||
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
|
||||
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
|
||||
matchL1_gpu<int>, matchL1_gpu<float>
|
||||
},
|
||||
{
|
||||
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
|
||||
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
|
||||
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
|
||||
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
|
||||
0/*matchL2_gpu<int>*/, matchL2_gpu<float>
|
||||
},
|
||||
{
|
||||
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
|
||||
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
|
||||
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
|
||||
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
|
||||
matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
|
||||
}
|
||||
};
|
||||
@ -920,7 +921,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
|
||||
DeviceInfo info;
|
||||
int cc = info.majorVersion() * 10 + info.minorVersion();
|
||||
|
||||
CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && info.supports(GLOBAL_ATOMICS));
|
||||
if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
|
||||
CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
|
||||
|
||||
const int nQuery = query.rows;
|
||||
|
||||
@ -934,7 +936,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
|
||||
ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32SC1, imgIdx);
|
||||
ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32FC1, distance);
|
||||
}
|
||||
|
||||
|
||||
if (stream)
|
||||
stream.enqueueMemSet(nMatches, Scalar::all(0));
|
||||
else
|
||||
@ -946,7 +948,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
|
||||
vector<DevMem2Db> trains_(trainDescCollection.begin(), trainDescCollection.end());
|
||||
vector<DevMem2Db> masks_(masks.begin(), masks.end());
|
||||
|
||||
func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
|
||||
func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
|
||||
trainIdx, imgIdx, distance, nMatches, cc, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
|
@ -40,7 +40,7 @@
|
||||
//
|
||||
// Copyright (c) 2010, Paul Furgale, Chi Hay Tong
|
||||
//
|
||||
// The original code was written by Paul Furgale and Chi Hay Tong
|
||||
// The original code was written by Paul Furgale and Chi Hay Tong
|
||||
// and later optimized and prepared for integration into OpenCV by Itseez.
|
||||
//
|
||||
//M*/
|
||||
@ -52,9 +52,9 @@
|
||||
#include "opencv2/gpu/device/functional.hpp"
|
||||
#include "opencv2/gpu/device/filters.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
namespace surf
|
||||
namespace surf
|
||||
{
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Global parameters
|
||||
@ -123,7 +123,7 @@ namespace cv { namespace gpu { namespace device
|
||||
#endif
|
||||
|
||||
float ratio = (float)newSize / oldSize;
|
||||
|
||||
|
||||
real_t d = 0;
|
||||
|
||||
#pragma unroll
|
||||
@ -225,7 +225,7 @@ namespace cv { namespace gpu { namespace device
|
||||
static __device__ bool check(int sum_i, int sum_j, int size)
|
||||
{
|
||||
float ratio = (float)size / 9.0f;
|
||||
|
||||
|
||||
float d = 0;
|
||||
|
||||
int dx1 = __float2int_rn(ratio * c_DM[0]);
|
||||
@ -423,12 +423,12 @@ namespace cv { namespace gpu { namespace device
|
||||
if (::fabs(x[0]) <= 1.f && ::fabs(x[1]) <= 1.f && ::fabs(x[2]) <= 1.f)
|
||||
{
|
||||
// if the step is within the interpolation region, perform it
|
||||
|
||||
|
||||
const int size = calcSize(c_octave, maxPos.z);
|
||||
|
||||
const int sum_i = (maxPos.y - ((size >> 1) >> c_octave)) << c_octave;
|
||||
const int sum_j = (maxPos.x - ((size >> 1) >> c_octave)) << c_octave;
|
||||
|
||||
|
||||
const float center_i = sum_i + (float)(size - 1) / 2;
|
||||
const float center_j = sum_j + (float)(size - 1) / 2;
|
||||
|
||||
@ -471,8 +471,8 @@ namespace cv { namespace gpu { namespace device
|
||||
#endif
|
||||
}
|
||||
|
||||
void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter,
|
||||
float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian,
|
||||
void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter,
|
||||
float* featureX, float* featureY, int* featureLaplacian, int* featureOctave, float* featureSize, float* featureHessian,
|
||||
unsigned int* featureCounter)
|
||||
{
|
||||
dim3 threads;
|
||||
@ -509,7 +509,8 @@ namespace cv { namespace gpu { namespace device
|
||||
__shared__ float s_Y[128];
|
||||
__shared__ float s_angle[128];
|
||||
|
||||
__shared__ float s_sum[32 * 4];
|
||||
__shared__ float s_sumx[32 * 4];
|
||||
__shared__ float s_sumy[32 * 4];
|
||||
|
||||
/* The sampling intervals and wavelet sized for selecting an orientation
|
||||
and building the keypoint descriptor are defined relative to 's' */
|
||||
@ -522,126 +523,109 @@ namespace cv { namespace gpu { namespace device
|
||||
const int grad_wav_size = 2 * __float2int_rn(2.0f * s);
|
||||
|
||||
// check when grad_wav_size is too big
|
||||
if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size)
|
||||
if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size)
|
||||
return;
|
||||
|
||||
// Calc X, Y, angle and store it to shared memory
|
||||
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||
|
||||
float X = 0.0f, Y = 0.0f, angle = 0.0f;
|
||||
|
||||
if (tid < ORI_SAMPLES)
|
||||
{
|
||||
// Calc X, Y, angle and store it to shared memory
|
||||
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||
const float margin = (float)(grad_wav_size - 1) / 2.0f;
|
||||
const int x = __float2int_rn(featureX[blockIdx.x] + c_aptX[tid] * s - margin);
|
||||
const int y = __float2int_rn(featureY[blockIdx.x] + c_aptY[tid] * s - margin);
|
||||
|
||||
float X = 0.0f, Y = 0.0f, angle = 0.0f;
|
||||
|
||||
if (tid < ORI_SAMPLES)
|
||||
if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size &&
|
||||
x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
|
||||
{
|
||||
const float margin = (float)(grad_wav_size - 1) / 2.0f;
|
||||
const int x = __float2int_rn(featureX[blockIdx.x] + c_aptX[tid] * s - margin);
|
||||
const int y = __float2int_rn(featureY[blockIdx.x] + c_aptY[tid] * s - margin);
|
||||
X = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NX, 4, grad_wav_size, y, x);
|
||||
Y = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NY, 4, grad_wav_size, y, x);
|
||||
|
||||
if ((unsigned)y < (unsigned)((c_img_rows + 1) - grad_wav_size) && (unsigned)x < (unsigned)((c_img_cols + 1) - grad_wav_size))
|
||||
{
|
||||
X = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NX, 4, grad_wav_size, y, x);
|
||||
Y = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NY, 4, grad_wav_size, y, x);
|
||||
|
||||
angle = atan2f(Y, X);
|
||||
if (angle < 0)
|
||||
angle += 2.0f * CV_PI_F;
|
||||
angle *= 180.0f / CV_PI_F;
|
||||
}
|
||||
angle = atan2f(Y, X);
|
||||
if (angle < 0)
|
||||
angle += 2.0f * CV_PI_F;
|
||||
angle *= 180.0f / CV_PI_F;
|
||||
}
|
||||
s_X[tid] = X;
|
||||
s_Y[tid] = Y;
|
||||
s_angle[tid] = angle;
|
||||
}
|
||||
s_X[tid] = X;
|
||||
s_Y[tid] = Y;
|
||||
s_angle[tid] = angle;
|
||||
__syncthreads();
|
||||
|
||||
float bestx = 0, besty = 0, best_mod = 0;
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 18; ++i)
|
||||
{
|
||||
const int dir = (i * 4 + threadIdx.y) * ORI_SEARCH_INC;
|
||||
|
||||
float sumx = 0.0f, sumy = 0.0f;
|
||||
int d = ::abs(__float2int_rn(s_angle[threadIdx.x]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx = s_X[threadIdx.x];
|
||||
sumy = s_Y[threadIdx.x];
|
||||
}
|
||||
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 32]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[threadIdx.x + 32];
|
||||
sumy += s_Y[threadIdx.x + 32];
|
||||
}
|
||||
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 64]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[threadIdx.x + 64];
|
||||
sumy += s_Y[threadIdx.x + 64];
|
||||
}
|
||||
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 96]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[threadIdx.x + 96];
|
||||
sumy += s_Y[threadIdx.x + 96];
|
||||
}
|
||||
|
||||
device::reduce<32>(s_sumx + threadIdx.y * 32, sumx, threadIdx.x, plus<volatile float>());
|
||||
device::reduce<32>(s_sumy + threadIdx.y * 32, sumy, threadIdx.x, plus<volatile float>());
|
||||
|
||||
const float temp_mod = sumx * sumx + sumy * sumy;
|
||||
if (temp_mod > best_mod)
|
||||
{
|
||||
best_mod = temp_mod;
|
||||
bestx = sumx;
|
||||
besty = sumy;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
float bestx = 0, besty = 0, best_mod = 0;
|
||||
if (threadIdx.x == 0)
|
||||
{
|
||||
s_X[threadIdx.y] = bestx;
|
||||
s_Y[threadIdx.y] = besty;
|
||||
s_angle[threadIdx.y] = best_mod;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 18; ++i)
|
||||
{
|
||||
const int dir = (i * 4 + threadIdx.y) * ORI_SEARCH_INC;
|
||||
if (threadIdx.x == 0 && threadIdx.y == 0)
|
||||
{
|
||||
int bestIdx = 0;
|
||||
|
||||
float sumx = 0.0f, sumy = 0.0f;
|
||||
int d = ::abs(__float2int_rn(s_angle[threadIdx.x]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx = s_X[threadIdx.x];
|
||||
sumy = s_Y[threadIdx.x];
|
||||
}
|
||||
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 32]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[threadIdx.x + 32];
|
||||
sumy += s_Y[threadIdx.x + 32];
|
||||
}
|
||||
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 64]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[threadIdx.x + 64];
|
||||
sumy += s_Y[threadIdx.x + 64];
|
||||
}
|
||||
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 96]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[threadIdx.x + 96];
|
||||
sumy += s_Y[threadIdx.x + 96];
|
||||
}
|
||||
if (s_angle[1] > s_angle[bestIdx])
|
||||
bestIdx = 1;
|
||||
if (s_angle[2] > s_angle[bestIdx])
|
||||
bestIdx = 2;
|
||||
if (s_angle[3] > s_angle[bestIdx])
|
||||
bestIdx = 3;
|
||||
|
||||
float* s_sum_row = s_sum + threadIdx.y * 32;
|
||||
float kp_dir = atan2f(s_Y[bestIdx], s_X[bestIdx]);
|
||||
if (kp_dir < 0)
|
||||
kp_dir += 2.0f * CV_PI_F;
|
||||
kp_dir *= 180.0f / CV_PI_F;
|
||||
|
||||
device::reduce<32>(s_sum_row, sumx, threadIdx.x, plus<volatile float>());
|
||||
device::reduce<32>(s_sum_row, sumy, threadIdx.x, plus<volatile float>());
|
||||
|
||||
const float temp_mod = sumx * sumx + sumy * sumy;
|
||||
if (temp_mod > best_mod)
|
||||
{
|
||||
best_mod = temp_mod;
|
||||
bestx = sumx;
|
||||
besty = sumy;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
if (threadIdx.x == 0)
|
||||
{
|
||||
s_X[threadIdx.y] = bestx;
|
||||
s_Y[threadIdx.y] = besty;
|
||||
s_angle[threadIdx.y] = best_mod;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
if (threadIdx.x < 2 && threadIdx.y == 0)
|
||||
{
|
||||
volatile float* v_x = s_X;
|
||||
volatile float* v_y = s_Y;
|
||||
volatile float* v_mod = s_angle;
|
||||
|
||||
bestx = v_x[threadIdx.x];
|
||||
besty = v_y[threadIdx.x];
|
||||
best_mod = v_mod[threadIdx.x];
|
||||
|
||||
float temp_mod = v_mod[threadIdx.x + 2];
|
||||
if (temp_mod > best_mod)
|
||||
{
|
||||
v_x[threadIdx.x] = bestx = v_x[threadIdx.x + 2];
|
||||
v_y[threadIdx.x] = besty = v_y[threadIdx.x + 2];
|
||||
v_mod[threadIdx.x] = best_mod = temp_mod;
|
||||
}
|
||||
temp_mod = v_mod[threadIdx.x + 1];
|
||||
if (temp_mod > best_mod)
|
||||
{
|
||||
v_x[threadIdx.x] = bestx = v_x[threadIdx.x + 1];
|
||||
v_y[threadIdx.x] = besty = v_y[threadIdx.x + 1];
|
||||
}
|
||||
}
|
||||
|
||||
if (threadIdx.x == 0 && threadIdx.y == 0 && best_mod != 0)
|
||||
{
|
||||
float kp_dir = atan2f(besty, bestx);
|
||||
if (kp_dir < 0)
|
||||
kp_dir += 2.0f * CV_PI_F;
|
||||
kp_dir *= 180.0f / CV_PI_F;
|
||||
|
||||
featureDir[blockIdx.x] = kp_dir;
|
||||
}
|
||||
featureDir[blockIdx.x] = kp_dir;
|
||||
}
|
||||
}
|
||||
|
||||
@ -649,7 +633,7 @@ namespace cv { namespace gpu { namespace device
|
||||
#undef ORI_WIN
|
||||
#undef ORI_SAMPLES
|
||||
|
||||
void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures)
|
||||
void icvCalcOrientation_gpu(const float* featureX, const float* featureY, const float* featureSize, float* featureDir, int nFeatures)
|
||||
{
|
||||
dim3 threads;
|
||||
threads.x = 32;
|
||||
@ -669,27 +653,27 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
#define PATCH_SZ 20
|
||||
|
||||
__constant__ float c_DW[PATCH_SZ * PATCH_SZ] =
|
||||
__constant__ float c_DW[PATCH_SZ * PATCH_SZ] =
|
||||
{
|
||||
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f,
|
||||
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f,
|
||||
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f,
|
||||
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f,
|
||||
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f,
|
||||
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f,
|
||||
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f,
|
||||
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f,
|
||||
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f,
|
||||
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f,
|
||||
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f,
|
||||
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f,
|
||||
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f,
|
||||
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f,
|
||||
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f,
|
||||
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f,
|
||||
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f,
|
||||
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f,
|
||||
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f,
|
||||
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f,
|
||||
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f,
|
||||
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f,
|
||||
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f,
|
||||
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f,
|
||||
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f,
|
||||
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f,
|
||||
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f,
|
||||
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f,
|
||||
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f,
|
||||
0.0002302826324012131f, 0.0005262381164357066f, 0.001097041997127235f, 0.002086334861814976f, 0.003619635012000799f, 0.005728822201490402f, 0.008271530270576477f, 0.01089497376233339f, 0.01309141051024199f, 0.01435048412531614f, 0.01435048412531614f, 0.01309141051024199f, 0.01089497376233339f, 0.008271530270576477f, 0.005728822201490402f, 0.003619635012000799f, 0.002086334861814976f, 0.001097041997127235f, 0.0005262381164357066f, 0.0002302826324012131f,
|
||||
0.0002100782439811155f, 0.0004800673632416874f, 0.001000790391117334f, 0.001903285388834775f, 0.00330205773934722f, 0.00522619066759944f, 0.007545807864516974f, 0.009939077310264111f, 0.01194280479103327f, 0.01309141051024199f, 0.01309141051024199f, 0.01194280479103327f, 0.009939077310264111f, 0.007545807864516974f, 0.00522619066759944f, 0.00330205773934722f, 0.001903285388834775f, 0.001000790391117334f, 0.0004800673632416874f, 0.0002100782439811155f,
|
||||
0.0001748319627949968f, 0.0003995231236331165f, 0.0008328808471560478f, 0.001583957928232849f, 0.002748048631474376f, 0.004349356517195702f, 0.006279794964939356f, 0.008271529339253902f, 0.009939077310264111f, 0.01089497376233339f, 0.01089497376233339f, 0.009939077310264111f, 0.008271529339253902f, 0.006279794964939356f, 0.004349356517195702f, 0.002748048631474376f, 0.001583957928232849f, 0.0008328808471560478f, 0.0003995231236331165f, 0.0001748319627949968f,
|
||||
0.0001327334757661447f, 0.0003033203829545528f, 0.0006323281559161842f, 0.001202550483867526f, 0.002086335094645619f, 0.00330205773934722f, 0.004767658654600382f, 0.006279794964939356f, 0.007545807864516974f, 0.008271530270576477f, 0.008271530270576477f, 0.007545807864516974f, 0.006279794964939356f, 0.004767658654600382f, 0.00330205773934722f, 0.002086335094645619f, 0.001202550483867526f, 0.0006323281559161842f, 0.0003033203829545528f, 0.0001327334757661447f,
|
||||
9.193058212986216e-005f, 0.0002100782585330308f, 0.0004379475140012801f, 0.0008328807889483869f, 0.001444985857233405f, 0.002286989474669099f, 0.00330205773934722f, 0.004349356517195702f, 0.00522619066759944f, 0.005728822201490402f, 0.005728822201490402f, 0.00522619066759944f, 0.004349356517195702f, 0.00330205773934722f, 0.002286989474669099f, 0.001444985857233405f, 0.0008328807889483869f, 0.0004379475140012801f, 0.0002100782585330308f, 9.193058212986216e-005f,
|
||||
5.808438800158911e-005f, 0.0001327334903180599f, 0.0002767078403849155f, 0.0005262380582280457f, 0.0009129836107604206f, 0.001444985857233405f, 0.002086335094645619f, 0.002748048631474376f, 0.00330205773934722f, 0.003619635012000799f, 0.003619635012000799f, 0.00330205773934722f, 0.002748048631474376f, 0.002086335094645619f, 0.001444985857233405f, 0.0009129836107604206f, 0.0005262380582280457f, 0.0002767078403849155f, 0.0001327334903180599f, 5.808438800158911e-005f,
|
||||
3.34794785885606e-005f, 7.650675252079964e-005f, 0.0001594926579855382f, 0.0003033203247468919f, 0.0005262380582280457f, 0.0008328807889483869f, 0.001202550483867526f, 0.001583957928232849f, 0.001903285388834775f, 0.002086334861814976f, 0.002086334861814976f, 0.001903285388834775f, 0.001583957928232849f, 0.001202550483867526f, 0.0008328807889483869f, 0.0005262380582280457f, 0.0003033203247468919f, 0.0001594926579855382f, 7.650675252079964e-005f, 3.34794785885606e-005f,
|
||||
1.760426494001877e-005f, 4.022897701361217e-005f, 8.386484114453197e-005f, 0.0001594926579855382f, 0.0002767078403849155f, 0.0004379475140012801f, 0.0006323281559161842f, 0.0008328808471560478f, 0.001000790391117334f, 0.001097041997127235f, 0.001097041997127235f, 0.001000790391117334f, 0.0008328808471560478f, 0.0006323281559161842f, 0.0004379475140012801f, 0.0002767078403849155f, 0.0001594926579855382f, 8.386484114453197e-005f, 4.022897701361217e-005f, 1.760426494001877e-005f,
|
||||
8.444558261544444e-006f, 1.929736572492402e-005f, 4.022897701361217e-005f, 7.650675252079964e-005f, 0.0001327334903180599f, 0.0002100782585330308f, 0.0003033203829545528f, 0.0003995231236331165f, 0.0004800673632416874f, 0.0005262381164357066f, 0.0005262381164357066f, 0.0004800673632416874f, 0.0003995231236331165f, 0.0003033203829545528f, 0.0002100782585330308f, 0.0001327334903180599f, 7.650675252079964e-005f, 4.022897701361217e-005f, 1.929736572492402e-005f, 8.444558261544444e-006f,
|
||||
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f
|
||||
};
|
||||
|
||||
@ -697,7 +681,7 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
typedef uchar elem_type;
|
||||
|
||||
__device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) :
|
||||
__device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) :
|
||||
centerX(centerX_), centerY(centerY_), win_offset(win_offset_), cos_dir(cos_dir_), sin_dir(sin_dir_)
|
||||
{
|
||||
}
|
||||
@ -710,14 +694,14 @@ namespace cv { namespace gpu { namespace device
|
||||
return tex2D(imgTex, pixel_x, pixel_y);
|
||||
}
|
||||
|
||||
float centerX;
|
||||
float centerX;
|
||||
float centerY;
|
||||
float win_offset;
|
||||
float cos_dir;
|
||||
float win_offset;
|
||||
float cos_dir;
|
||||
float sin_dir;
|
||||
};
|
||||
|
||||
__device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25],
|
||||
__device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25],
|
||||
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir)
|
||||
{
|
||||
__shared__ float s_PATCH[6][6];
|
||||
@ -739,7 +723,7 @@ namespace cv { namespace gpu { namespace device
|
||||
sincosf(descriptor_dir, &sin_dir, &cos_dir);
|
||||
|
||||
/* Nearest neighbour version (faster) */
|
||||
const float win_offset = -(float)(win_size - 1) / 2;
|
||||
const float win_offset = -(float)(win_size - 1) / 2;
|
||||
|
||||
// Compute sampling points
|
||||
// since grids are 2D, need to compute xBlock and yBlock indices
|
||||
@ -966,11 +950,11 @@ namespace cv { namespace gpu { namespace device
|
||||
descriptor_base[threadIdx.x] = lookup / len;
|
||||
}
|
||||
|
||||
void compute_descriptors_gpu(const DevMem2Df& descriptors,
|
||||
void compute_descriptors_gpu(const DevMem2Df& descriptors,
|
||||
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures)
|
||||
{
|
||||
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
|
||||
|
||||
|
||||
if (descriptors.cols == 64)
|
||||
{
|
||||
compute_descriptors64<<<dim3(nFeatures, 16, 1), dim3(6, 6, 1)>>>(descriptors, featureX, featureY, featureSize, featureDir);
|
||||
@ -985,12 +969,12 @@ namespace cv { namespace gpu { namespace device
|
||||
}
|
||||
else
|
||||
{
|
||||
compute_descriptors128<<<dim3(nFeatures, 16, 1), dim3(6, 6, 1)>>>(descriptors, featureX, featureY, featureSize, featureDir);
|
||||
compute_descriptors128<<<dim3(nFeatures, 16, 1), dim3(6, 6, 1)>>>(descriptors, featureX, featureY, featureSize, featureDir);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
normalize_descriptors<128><<<dim3(nFeatures, 1, 1), dim3(128, 1, 1)>>>(descriptors);
|
||||
normalize_descriptors<128><<<dim3(nFeatures, 1, 1), dim3(128, 1, 1)>>>(descriptors);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
@ -59,7 +59,7 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat&) { throw_nogpu(); return 0; }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
cv::gpu::FAST_GPU::FAST_GPU(int _threshold, bool _nonmaxSupression, double _keypointsRatio) :
|
||||
cv::gpu::FAST_GPU::FAST_GPU(int _threshold, bool _nonmaxSupression, double _keypointsRatio) :
|
||||
nonmaxSupression(_nonmaxSupression), threshold(_threshold), keypointsRatio(_keypointsRatio), count_(0)
|
||||
{
|
||||
}
|
||||
@ -109,9 +109,9 @@ void cv::gpu::FAST_GPU::operator ()(const GpuMat& img, const GpuMat& mask, GpuMa
|
||||
keypoints.cols = getKeyPoints(keypoints);
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
namespace fast
|
||||
namespace fast
|
||||
{
|
||||
int calcKeypoints_gpu(DevMem2Db img, DevMem2Db mask, short2* kpLoc, int maxKeypoints, DevMem2Di score, int threshold);
|
||||
int nonmaxSupression_gpu(const short2* kpLoc, int count, DevMem2Di score, short2* loc, float* response);
|
||||
@ -124,7 +124,9 @@ int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& ma
|
||||
|
||||
CV_Assert(img.type() == CV_8UC1);
|
||||
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));
|
||||
CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));
|
||||
|
||||
if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
|
||||
CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
|
||||
|
||||
int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area());
|
||||
|
||||
@ -146,7 +148,8 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
|
||||
{
|
||||
using namespace cv::gpu::device::fast;
|
||||
|
||||
CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));
|
||||
if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
|
||||
CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
|
||||
|
||||
if (count_ == 0)
|
||||
return 0;
|
||||
@ -160,7 +163,7 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
|
||||
kpLoc_.colRange(0, count_).copyTo(locRow);
|
||||
keypoints.row(1).setTo(Scalar::all(0));
|
||||
|
||||
return count_;
|
||||
return count_;
|
||||
}
|
||||
|
||||
void cv::gpu::FAST_GPU::release()
|
||||
|
@ -120,7 +120,9 @@ namespace
|
||||
CV_Assert(!img.empty() && img.type() == CV_8UC1);
|
||||
CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
|
||||
CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);
|
||||
CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));
|
||||
|
||||
if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
|
||||
CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
|
||||
|
||||
const int min_size = calcSize(surf_.nOctaves - 1, 0);
|
||||
CV_Assert(img_rows - min_size >= 0);
|
||||
@ -184,8 +186,8 @@ namespace
|
||||
{
|
||||
icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer.ptr<int4>(), maxCounter,
|
||||
keypoints.ptr<float>(SURF_GPU::X_ROW), keypoints.ptr<float>(SURF_GPU::Y_ROW),
|
||||
keypoints.ptr<int>(SURF_GPU::LAPLACIAN_ROW), keypoints.ptr<int>(SURF_GPU::OCTAVE_ROW),
|
||||
keypoints.ptr<float>(SURF_GPU::SIZE_ROW), keypoints.ptr<float>(SURF_GPU::HESSIAN_ROW),
|
||||
keypoints.ptr<int>(SURF_GPU::LAPLACIAN_ROW), keypoints.ptr<int>(SURF_GPU::OCTAVE_ROW),
|
||||
keypoints.ptr<float>(SURF_GPU::SIZE_ROW), keypoints.ptr<float>(SURF_GPU::HESSIAN_ROW),
|
||||
counters.ptr<unsigned int>());
|
||||
}
|
||||
}
|
||||
@ -306,7 +308,7 @@ void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, vector<Key
|
||||
Mat keypointsCPU(keypointsGPU);
|
||||
|
||||
keypoints.resize(nFeatures);
|
||||
|
||||
|
||||
float* kp_x = keypointsCPU.ptr<float>(SURF_GPU::X_ROW);
|
||||
float* kp_y = keypointsCPU.ptr<float>(SURF_GPU::Y_ROW);
|
||||
int* kp_laplacian = keypointsCPU.ptr<int>(SURF_GPU::LAPLACIAN_ROW);
|
||||
|
@ -108,6 +108,25 @@ testing::AssertionResult assertKeyPointsEquals(const char* gold_expr, const char
|
||||
|
||||
#define ASSERT_KEYPOINTS_EQ(gold, actual) EXPECT_PRED_FORMAT2(assertKeyPointsEquals, gold, actual);
|
||||
|
||||
int getMatchedPointsCount(std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual)
|
||||
{
|
||||
std::sort(actual.begin(), actual.end(), KeyPointLess());
|
||||
std::sort(gold.begin(), gold.end(), KeyPointLess());
|
||||
|
||||
int validCount = 0;
|
||||
|
||||
for (size_t i = 0; i < gold.size(); ++i)
|
||||
{
|
||||
const cv::KeyPoint& p1 = gold[i];
|
||||
const cv::KeyPoint& p2 = actual[i];
|
||||
|
||||
if (keyPointsEquals(p1, p2))
|
||||
++validCount;
|
||||
}
|
||||
|
||||
return validCount;
|
||||
}
|
||||
|
||||
int getMatchedPointsCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches)
|
||||
{
|
||||
int validCount = 0;
|
||||
@ -170,20 +189,39 @@ TEST_P(SURF, Detector)
|
||||
surf.upright = upright;
|
||||
surf.keypointsRatio = 0.05f;
|
||||
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
|
||||
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
|
||||
{
|
||||
try
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
ASSERT_EQ(CV_StsNotImplemented, e.code);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
|
||||
|
||||
cv::SURF surf_gold;
|
||||
surf_gold.hessianThreshold = hessianThreshold;
|
||||
surf_gold.nOctaves = nOctaves;
|
||||
surf_gold.nOctaveLayers = nOctaveLayers;
|
||||
surf_gold.extended = extended;
|
||||
surf_gold.upright = upright;
|
||||
cv::SURF surf_gold;
|
||||
surf_gold.hessianThreshold = hessianThreshold;
|
||||
surf_gold.nOctaves = nOctaves;
|
||||
surf_gold.nOctaveLayers = nOctaveLayers;
|
||||
surf_gold.extended = extended;
|
||||
surf_gold.upright = upright;
|
||||
|
||||
std::vector<cv::KeyPoint> keypoints_gold;
|
||||
surf_gold(image, cv::noArray(), keypoints_gold);
|
||||
std::vector<cv::KeyPoint> keypoints_gold;
|
||||
surf_gold(image, cv::noArray(), keypoints_gold);
|
||||
|
||||
ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
|
||||
ASSERT_EQ(keypoints_gold.size(), keypoints.size());
|
||||
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
|
||||
double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
|
||||
|
||||
EXPECT_GT(matchedRatio, 0.95);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(SURF, Detector_Masked)
|
||||
@ -202,20 +240,39 @@ TEST_P(SURF, Detector_Masked)
|
||||
surf.upright = upright;
|
||||
surf.keypointsRatio = 0.05f;
|
||||
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
surf(loadMat(image), loadMat(mask), keypoints);
|
||||
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
|
||||
{
|
||||
try
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
surf(loadMat(image), loadMat(mask), keypoints);
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
ASSERT_EQ(CV_StsNotImplemented, e.code);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
surf(loadMat(image), loadMat(mask), keypoints);
|
||||
|
||||
cv::SURF surf_gold;
|
||||
surf_gold.hessianThreshold = hessianThreshold;
|
||||
surf_gold.nOctaves = nOctaves;
|
||||
surf_gold.nOctaveLayers = nOctaveLayers;
|
||||
surf_gold.extended = extended;
|
||||
surf_gold.upright = upright;
|
||||
cv::SURF surf_gold;
|
||||
surf_gold.hessianThreshold = hessianThreshold;
|
||||
surf_gold.nOctaves = nOctaves;
|
||||
surf_gold.nOctaveLayers = nOctaveLayers;
|
||||
surf_gold.extended = extended;
|
||||
surf_gold.upright = upright;
|
||||
|
||||
std::vector<cv::KeyPoint> keypoints_gold;
|
||||
surf_gold(image, mask, keypoints_gold);
|
||||
std::vector<cv::KeyPoint> keypoints_gold;
|
||||
surf_gold(image, mask, keypoints_gold);
|
||||
|
||||
ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
|
||||
ASSERT_EQ(keypoints_gold.size(), keypoints.size());
|
||||
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
|
||||
double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
|
||||
|
||||
EXPECT_GT(matchedRatio, 0.95);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(SURF, Descriptor)
|
||||
@ -238,23 +295,39 @@ TEST_P(SURF, Descriptor)
|
||||
surf_gold.extended = extended;
|
||||
surf_gold.upright = upright;
|
||||
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
surf_gold(image, cv::noArray(), keypoints);
|
||||
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
|
||||
{
|
||||
try
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
cv::gpu::GpuMat descriptors;
|
||||
surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors);
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
ASSERT_EQ(CV_StsNotImplemented, e.code);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
surf_gold(image, cv::noArray(), keypoints);
|
||||
|
||||
cv::gpu::GpuMat descriptors;
|
||||
surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true);
|
||||
cv::gpu::GpuMat descriptors;
|
||||
surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true);
|
||||
|
||||
cv::Mat descriptors_gold;
|
||||
surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);
|
||||
cv::Mat descriptors_gold;
|
||||
surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);
|
||||
|
||||
cv::BFMatcher matcher(cv::NORM_L2);
|
||||
std::vector<cv::DMatch> matches;
|
||||
matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
|
||||
cv::BFMatcher matcher(cv::NORM_L2);
|
||||
std::vector<cv::DMatch> matches;
|
||||
matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
|
||||
|
||||
int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches);
|
||||
double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
|
||||
int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches);
|
||||
double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
|
||||
|
||||
EXPECT_GT(matchedRatio, 0.35);
|
||||
EXPECT_GT(matchedRatio, 0.35);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine(
|
||||
@ -295,13 +368,28 @@ TEST_P(FAST, Accuracy)
|
||||
cv::gpu::FAST_GPU fast(threshold);
|
||||
fast.nonmaxSupression = nonmaxSupression;
|
||||
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
fast(loadMat(image), cv::gpu::GpuMat(), keypoints);
|
||||
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
|
||||
{
|
||||
try
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
fast(loadMat(image), cv::gpu::GpuMat(), keypoints);
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
ASSERT_EQ(CV_StsNotImplemented, e.code);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
fast(loadMat(image), cv::gpu::GpuMat(), keypoints);
|
||||
|
||||
std::vector<cv::KeyPoint> keypoints_gold;
|
||||
cv::FAST(image, keypoints_gold, threshold, nonmaxSupression);
|
||||
std::vector<cv::KeyPoint> keypoints_gold;
|
||||
cv::FAST(image, keypoints_gold, threshold, nonmaxSupression);
|
||||
|
||||
ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
|
||||
ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_Features2D, FAST, testing::Combine(
|
||||
@ -364,24 +452,40 @@ TEST_P(ORB, Accuracy)
|
||||
cv::gpu::ORB_GPU orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
|
||||
orb.blurForDescriptor = blurForDescriptor;
|
||||
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
cv::gpu::GpuMat descriptors;
|
||||
orb(loadMat(image), loadMat(mask), keypoints, descriptors);
|
||||
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
|
||||
{
|
||||
try
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
cv::gpu::GpuMat descriptors;
|
||||
orb(loadMat(image), loadMat(mask), keypoints, descriptors);
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
ASSERT_EQ(CV_StsNotImplemented, e.code);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<cv::KeyPoint> keypoints;
|
||||
cv::gpu::GpuMat descriptors;
|
||||
orb(loadMat(image), loadMat(mask), keypoints, descriptors);
|
||||
|
||||
cv::ORB orb_gold(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
|
||||
cv::ORB orb_gold(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
|
||||
|
||||
std::vector<cv::KeyPoint> keypoints_gold;
|
||||
cv::Mat descriptors_gold;
|
||||
orb_gold(image, mask, keypoints_gold, descriptors_gold);
|
||||
std::vector<cv::KeyPoint> keypoints_gold;
|
||||
cv::Mat descriptors_gold;
|
||||
orb_gold(image, mask, keypoints_gold, descriptors_gold);
|
||||
|
||||
cv::BFMatcher matcher(cv::NORM_HAMMING);
|
||||
std::vector<cv::DMatch> matches;
|
||||
matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
|
||||
cv::BFMatcher matcher(cv::NORM_HAMMING);
|
||||
std::vector<cv::DMatch> matches;
|
||||
matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
|
||||
|
||||
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints, matches);
|
||||
double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
|
||||
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints, matches);
|
||||
double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
|
||||
|
||||
EXPECT_GT(matchedRatio, 0.35);
|
||||
EXPECT_GT(matchedRatio, 0.35);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_Features2D, ORB, testing::Combine(
|
||||
@ -713,25 +817,40 @@ TEST_P(BruteForceMatcher, RadiusMatch)
|
||||
|
||||
cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
|
||||
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
int badCount = 0;
|
||||
for (size_t i = 0; i < matches.size(); i++)
|
||||
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
|
||||
{
|
||||
if ((int)matches[i].size() != 1)
|
||||
badCount++;
|
||||
else
|
||||
try
|
||||
{
|
||||
cv::DMatch match = matches[i][0];
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0))
|
||||
badCount++;
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
ASSERT_EQ(CV_StsNotImplemented, e.code);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
|
||||
|
||||
ASSERT_EQ(0, badCount);
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
int badCount = 0;
|
||||
for (size_t i = 0; i < matches.size(); i++)
|
||||
{
|
||||
if ((int)matches[i].size() != 1)
|
||||
badCount++;
|
||||
else
|
||||
{
|
||||
cv::DMatch match = matches[i][0];
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0))
|
||||
badCount++;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_EQ(0, badCount);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(BruteForceMatcher, RadiusMatchAdd)
|
||||
@ -756,42 +875,57 @@ TEST_P(BruteForceMatcher, RadiusMatchAdd)
|
||||
masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
|
||||
}
|
||||
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks);
|
||||
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
int badCount = 0;
|
||||
int shift = matcher.isMaskSupported() ? 1 : 0;
|
||||
int needMatchCount = matcher.isMaskSupported() ? n-1 : n;
|
||||
for (size_t i = 0; i < matches.size(); i++)
|
||||
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
|
||||
{
|
||||
if ((int)matches[i].size() != needMatchCount)
|
||||
badCount++;
|
||||
else
|
||||
try
|
||||
{
|
||||
int localBadCount = 0;
|
||||
for (int k = 0; k < needMatchCount; k++)
|
||||
{
|
||||
cv::DMatch match = matches[i][k];
|
||||
{
|
||||
if ((int)i < queryDescCount / 2)
|
||||
{
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) )
|
||||
localBadCount++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) )
|
||||
localBadCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
badCount += localBadCount > 0 ? 1 : 0;
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks);
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
ASSERT_EQ(CV_StsNotImplemented, e.code);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks);
|
||||
|
||||
ASSERT_EQ(0, badCount);
|
||||
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
|
||||
|
||||
int badCount = 0;
|
||||
int shift = matcher.isMaskSupported() ? 1 : 0;
|
||||
int needMatchCount = matcher.isMaskSupported() ? n-1 : n;
|
||||
for (size_t i = 0; i < matches.size(); i++)
|
||||
{
|
||||
if ((int)matches[i].size() != needMatchCount)
|
||||
badCount++;
|
||||
else
|
||||
{
|
||||
int localBadCount = 0;
|
||||
for (int k = 0; k < needMatchCount; k++)
|
||||
{
|
||||
cv::DMatch match = matches[i][k];
|
||||
{
|
||||
if ((int)i < queryDescCount / 2)
|
||||
{
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) )
|
||||
localBadCount++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) )
|
||||
localBadCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
badCount += localBadCount > 0 ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_EQ(0, badCount);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine(
|
||||
|
@ -258,13 +258,28 @@ TEST_P(GaussianBlur, Accuracy)
|
||||
double sigma1 = randomDouble(0.1, 1.0);
|
||||
double sigma2 = randomDouble(0.1, 1.0);
|
||||
|
||||
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
|
||||
cv::gpu::GaussianBlur(loadMat(src, useRoi), dst, ksize, sigma1, sigma2, borderType);
|
||||
if (ksize.height > 16 && !supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
|
||||
{
|
||||
try
|
||||
{
|
||||
cv::gpu::GpuMat dst;
|
||||
cv::gpu::GaussianBlur(loadMat(src), dst, ksize, sigma1, sigma2, borderType);
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
ASSERT_EQ(CV_StsNotImplemented, e.code);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
|
||||
cv::gpu::GaussianBlur(loadMat(src, useRoi), dst, ksize, sigma1, sigma2, borderType);
|
||||
|
||||
cv::Mat dst_gold;
|
||||
cv::GaussianBlur(src, dst_gold, ksize, sigma1, sigma2, borderType);
|
||||
cv::Mat dst_gold;
|
||||
cv::GaussianBlur(src, dst_gold, ksize, sigma1, sigma2, borderType);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 4.0);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 4.0);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine(
|
||||
|
Loading…
x
Reference in New Issue
Block a user