Merge pull request #3596 from jet47:cuda-features2d-refactoring
commit 3a84444488
@@ -48,6 +1,7 @@
#endif

#include "opencv2/core/cuda.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/cudafilters.hpp"

/**
@@ -62,262 +63,396 @@ namespace cv { namespace cuda {
//! @addtogroup cudafeatures2d
//! @{

/** @brief Brute-force descriptor matcher.
//
// DescriptorMatcher
//

For each descriptor in the first set, this matcher finds the closest descriptor in the second set
by trying each one. This descriptor matcher supports masking permissible matches between descriptor
sets.
/** @brief Abstract base class for matching keypoint descriptors.

The class BFMatcher_CUDA has an interface similar to the class DescriptorMatcher. It has two groups
of match methods: for matching descriptors of one image with another image or with an image set.
Also, all functions have an alternative to save results either to the GPU memory or to the CPU
memory.

@sa DescriptorMatcher, BFMatcher
It has two groups of match methods: for matching descriptors of an image with another image or with
an image set.
*/
class CV_EXPORTS BFMatcher_CUDA
class CV_EXPORTS DescriptorMatcher : public cv::Algorithm
{
public:
    explicit BFMatcher_CUDA(int norm = cv::NORM_L2);
    //
    // Factories
    //

    //! Add descriptors to train descriptor collection
    void add(const std::vector<GpuMat>& descCollection);
    /** @brief Brute-force descriptor matcher.

    //! Get train descriptors collection
    const std::vector<GpuMat>& getTrainDescriptors() const;
    For each descriptor in the first set, this matcher finds the closest descriptor in the second set
    by trying each one. This descriptor matcher supports masking permissible matches of descriptor
    sets.

    //! Clear train descriptors collection
    void clear();
    @param normType One of NORM_L1, NORM_L2, NORM_HAMMING. L1 and L2 norms are
    preferable choices for SIFT and SURF descriptors; NORM_HAMMING should be used with ORB, BRISK and
    BRIEF.
    */
    static Ptr<DescriptorMatcher> createBFMatcher(int normType = cv::NORM_L2);

    //! Return true if there are no train descriptors in the collection
    bool empty() const;
    //
    // Utility
    //

    //! Return true if the matcher supports mask in match methods
    bool isMaskSupported() const;
    /** @brief Returns true if the descriptor matcher supports masking permissible matches.
    */
    virtual bool isMaskSupported() const = 0;

    //! Find one best match for each query descriptor
    void matchSingle(const GpuMat& query, const GpuMat& train,
                     GpuMat& trainIdx, GpuMat& distance,
                     const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
    //
    // Descriptor collection
    //

    //! Download trainIdx and distance and convert it to CPU vector with DMatch
    static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector<DMatch>& matches);
    //! Convert trainIdx and distance to vector with DMatch
    static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>& matches);
    /** @brief Adds descriptors to train a descriptor collection.

    //! Find one best match for each query descriptor
    void match(const GpuMat& query, const GpuMat& train, std::vector<DMatch>& matches, const GpuMat& mask = GpuMat());
    If the collection is not empty, the new descriptors are added to existing train descriptors.

    //! Make a GPU collection of train images and masks in a format suitable for the matchCollection function
    void makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection, const std::vector<GpuMat>& masks = std::vector<GpuMat>());
    @param descriptors Descriptors to add. Each descriptors[i] is a set of descriptors from the same
    train image.
    */
    virtual void add(const std::vector<GpuMat>& descriptors) = 0;

    //! Find one best match from train collection for each query descriptor
    void matchCollection(const GpuMat& query, const GpuMat& trainCollection,
                         GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
                         const GpuMat& masks = GpuMat(), Stream& stream = Stream::Null());
    /** @brief Returns a constant link to the train descriptor collection.
    */
    virtual const std::vector<GpuMat>& getTrainDescriptors() const = 0;

    //! Download trainIdx, imgIdx and distance and convert it to vector with DMatch
    static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector<DMatch>& matches);
    //! Convert trainIdx, imgIdx and distance to vector with DMatch
    static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>& matches);
    /** @brief Clears the train descriptor collection.
    */
    virtual void clear() = 0;

    //! Find one best match from train collection for each query descriptor.
    void match(const GpuMat& query, std::vector<DMatch>& matches, const std::vector<GpuMat>& masks = std::vector<GpuMat>());
    /** @brief Returns true if there are no train descriptors in the collection.
    */
    virtual bool empty() const = 0;

    //! Find k best matches for each query descriptor (in increasing order of distances)
    void knnMatchSingle(const GpuMat& query, const GpuMat& train,
                        GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k,
                        const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
    /** @brief Trains a descriptor matcher.

    //! Download trainIdx and distance and convert it to vector with DMatch
    //! compactResult is used when mask is not empty. If compactResult is false, the matches
    //! vector will have the same size as queryDescriptors rows. If compactResult is true, the
    //! matches vector will not contain matches for fully masked-out query descriptors.
    static void knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance,
                                 std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
    //! Convert trainIdx and distance to vector with DMatch
    static void knnMatchConvert(const Mat& trainIdx, const Mat& distance,
                                std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
    Trains a descriptor matcher (for example, the FLANN index). In all the match methods, the method
    train() is run every time before matching.
    */
    virtual void train() = 0;

    //! Find k best matches for each query descriptor (in increasing order of distances).
    //! compactResult is used when mask is not empty. If compactResult is false, the matches
    //! vector will have the same size as queryDescriptors rows. If compactResult is true, the
    //! matches vector will not contain matches for fully masked-out query descriptors.
    void knnMatch(const GpuMat& query, const GpuMat& train,
                  std::vector< std::vector<DMatch> >& matches, int k, const GpuMat& mask = GpuMat(),
                  bool compactResult = false);
    //
    // 1 to 1 match
    //

    //! Find k best matches from train collection for each query descriptor (in increasing order of distances)
    void knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection,
                             GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance,
                             const GpuMat& maskCollection = GpuMat(), Stream& stream = Stream::Null());
    /** @brief Finds the best match for each descriptor from a query set (blocking version).

    //! Download trainIdx and distance and convert it to vector with DMatch
    //! compactResult is used when mask is not empty. If compactResult is false, the matches
    //! vector will have the same size as queryDescriptors rows. If compactResult is true, the
    //! matches vector will not contain matches for fully masked-out query descriptors.
    //! @see BFMatcher_CUDA::knnMatchDownload
    static void knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance,
                                  std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
    //! Convert trainIdx and distance to vector with DMatch
    //! @see BFMatcher_CUDA::knnMatchConvert
    static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
                                 std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
    @param queryDescriptors Query set of descriptors.
    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
    collection stored in the class object.
    @param matches Matches. If a query descriptor is masked out in mask, no match is added for this
    descriptor. So, matches size may be smaller than the query descriptors count.
    @param mask Mask specifying permissible matches between an input query and train matrices of
    descriptors.

    //! Find k best matches for each query descriptor (in increasing order of distances).
    //! compactResult is used when mask is not empty. If compactResult is false, the matches
    //! vector will have the same size as queryDescriptors rows. If compactResult is true, the
    //! matches vector will not contain matches for fully masked-out query descriptors.
    void knnMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, int k,
                  const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
    In the first variant of this method, the train descriptors are passed as an input argument. In the
    second variant of the method, the train descriptors collection that was set by DescriptorMatcher::add is
    used. An optional mask (or masks) can be passed to specify which query and training descriptors can be
    matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if
    mask.at\<uchar\>(i,j) is non-zero.
    */
    virtual void match(InputArray queryDescriptors, InputArray trainDescriptors,
                       std::vector<DMatch>& matches,
                       InputArray mask = noArray()) = 0;

    //! Find best matches for each query descriptor which have distance less than maxDistance.
    //! nMatches.at<int>(0, queryIdx) will contain the match count for queryIdx.
    //! Note that nMatches can be greater than trainIdx.cols - it means that the matcher did not find all matches
    //! because it did not have enough memory.
    //! If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10);
    //! otherwise the user can pass their own allocated trainIdx and distance of size nQuery x nMaxMatches.
    //! Matches are not sorted.
    void radiusMatchSingle(const GpuMat& query, const GpuMat& train,
                           GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
                           const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null());
    /** @overload
    */
    virtual void match(InputArray queryDescriptors,
                       std::vector<DMatch>& matches,
                       const std::vector<GpuMat>& masks = std::vector<GpuMat>()) = 0;

    //! Download trainIdx, nMatches and distance and convert it to vector with DMatch.
    //! matches will be sorted in increasing order of distances.
    //! compactResult is used when mask is not empty. If compactResult is false, the matches
    //! vector will have the same size as queryDescriptors rows. If compactResult is true, the
    //! matches vector will not contain matches for fully masked-out query descriptors.
    static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches,
                                    std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
    //! Convert trainIdx, nMatches and distance to vector with DMatch.
    static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
                                   std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
    /** @brief Finds the best match for each descriptor from a query set (asynchronous version).

    //! Find best matches for each query descriptor which have distance less than maxDistance
    //! (in increasing order of distances).
    void radiusMatch(const GpuMat& query, const GpuMat& train,
                     std::vector< std::vector<DMatch> >& matches, float maxDistance,
                     const GpuMat& mask = GpuMat(), bool compactResult = false);
    @param queryDescriptors Query set of descriptors.
    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
    collection stored in the class object.
    @param matches Matches array stored in GPU memory. Internal representation is not defined.
    Use the DescriptorMatcher::matchConvert method to retrieve results in standard representation.
    @param mask Mask specifying permissible matches between an input query and train matrices of
    descriptors.
    @param stream CUDA stream.

    //! Find best matches for each query descriptor which have distance less than maxDistance.
    //! If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10);
    //! otherwise the user can pass their own allocated trainIdx and distance of size nQuery x nMaxMatches.
    //! Matches are not sorted.
    void radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance,
                               const std::vector<GpuMat>& masks = std::vector<GpuMat>(), Stream& stream = Stream::Null());
    In the first variant of this method, the train descriptors are passed as an input argument. In the
    second variant of the method, the train descriptors collection that was set by DescriptorMatcher::add is
    used. An optional mask (or masks) can be passed to specify which query and training descriptors can be
    matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if
    mask.at\<uchar\>(i,j) is non-zero.
    */
    virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                            OutputArray matches,
                            InputArray mask = noArray(),
                            Stream& stream = Stream::Null()) = 0;

    //! Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
    //! matches will be sorted in increasing order of distances.
    //! compactResult is used when mask is not empty. If compactResult is false, the matches
    //! vector will have the same size as queryDescriptors rows. If compactResult is true, the
    //! matches vector will not contain matches for fully masked-out query descriptors.
    static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches,
                                    std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
    //! Convert trainIdx, nMatches and distance to vector with DMatch.
    static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
                                   std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
    /** @overload
    */
    virtual void matchAsync(InputArray queryDescriptors,
                            OutputArray matches,
                            const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                            Stream& stream = Stream::Null()) = 0;

    //! Find best matches from train collection for each query descriptor which have distance less than
    //! maxDistance (in increasing order of distances).
    void radiusMatch(const GpuMat& query, std::vector< std::vector<DMatch> >& matches, float maxDistance,
                     const std::vector<GpuMat>& masks = std::vector<GpuMat>(), bool compactResult = false);
    /** @brief Converts matches array from internal representation to standard matches vector.

    int norm;
    The method is supposed to be used with DescriptorMatcher::matchAsync to get the final result.
    Call this method only after DescriptorMatcher::matchAsync is completed (i.e. after synchronization).

private:
    std::vector<GpuMat> trainDescCollection;
    @param gpu_matches Matches returned from DescriptorMatcher::matchAsync.
    @param matches Vector of DMatch objects.
    */
    virtual void matchConvert(InputArray gpu_matches,
                              std::vector<DMatch>& matches) = 0;

    //
    // knn match
    //

    /** @brief Finds the k best matches for each descriptor from a query set (blocking version).

    @param queryDescriptors Query set of descriptors.
    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
    collection stored in the class object.
    @param matches Matches. Each matches[i] contains k or fewer matches for the same query descriptor.
    @param k Count of best matches found per query descriptor, or fewer if a query descriptor has
    less than k possible matches in total.
    @param mask Mask specifying permissible matches between an input query and train matrices of
    descriptors.
    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
    the matches vector does not contain matches for fully masked-out query descriptors.

    These extended variants of the DescriptorMatcher::match methods find several best matches for each query
    descriptor. The matches are returned in increasing order of distance. See DescriptorMatcher::match
    for the details about query and train descriptors.
    */
    virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors,
                          std::vector<std::vector<DMatch> >& matches,
                          int k,
                          InputArray mask = noArray(),
                          bool compactResult = false) = 0;

    /** @overload
    */
    virtual void knnMatch(InputArray queryDescriptors,
                          std::vector<std::vector<DMatch> >& matches,
                          int k,
                          const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                          bool compactResult = false) = 0;

    /** @brief Finds the k best matches for each descriptor from a query set (asynchronous version).

    @param queryDescriptors Query set of descriptors.
    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
    collection stored in the class object.
    @param matches Matches array stored in GPU memory. Internal representation is not defined.
    Use the DescriptorMatcher::knnMatchConvert method to retrieve results in standard representation.
    @param k Count of best matches found per query descriptor, or fewer if a query descriptor has
    less than k possible matches in total.
    @param mask Mask specifying permissible matches between an input query and train matrices of
    descriptors.
    @param stream CUDA stream.

    These extended variants of the DescriptorMatcher::matchAsync methods find several best matches for each query
    descriptor. The matches are returned in increasing order of distance. See DescriptorMatcher::matchAsync
    for the details about query and train descriptors.
    */
    virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                               OutputArray matches,
                               int k,
                               InputArray mask = noArray(),
                               Stream& stream = Stream::Null()) = 0;

    /** @overload
    */
    virtual void knnMatchAsync(InputArray queryDescriptors,
                               OutputArray matches,
                               int k,
                               const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                               Stream& stream = Stream::Null()) = 0;

    /** @brief Converts matches array from internal representation to standard matches vector.

    The method is supposed to be used with DescriptorMatcher::knnMatchAsync to get the final result.
    Call this method only after DescriptorMatcher::knnMatchAsync is completed (i.e. after synchronization).

    @param gpu_matches Matches returned from DescriptorMatcher::knnMatchAsync.
    @param matches Vector of DMatch objects.
    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
    the matches vector does not contain matches for fully masked-out query descriptors.
    */
    virtual void knnMatchConvert(InputArray gpu_matches,
                                 std::vector< std::vector<DMatch> >& matches,
                                 bool compactResult = false) = 0;

    //
    // radius match
    //

    /** @brief For each query descriptor, finds the training descriptors not farther than the specified distance (blocking version).

    @param queryDescriptors Query set of descriptors.
    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
    collection stored in the class object.
    @param matches Found matches.
    @param maxDistance Threshold for the distance between matched descriptors. Distance means here
    metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured
    in pixels)!
    @param mask Mask specifying permissible matches between an input query and train matrices of
    descriptors.
    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
    the matches vector does not contain matches for fully masked-out query descriptors.

    For each query descriptor, the methods find such training descriptors that the distance between the
    query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are
    returned in increasing order of distance.
    */
    virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors,
                             std::vector<std::vector<DMatch> >& matches,
                             float maxDistance,
                             InputArray mask = noArray(),
                             bool compactResult = false) = 0;

    /** @overload
    */
    virtual void radiusMatch(InputArray queryDescriptors,
                             std::vector<std::vector<DMatch> >& matches,
                             float maxDistance,
                             const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                             bool compactResult = false) = 0;

    /** @brief For each query descriptor, finds the training descriptors not farther than the specified distance (asynchronous version).

    @param queryDescriptors Query set of descriptors.
    @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors
    collection stored in the class object.
    @param matches Matches array stored in GPU memory. Internal representation is not defined.
    Use the DescriptorMatcher::radiusMatchConvert method to retrieve results in standard representation.
    @param maxDistance Threshold for the distance between matched descriptors. Distance means here
    metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured
    in pixels)!
    @param mask Mask specifying permissible matches between an input query and train matrices of
    descriptors.
    @param stream CUDA stream.

    For each query descriptor, the methods find such training descriptors that the distance between the
    query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are
    returned in increasing order of distance.
    */
    virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
                                  OutputArray matches,
                                  float maxDistance,
                                  InputArray mask = noArray(),
                                  Stream& stream = Stream::Null()) = 0;

    /** @overload
    */
    virtual void radiusMatchAsync(InputArray queryDescriptors,
                                  OutputArray matches,
                                  float maxDistance,
                                  const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
                                  Stream& stream = Stream::Null()) = 0;

    /** @brief Converts matches array from internal representation to standard matches vector.

    The method is supposed to be used with DescriptorMatcher::radiusMatchAsync to get the final result.
    Call this method only after DescriptorMatcher::radiusMatchAsync is completed (i.e. after synchronization).

    @param gpu_matches Matches returned from DescriptorMatcher::radiusMatchAsync.
    @param matches Vector of DMatch objects.
    @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is
    false, the matches vector has the same size as queryDescriptors rows. If compactResult is true,
    the matches vector does not contain matches for fully masked-out query descriptors.
    */
    virtual void radiusMatchConvert(InputArray gpu_matches,
                                    std::vector< std::vector<DMatch> >& matches,
                                    bool compactResult = false) = 0;
};
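
// A minimal usage sketch of the matcher interface declared above (illustrative
// only, not part of this commit; d_query and d_train are assumed to already
// hold binary descriptors in GpuMat):
void matchExample(const cv::cuda::GpuMat& d_query, const cv::cuda::GpuMat& d_train)
{
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
        cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_HAMMING);

    // Blocking one-to-one matching.
    std::vector<cv::DMatch> matches;
    matcher->match(d_query, d_train, matches);

    // Blocking k-nearest-neighbor matching (2 best matches per query descriptor).
    std::vector< std::vector<cv::DMatch> > knnMatches;
    matcher->knnMatch(d_query, d_train, knnMatches, 2);

    // Asynchronous matching: results stay in GPU memory until converted.
    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_matches;
    matcher->matchAsync(d_query, d_train, d_matches, cv::noArray(), stream);
    stream.waitForCompletion();                 // synchronize before conversion
    matcher->matchConvert(d_matches, matches);  // internal layout -> DMatch vector
}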

/** @brief Class used for corner detection using the FAST algorithm.
//
// Feature2DAsync
//

/** @brief Abstract base class for CUDA asynchronous 2D image feature detectors and descriptor extractors.
*/
class CV_EXPORTS FAST_CUDA
class CV_EXPORTS Feature2DAsync
{
public:
    virtual ~Feature2DAsync();

    /** @brief Detects keypoints in an image.

    @param image Image.
    @param keypoints The detected keypoints.
    @param mask Mask specifying where to look for keypoints (optional). It must be an 8-bit integer
    matrix with non-zero values in the region of interest.
    @param stream CUDA stream.
    */
    virtual void detectAsync(InputArray image,
                             OutputArray keypoints,
                             InputArray mask = noArray(),
                             Stream& stream = Stream::Null());

    /** @brief Computes the descriptors for a set of keypoints detected in an image.

    @param image Image.
    @param keypoints Input collection of keypoints.
    @param descriptors Computed descriptors. Row j is the descriptor for the j-th keypoint.
    @param stream CUDA stream.
    */
    virtual void computeAsync(InputArray image,
                              OutputArray keypoints,
                              OutputArray descriptors,
                              Stream& stream = Stream::Null());

    /** Detects keypoints and computes the descriptors. */
    virtual void detectAndComputeAsync(InputArray image,
                                       InputArray mask,
                                       OutputArray keypoints,
                                       OutputArray descriptors,
                                       bool useProvidedKeypoints = false,
                                       Stream& stream = Stream::Null());

    /** Converts keypoints array from internal representation to standard vector. */
    virtual void convert(InputArray gpu_keypoints,
                         std::vector<KeyPoint>& keypoints) = 0;
};
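
// A sketch of how the asynchronous base interface above is intended to be
// driven; any concrete CUDA detector can stand behind the reference
// (illustrative, not part of this commit):
void detectExample(cv::cuda::Feature2DAsync& detector, const cv::cuda::GpuMat& d_img)
{
    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_keypoints;

    detector.detectAsync(d_img, d_keypoints, cv::noArray(), stream);
    stream.waitForCompletion();  // results are valid only after the stream finishes

    std::vector<cv::KeyPoint> keypoints;
    detector.convert(d_keypoints, keypoints);  // download and unpack to KeyPoint
}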

//
// FastFeatureDetector
//

/** @brief Wrapping class for feature detection using the FAST method.
*/
class CV_EXPORTS FastFeatureDetector : public cv::FastFeatureDetector, public Feature2DAsync
{
public:
    enum
    {
        LOCATION_ROW = 0,
        RESPONSE_ROW,
        ROWS_COUNT
        ROWS_COUNT,

        FEATURE_SIZE = 7
    };

    //! all features have the same size
    static const int FEATURE_SIZE = 7;
    static Ptr<FastFeatureDetector> create(int threshold=10,
                                           bool nonmaxSuppression=true,
                                           int type=FastFeatureDetector::TYPE_9_16,
                                           int max_npoints = 5000);

    /** @brief Constructor.

    @param threshold Threshold on difference between intensity of the central pixel and pixels on a
    circle around this pixel.
    @param nonmaxSuppression If it is true, non-maximum suppression is applied to detected corners
    (keypoints).
    @param keypointsRatio Inner buffer size for the keypoint store is determined as (keypointsRatio \*
    image_width \* image_height).
    */
    explicit FAST_CUDA(int threshold, bool nonmaxSuppression = true, double keypointsRatio = 0.05);

    /** @brief Finds the keypoints using the FAST detector.

    @param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are
    supported.
    @param mask Optional input mask that marks the regions where we should detect features.
    @param keypoints The output vector of keypoints. Can be stored both in CPU and GPU memory. For GPU
    memory:
    - keypoints.ptr\<Vec2s\>(LOCATION_ROW)[i] will contain location of i'th point
    - keypoints.ptr\<float\>(RESPONSE_ROW)[i] will contain response of i'th point (if non-maximum
    suppression is applied)
    */
    void operator ()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
    /** @overload */
    void operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);

    /** @brief Downloads keypoints from GPU to CPU memory.
    */
    static void downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);

    /** @brief Converts keypoints from CUDA representation to vector of KeyPoint.
    */
    static void convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints);

    /** @brief Releases inner buffer memory.
    */
    void release();

    bool nonmaxSuppression;

    int threshold;

    //! max keypoints = keypointsRatio * img.size().area()
    double keypointsRatio;

    /** @brief Finds keypoints and computes their responses if nonmaxSuppression is true.

    @param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are
    supported.
    @param mask Optional input mask that marks the regions where we should detect features.

    The function returns the count of detected keypoints.
    */
    int calcKeyPointsLocation(const GpuMat& image, const GpuMat& mask);

    /** @brief Gets the final array of keypoints.

    @param keypoints The output vector of keypoints.

    The function performs non-max suppression if needed and returns the final count of keypoints.
    */
    int getKeyPoints(GpuMat& keypoints);

private:
    GpuMat kpLoc_;
    int count_;

    GpuMat score_;

    GpuMat d_keypoints_;
    virtual void setMaxNumPoints(int max_npoints) = 0;
    virtual int getMaxNumPoints() const = 0;
};
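
// For comparison with the removed FAST_CUDA functor, a sketch of the
// factory-based replacement (illustrative, not part of this commit; img is
// assumed to be an 8-bit grayscale host image):
void fastExample(const cv::Mat& img)
{
    cv::Ptr<cv::cuda::FastFeatureDetector> fast =
        cv::cuda::FastFeatureDetector::create(20,    // threshold
                                              true,  // non-maximum suppression
                                              cv::FastFeatureDetector::TYPE_9_16,
                                              5000); // max_npoints

    cv::cuda::GpuMat d_img(img);
    std::vector<cv::KeyPoint> keypoints;
    fast->detect(d_img, keypoints);  // blocking variant inherited from cv::Feature2D
}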

/** @brief Class for extracting ORB features and descriptors from an image.
//
// ORB
//

/** @brief Class implementing the ORB (*oriented BRIEF*) keypoint detector and descriptor extractor
*
* @sa cv::ORB
*/
class CV_EXPORTS ORB_CUDA
class CV_EXPORTS ORB : public cv::ORB, public Feature2DAsync
{
public:
    enum
@@ -331,113 +466,20 @@ public:
        ROWS_COUNT
    };

    enum
    {
        DEFAULT_FAST_THRESHOLD = 20
    };

    /** @brief Constructor.

    @param nFeatures The number of desired features.
    @param scaleFactor Coefficient by which we divide the dimensions from one scale pyramid level to
    the next.
    @param nLevels The number of levels in the scale pyramid.
    @param edgeThreshold How far from the boundary the points should be.
    @param firstLevel The level at which the image is given. If 1, that means we will also look at the
    image scaleFactor times bigger.
    @param WTA_K
    @param scoreType
    @param patchSize
    */
    explicit ORB_CUDA(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31,
                      int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31);

    /** @overload */
    void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
    /** @overload */
    void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);

    /** @brief Detects keypoints and computes descriptors for them.

    @param image Input 8-bit grayscale image.
    @param mask Optional input mask that marks the regions where we should detect features.
    @param keypoints The input/output vector of keypoints. Can be stored both in CPU and GPU memory.
    For GPU memory:
    - keypoints.ptr\<float\>(X_ROW)[i] contains x coordinate of the i'th feature.
    - keypoints.ptr\<float\>(Y_ROW)[i] contains y coordinate of the i'th feature.
    - keypoints.ptr\<float\>(RESPONSE_ROW)[i] contains the response of the i'th feature.
    - keypoints.ptr\<float\>(ANGLE_ROW)[i] contains orientation of the i'th feature.
    - keypoints.ptr\<float\>(OCTAVE_ROW)[i] contains the octave of the i'th feature.
    - keypoints.ptr\<float\>(SIZE_ROW)[i] contains the size of the i'th feature.
    @param descriptors Computed descriptors. If blurForDescriptor is true, the image will be blurred
    before descriptor calculation.
    */
    void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors);
    /** @overload */
    void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors);

    /** @brief Downloads keypoints from GPU to CPU memory.
    */
    static void downloadKeyPoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
    /** @brief Converts keypoints from CUDA representation to vector of KeyPoint.
    */
    static void convertKeyPoints(const Mat& d_keypoints, std::vector<KeyPoint>& keypoints);

    //! returns the descriptor size in bytes
    inline int descriptorSize() const { return kBytes; }

    inline void setFastParams(int threshold, bool nonmaxSuppression = true)
    {
        fastDetector_.threshold = threshold;
        fastDetector_.nonmaxSuppression = nonmaxSuppression;
    }

    /** @brief Releases inner buffer memory.
    */
    void release();
    static Ptr<ORB> create(int nfeatures=500,
                           float scaleFactor=1.2f,
                           int nlevels=8,
                           int edgeThreshold=31,
                           int firstLevel=0,
                           int WTA_K=2,
                           int scoreType=ORB::HARRIS_SCORE,
                           int patchSize=31,
                           int fastThreshold=20,
                           bool blurForDescriptor=false);

    //! if true, the image will be blurred before descriptor calculation
    bool blurForDescriptor;

private:
    enum { kBytes = 32 };

    void buildScalePyramids(const GpuMat& image, const GpuMat& mask);

    void computeKeyPointsPyramid();

    void computeDescriptors(GpuMat& descriptors);

    void mergeKeyPoints(GpuMat& keypoints);

    int nFeatures_;
    float scaleFactor_;
    int nLevels_;
    int edgeThreshold_;
    int firstLevel_;
    int WTA_K_;
    int scoreType_;
    int patchSize_;

    //! The number of desired features per scale
    std::vector<size_t> n_features_per_level_;

    //! Points to compute BRIEF descriptors from
    GpuMat pattern_;

    std::vector<GpuMat> imagePyr_;
    std::vector<GpuMat> maskPyr_;

    GpuMat buf_;

    std::vector<GpuMat> keyPointsPyr_;
    std::vector<int> keyPointsCount_;

    FAST_CUDA fastDetector_;

    Ptr<cuda::Filter> blurFilter;

    GpuMat d_keypoints_;
    virtual void setBlurForDescriptor(bool blurForDescriptor) = 0;
    virtual bool getBlurForDescriptor() const = 0;
};

//! @}
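
// The corresponding sketch for the refactored ORB class (illustrative, not
// part of this commit; d_img is assumed to be an 8-bit grayscale GpuMat):
void orbExample(const cv::cuda::GpuMat& d_img)
{
    cv::Ptr<cv::cuda::ORB> orb = cv::cuda::ORB::create(500);  // 500 features, other defaults

    cv::cuda::GpuMat d_keypoints, d_descriptors;
    orb->detectAndComputeAsync(d_img, cv::noArray(), d_keypoints, d_descriptors);

    std::vector<cv::KeyPoint> keypoints;
    orb->convert(d_keypoints, keypoints);  // downloads results from the default stream

    cv::Mat descriptors(d_descriptors);    // copy descriptors to host memory
}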

@@ -64,15 +64,18 @@ PERF_TEST_P(Image_Threshold_NonMaxSuppression, FAST,

    if (PERF_RUN_CUDA())
    {
        cv::cuda::FAST_CUDA d_fast(threshold, nonMaxSuppersion, 0.5);
        cv::Ptr<cv::cuda::FastFeatureDetector> d_fast =
                cv::cuda::FastFeatureDetector::create(threshold, nonMaxSuppersion,
                                                      cv::FastFeatureDetector::TYPE_9_16,
                                                      0.5 * img.size().area());

        const cv::cuda::GpuMat d_img(img);
        cv::cuda::GpuMat d_keypoints;

        TEST_CYCLE() d_fast(d_img, cv::cuda::GpuMat(), d_keypoints);
        TEST_CYCLE() d_fast->detectAsync(d_img, d_keypoints);

        std::vector<cv::KeyPoint> gpu_keypoints;
        d_fast.downloadKeypoints(d_keypoints, gpu_keypoints);
        d_fast->convert(d_keypoints, gpu_keypoints);

        sortKeyPoints(gpu_keypoints);

@@ -106,15 +109,15 @@ PERF_TEST_P(Image_NFeatures, ORB,

    if (PERF_RUN_CUDA())
    {
        cv::cuda::ORB_CUDA d_orb(nFeatures);
        cv::Ptr<cv::cuda::ORB> d_orb = cv::cuda::ORB::create(nFeatures);

        const cv::cuda::GpuMat d_img(img);
        cv::cuda::GpuMat d_keypoints, d_descriptors;

        TEST_CYCLE() d_orb(d_img, cv::cuda::GpuMat(), d_keypoints, d_descriptors);
        TEST_CYCLE() d_orb->detectAndComputeAsync(d_img, cv::noArray(), d_keypoints, d_descriptors);

        std::vector<cv::KeyPoint> gpu_keypoints;
        d_orb.downloadKeyPoints(d_keypoints, gpu_keypoints);
        d_orb->convert(d_keypoints, gpu_keypoints);

        cv::Mat gpu_descriptors(d_descriptors);

@@ -164,16 +167,16 @@ PERF_TEST_P(DescSize_Norm, BFMatch,

    if (PERF_RUN_CUDA())
    {
        cv::cuda::BFMatcher_CUDA d_matcher(normType);
        cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);

        const cv::cuda::GpuMat d_query(query);
        const cv::cuda::GpuMat d_train(train);
        cv::cuda::GpuMat d_trainIdx, d_distance;
        cv::cuda::GpuMat d_matches;

        TEST_CYCLE() d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
        TEST_CYCLE() d_matcher->matchAsync(d_query, d_train, d_matches);

        std::vector<cv::DMatch> gpu_matches;
        d_matcher.matchDownload(d_trainIdx, d_distance, gpu_matches);
        d_matcher->matchConvert(d_matches, gpu_matches);

        SANITY_CHECK_MATCHES(gpu_matches);
    }
@@ -223,16 +226,16 @@ PERF_TEST_P(DescSize_K_Norm, BFKnnMatch,

    if (PERF_RUN_CUDA())
    {
        cv::cuda::BFMatcher_CUDA d_matcher(normType);
        cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);

        const cv::cuda::GpuMat d_query(query);
        const cv::cuda::GpuMat d_train(train);
        cv::cuda::GpuMat d_trainIdx, d_distance, d_allDist;
        cv::cuda::GpuMat d_matches;

        TEST_CYCLE() d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k);
        TEST_CYCLE() d_matcher->knnMatchAsync(d_query, d_train, d_matches, k);

        std::vector< std::vector<cv::DMatch> > matchesTbl;
        d_matcher.knnMatchDownload(d_trainIdx, d_distance, matchesTbl);
        d_matcher->knnMatchConvert(d_matches, matchesTbl);

        std::vector<cv::DMatch> gpu_matches;
        toOneRowMatches(matchesTbl, gpu_matches);
@@ -277,16 +280,16 @@ PERF_TEST_P(DescSize_Norm, BFRadiusMatch,

    if (PERF_RUN_CUDA())
    {
        cv::cuda::BFMatcher_CUDA d_matcher(normType);
        cv::Ptr<cv::cuda::DescriptorMatcher> d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType);

        const cv::cuda::GpuMat d_query(query);
        const cv::cuda::GpuMat d_train(train);
        cv::cuda::GpuMat d_trainIdx, d_nMatches, d_distance;
        cv::cuda::GpuMat d_matches;

        TEST_CYCLE() d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, maxDistance);
        TEST_CYCLE() d_matcher->radiusMatchAsync(d_query, d_train, d_matches, maxDistance);

        std::vector< std::vector<cv::DMatch> > matchesTbl;
        d_matcher.radiusMatchDownload(d_trainIdx, d_distance, d_nMatches, matchesTbl);
        d_matcher->radiusMatchConvert(d_matches, matchesTbl);

        std::vector<cv::DMatch> gpu_matches;
        toOneRowMatches(matchesTbl, gpu_matches);
(File diff suppressed because it is too large.)
@@ -279,7 +279,7 @@ namespace cv { namespace cuda { namespace device
        #endif
        }

        int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold)
        int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold, cudaStream_t stream)
        {
            void* counter_ptr;
            cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
@@ -290,29 +290,29 @@ namespace cv { namespace cuda { namespace device
            grid.x = divUp(img.cols - 6, block.x);
            grid.y = divUp(img.rows - 6, block.y);

            cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
            cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) );

            if (score.data)
            {
                if (mask.data)
                    calcKeypoints<true><<<grid, block>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
                    calcKeypoints<true><<<grid, block, 0, stream>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
                else
                    calcKeypoints<true><<<grid, block>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
                    calcKeypoints<true><<<grid, block, 0, stream>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
            }
            else
            {
                if (mask.data)
                    calcKeypoints<false><<<grid, block>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
                    calcKeypoints<false><<<grid, block, 0, stream>>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold);
                else
                    calcKeypoints<false><<<grid, block>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
                    calcKeypoints<false><<<grid, block, 0, stream>>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold);
            }

            cudaSafeCall( cudaGetLastError() );

            cudaSafeCall( cudaDeviceSynchronize() );

            unsigned int count;
            cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
            cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream) );

            cudaSafeCall( cudaStreamSynchronize(stream) );

            return count;
        }
@@ -356,7 +356,7 @@ namespace cv { namespace cuda { namespace device
        #endif
        }

        int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response)
        int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response, cudaStream_t stream)
        {
            void* counter_ptr;
            cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
@@ -366,15 +366,15 @@ namespace cv { namespace cuda { namespace device
            dim3 grid;
            grid.x = divUp(count, block.x);

            cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) );
            cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) );

            nonmaxSuppression<<<grid, block>>>(kpLoc, count, score, loc, response);
            nonmaxSuppression<<<grid, block, 0, stream>>>(kpLoc, count, score, loc, response);
            cudaSafeCall( cudaGetLastError() );

            cudaSafeCall( cudaDeviceSynchronize() );

            unsigned int new_count;
            cudaSafeCall( cudaMemcpy(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
            cudaSafeCall( cudaMemcpyAsync(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream) );

            cudaSafeCall( cudaStreamSynchronize(stream) );

            return new_count;
        }
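
// Both hunks above apply the same transformation: the counter reset, the kernel
// launch, and the counter readback all move onto a caller-supplied cudaStream_t,
// and the device-wide cudaDeviceSynchronize() becomes a per-stream
// cudaStreamSynchronize(). A generic sketch of that pattern (the kernel body and
// helper are stand-ins; cudaSafeCall and divUp are the OpenCV utilities already
// used in this file):
__device__ unsigned int g_counter;

__global__ void countingKernel(const int* data, int n, int threshold)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n && data[i] > threshold)
        atomicAdd(&g_counter, 1u);  // each hit bumps the device-side counter
}

unsigned int launchAndCount(const int* data, int n, int threshold, cudaStream_t stream)
{
    void* counter_ptr;
    cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );

    // Reset, launch, and read back the counter entirely on the caller's stream.
    cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) );

    const dim3 block(256);
    const dim3 grid(divUp(n, block.x));
    countingKernel<<<grid, block, 0, stream>>>(data, n, threshold);
    cudaSafeCall( cudaGetLastError() );

    unsigned int count;
    cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(unsigned int),
                                  cudaMemcpyDeviceToHost, stream) );

    // Only this stream is synchronized; other streams keep running.
    cudaSafeCall( cudaStreamSynchronize(stream) );
    return count;
}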

@@ -47,124 +47,162 @@ using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

cv::cuda::FAST_CUDA::FAST_CUDA(int, bool, double) { throw_no_cuda(); }
void cv::cuda::FAST_CUDA::operator ()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
void cv::cuda::FAST_CUDA::operator ()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
void cv::cuda::FAST_CUDA::downloadKeypoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
void cv::cuda::FAST_CUDA::convertKeypoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
void cv::cuda::FAST_CUDA::release() { throw_no_cuda(); }
int cv::cuda::FAST_CUDA::calcKeyPointsLocation(const GpuMat&, const GpuMat&) { throw_no_cuda(); return 0; }
int cv::cuda::FAST_CUDA::getKeyPoints(GpuMat&) { throw_no_cuda(); return 0; }
Ptr<cv::cuda::FastFeatureDetector> cv::cuda::FastFeatureDetector::create(int, bool, int, int) { throw_no_cuda(); return Ptr<cv::cuda::FastFeatureDetector>(); }

#else /* !defined (HAVE_CUDA) */

cv::cuda::FAST_CUDA::FAST_CUDA(int _threshold, bool _nonmaxSuppression, double _keypointsRatio) :
    nonmaxSuppression(_nonmaxSuppression), threshold(_threshold), keypointsRatio(_keypointsRatio), count_(0)
{
}

void cv::cuda::FAST_CUDA::operator ()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
{
    if (image.empty())
        return;

    (*this)(image, mask, d_keypoints_);
    downloadKeypoints(d_keypoints_, keypoints);
}

void cv::cuda::FAST_CUDA::downloadKeypoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints)
{
    if (d_keypoints.empty())
        return;

    Mat h_keypoints(d_keypoints);
    convertKeypoints(h_keypoints, keypoints);
}

void cv::cuda::FAST_CUDA::convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints)
{
    if (h_keypoints.empty())
        return;

    CV_Assert(h_keypoints.rows == ROWS_COUNT && h_keypoints.elemSize() == 4);

    int npoints = h_keypoints.cols;

    keypoints.resize(npoints);

    const short2* loc_row = h_keypoints.ptr<short2>(LOCATION_ROW);
    const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);

    for (int i = 0; i < npoints; ++i)
    {
        KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
        keypoints[i] = kp;
    }
}

void cv::cuda::FAST_CUDA::operator ()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
{
    calcKeyPointsLocation(img, mask);
    keypoints.cols = getKeyPoints(keypoints);
}

namespace cv { namespace cuda { namespace device
{
    namespace fast
    {
        int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold);
        int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response);
        int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold, cudaStream_t stream);
        int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response, cudaStream_t stream);
    }
}}}

int cv::cuda::FAST_CUDA::calcKeyPointsLocation(const GpuMat& img, const GpuMat& mask)
namespace
{
    using namespace cv::cuda::device::fast;

    CV_Assert(img.type() == CV_8UC1);
    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));

    int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area());

    ensureSizeIsEnough(1, maxKeypoints, CV_16SC2, kpLoc_);

    if (nonmaxSuppression)
    class FAST_Impl : public cv::cuda::FastFeatureDetector
    {
    public:
        FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints);

        virtual void detect(InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask);
        virtual void detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream);

        virtual void convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints);

        virtual void setThreshold(int threshold) { threshold_ = threshold; }
        virtual int getThreshold() const { return threshold_; }

        virtual void setNonmaxSuppression(bool f) { nonmaxSuppression_ = f; }
        virtual bool getNonmaxSuppression() const { return nonmaxSuppression_; }

        virtual void setMaxNumPoints(int max_npoints) { max_npoints_ = max_npoints; }
        virtual int getMaxNumPoints() const { return max_npoints_; }

        virtual void setType(int type) { CV_Assert( type == TYPE_9_16 ); }
        virtual int getType() const { return TYPE_9_16; }

    private:
        int threshold_;
        bool nonmaxSuppression_;
        int max_npoints_;
    };

    FAST_Impl::FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints) :
        threshold_(threshold), nonmaxSuppression_(nonmaxSuppression), max_npoints_(max_npoints)
    {
        ensureSizeIsEnough(img.size(), CV_32SC1, score_);
        score_.setTo(Scalar::all(0));
    }

    count_ = calcKeypoints_gpu(img, mask, kpLoc_.ptr<short2>(), maxKeypoints, nonmaxSuppression ? score_ : PtrStepSzi(), threshold);
    count_ = std::min(count_, maxKeypoints);
    void FAST_Impl::detect(InputArray _image, std::vector<KeyPoint>& keypoints, InputArray _mask)
    {
        if (_image.empty())
        {
            keypoints.clear();
            return;
        }

        return count_;
        BufferPool pool(Stream::Null());
        GpuMat d_keypoints = pool.getBuffer(ROWS_COUNT, max_npoints_, CV_16SC2);

        detectAsync(_image, d_keypoints, _mask, Stream::Null());
        convert(d_keypoints, keypoints);
    }

    void FAST_Impl::detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream)
    {
        using namespace cv::cuda::device::fast;

        const GpuMat img = _image.getGpuMat();
        const GpuMat mask = _mask.getGpuMat();

        CV_Assert( img.type() == CV_8UC1 );
        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()) );

        BufferPool pool(stream);

        GpuMat kpLoc = pool.getBuffer(1, max_npoints_, CV_16SC2);

        GpuMat score;
        if (nonmaxSuppression_)
        {
            score = pool.getBuffer(img.size(), CV_32SC1);
            score.setTo(Scalar::all(0), stream);
        }

        int count = calcKeypoints_gpu(img, mask, kpLoc.ptr<short2>(), max_npoints_, score, threshold_, StreamAccessor::getStream(stream));
        count = std::min(count, max_npoints_);

        if (count == 0)
        {
            _keypoints.release();
            return;
        }

        ensureSizeIsEnough(ROWS_COUNT, count, CV_32FC1, _keypoints);
        GpuMat& keypoints = _keypoints.getGpuMatRef();

        if (nonmaxSuppression_)
        {
            count = nonmaxSuppression_gpu(kpLoc.ptr<short2>(), count, score, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW), StreamAccessor::getStream(stream));
            if (count == 0)
            {
                keypoints.release();
            }
            else
            {
                keypoints.cols = count;
            }
        }
        else
        {
            GpuMat locRow(1, count, kpLoc.type(), keypoints.ptr(0));
            kpLoc.colRange(0, count).copyTo(locRow, stream);
            keypoints.row(1).setTo(Scalar::all(0), stream);
        }
    }
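
    // Note how detect() and detectAsync() above draw temporaries from a
    // per-stream BufferPool instead of keeping GpuMat members as FAST_CUDA did.
    // A caller-side sketch of that facility (the setBufferPoolUsage call is an
    // assumption about application configuration, not something this file does):
    //
    //     cv::cuda::setBufferPoolUsage(true);   // opt in before the first Stream is created
    //     cv::cuda::Stream stream;
    //     cv::cuda::BufferPool pool(stream);
    //     cv::cuda::GpuMat scratch = pool.getBuffer(1, 5000, CV_16SC2);  // stream-local temporary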

    void FAST_Impl::convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints)
    {
        if (_gpu_keypoints.empty())
        {
            keypoints.clear();
            return;
        }

        Mat h_keypoints;
        if (_gpu_keypoints.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_keypoints.getGpuMat().download(h_keypoints);
        }
        else
        {
            h_keypoints = _gpu_keypoints.getMat();
        }

        CV_Assert( h_keypoints.rows == ROWS_COUNT );
        CV_Assert( h_keypoints.elemSize() == 4 );

        const int npoints = h_keypoints.cols;

        keypoints.resize(npoints);

        const short2* loc_row = h_keypoints.ptr<short2>(LOCATION_ROW);
        const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);

        for (int i = 0; i < npoints; ++i)
        {
            KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
            keypoints[i] = kp;
        }
    }
}

int cv::cuda::FAST_CUDA::getKeyPoints(GpuMat& keypoints)
Ptr<cv::cuda::FastFeatureDetector> cv::cuda::FastFeatureDetector::create(int threshold, bool nonmaxSuppression, int type, int max_npoints)
{
    using namespace cv::cuda::device::fast;

    if (count_ == 0)
        return 0;

    ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints);

    if (nonmaxSuppression)
        return nonmaxSuppression_gpu(kpLoc_.ptr<short2>(), count_, score_, keypoints.ptr<short2>(LOCATION_ROW), keypoints.ptr<float>(RESPONSE_ROW));

    GpuMat locRow(1, count_, kpLoc_.type(), keypoints.ptr(0));
    kpLoc_.colRange(0, count_).copyTo(locRow);
    keypoints.row(1).setTo(Scalar::all(0));

    return count_;
}

void cv::cuda::FAST_CUDA::release()
{
    kpLoc_.release();
    score_.release();

    d_keypoints_.release();
    CV_Assert( type == TYPE_9_16 );
    return makePtr<FAST_Impl>(threshold, nonmaxSuppression, max_npoints);
}

#endif /* !defined (HAVE_CUDA) */

modules/cudafeatures2d/src/feature2d_async.cpp (new file, 85 lines)
@@ -0,0 +1,85 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  (standard OpenCV BSD-style license header)
//
//M*/

#include "precomp.hpp"
|
||||
|
||||
cv::cuda::Feature2DAsync::~Feature2DAsync()
|
||||
{
|
||||
}
|
||||
|
||||
void cv::cuda::Feature2DAsync::detectAsync(InputArray image,
|
||||
OutputArray keypoints,
|
||||
InputArray mask,
|
||||
Stream& stream)
|
||||
{
|
||||
if (image.empty())
|
||||
{
|
||||
keypoints.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
detectAndComputeAsync(image, mask, keypoints, noArray(), false, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::Feature2DAsync::computeAsync(InputArray image,
|
||||
OutputArray keypoints,
|
||||
OutputArray descriptors,
|
||||
Stream& stream)
|
||||
{
|
||||
if (image.empty())
|
||||
{
|
||||
descriptors.release();
|
||||
return;
|
||||
}
|
||||
|
||||
detectAndComputeAsync(image, noArray(), keypoints, descriptors, true, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::Feature2DAsync::detectAndComputeAsync(InputArray /*image*/,
|
||||
InputArray /*mask*/,
|
||||
OutputArray /*keypoints*/,
|
||||
OutputArray /*descriptors*/,
|
||||
bool /*useProvidedKeypoints*/,
|
||||
Stream& /*stream*/)
|
||||
{
|
||||
CV_Error(Error::StsNotImplemented, "");
|
||||
}
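These async entry points are meant to be driven through a CUDA stream; a small usage sketch (the ORB instance is just one illustrative Feature2DAsync implementation, not something this file creates):

    void detectOnStream(const cv::cuda::GpuMat& d_img, std::vector<cv::KeyPoint>& keypoints)
    {
        cv::cuda::Stream stream;
        cv::Ptr<cv::cuda::ORB> orb = cv::cuda::ORB::create();

        cv::cuda::GpuMat d_keypoints; // results stay on the GPU until converted
        orb->detectAsync(d_img, d_keypoints, cv::noArray(), stream);

        stream.waitForCompletion();           // make sure the async work has finished
        orb->convert(d_keypoints, keypoints); // download and repack as cv::KeyPoint
    }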
@@ -47,18 +47,7 @@ using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

cv::cuda::ORB_CUDA::ORB_CUDA(int, float, int, int, int, int, int, int) : fastDetector_(20) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, GpuMat&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::downloadKeyPoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::convertKeyPoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::release() { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::buildScalePyramids(const GpuMat&, const GpuMat&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::computeKeyPointsPyramid() { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::computeDescriptors(GpuMat&) { throw_no_cuda(); }
void cv::cuda::ORB_CUDA::mergeKeyPoints(GpuMat&) { throw_no_cuda(); }
Ptr<cv::cuda::ORB> cv::cuda::ORB::create(int, float, int, int, int, int, int, int, int, bool) { throw_no_cuda(); return Ptr<cv::cuda::ORB>(); }

#else /* !defined (HAVE_CUDA) */

@@ -346,7 +335,100 @@ namespace
-1,-6, 0,-11/*mean (0.127148), correlation (0.547401)*/
};

void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize)
class ORB_Impl : public cv::cuda::ORB
{
public:
ORB_Impl(int nfeatures,
float scaleFactor,
int nlevels,
int edgeThreshold,
int firstLevel,
int WTA_K,
int scoreType,
int patchSize,
int fastThreshold,
bool blurForDescriptor);

virtual void detectAndCompute(InputArray _image, InputArray _mask, std::vector<KeyPoint>& keypoints, OutputArray _descriptors, bool useProvidedKeypoints);
virtual void detectAndComputeAsync(InputArray _image, InputArray _mask, OutputArray _keypoints, OutputArray _descriptors, bool useProvidedKeypoints, Stream& stream);

virtual void convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints);

virtual int descriptorSize() const { return kBytes; }
virtual int descriptorType() const { return CV_8U; }
virtual int defaultNorm() const { return NORM_HAMMING; }

virtual void setMaxFeatures(int maxFeatures) { nFeatures_ = maxFeatures; }
virtual int getMaxFeatures() const { return nFeatures_; }

virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
virtual double getScaleFactor() const { return scaleFactor_; }

virtual void setNLevels(int nlevels) { nLevels_ = nlevels; }
virtual int getNLevels() const { return nLevels_; }

virtual void setEdgeThreshold(int edgeThreshold) { edgeThreshold_ = edgeThreshold; }
virtual int getEdgeThreshold() const { return edgeThreshold_; }

virtual void setFirstLevel(int firstLevel) { firstLevel_ = firstLevel; }
virtual int getFirstLevel() const { return firstLevel_; }

virtual void setWTA_K(int wta_k) { WTA_K_ = wta_k; }
virtual int getWTA_K() const { return WTA_K_; }

virtual void setScoreType(int scoreType) { scoreType_ = scoreType; }
virtual int getScoreType() const { return scoreType_; }

virtual void setPatchSize(int patchSize) { patchSize_ = patchSize; }
virtual int getPatchSize() const { return patchSize_; }

virtual void setFastThreshold(int fastThreshold) { fastThreshold_ = fastThreshold; }
virtual int getFastThreshold() const { return fastThreshold_; }

virtual void setBlurForDescriptor(bool blurForDescriptor) { blurForDescriptor_ = blurForDescriptor; }
virtual bool getBlurForDescriptor() const { return blurForDescriptor_; }

private:
int nFeatures_;
float scaleFactor_;
int nLevels_;
int edgeThreshold_;
int firstLevel_;
int WTA_K_;
int scoreType_;
int patchSize_;
int fastThreshold_;
bool blurForDescriptor_;

private:
void buildScalePyramids(InputArray _image, InputArray _mask);
void computeKeyPointsPyramid();
void computeDescriptors(OutputArray _descriptors);
void mergeKeyPoints(OutputArray _keypoints);

private:
Ptr<cv::cuda::FastFeatureDetector> fastDetector_;

//! The number of desired features per scale
std::vector<size_t> n_features_per_level_;

//! Points to compute BRIEF descriptors from
GpuMat pattern_;

std::vector<GpuMat> imagePyr_;
std::vector<GpuMat> maskPyr_;

GpuMat buf_;

std::vector<GpuMat> keyPointsPyr_;
std::vector<int> keyPointsCount_;

Ptr<cuda::Filter> blurFilter_;

GpuMat d_keypoints_;
};

static void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize)
{
RNG rng(0x12345678);

@@ -381,7 +463,7 @@ namespace
}
}

void makeRandomPattern(int patchSize, Point* pattern, int npoints)
static void makeRandomPattern(int patchSize, Point* pattern, int npoints)
{
// we always start with a fixed seed,
// to make patterns the same on each run
@@ -393,155 +475,189 @@ namespace
pattern[i].y = rng.uniform(-patchSize / 2, patchSize / 2 + 1);
}
}
}

cv::cuda::ORB_CUDA::ORB_CUDA(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize) :
nFeatures_(nFeatures), scaleFactor_(scaleFactor), nLevels_(nLevels), edgeThreshold_(edgeThreshold), firstLevel_(firstLevel), WTA_K_(WTA_K),
scoreType_(scoreType), patchSize_(patchSize),
fastDetector_(DEFAULT_FAST_THRESHOLD)
{
CV_Assert(patchSize_ >= 2);

// fill the extractors and descriptors for the corresponding scales
float factor = 1.0f / scaleFactor_;
float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_));

n_features_per_level_.resize(nLevels_);
size_t sum_n_features = 0;
for (int level = 0; level < nLevels_ - 1; ++level)
ORB_Impl::ORB_Impl(int nFeatures,
float scaleFactor,
int nLevels,
int edgeThreshold,
int firstLevel,
int WTA_K,
int scoreType,
int patchSize,
int fastThreshold,
bool blurForDescriptor) :
nFeatures_(nFeatures),
scaleFactor_(scaleFactor),
nLevels_(nLevels),
edgeThreshold_(edgeThreshold),
firstLevel_(firstLevel),
WTA_K_(WTA_K),
scoreType_(scoreType),
patchSize_(patchSize),
fastThreshold_(fastThreshold),
blurForDescriptor_(blurForDescriptor)
{
n_features_per_level_[level] = cvRound(n_desired_features_per_scale);
sum_n_features += n_features_per_level_[level];
n_desired_features_per_scale *= factor;
}
n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features;
CV_Assert( patchSize_ >= 2 );
CV_Assert( WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4 );

// pre-compute the end of a row in a circular patch
int half_patch_size = patchSize_ / 2;
std::vector<int> u_max(half_patch_size + 2);
for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v)
u_max[v] = cvRound(std::sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
fastDetector_ = cuda::FastFeatureDetector::create(fastThreshold_);

// Make sure we are symmetric
for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v)
{
while (u_max[v_0] == u_max[v_0 + 1])
++v_0;
u_max[v] = v_0;
++v_0;
}
CV_Assert(u_max.size() < 32);
cv::cuda::device::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));
// fill the extractors and descriptors for the corresponding scales
float factor = 1.0f / scaleFactor_;
float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_));

// Calc pattern
const int npoints = 512;
Point pattern_buf[npoints];
const Point* pattern0 = (const Point*)bit_pattern_31_;
if (patchSize_ != 31)
{
pattern0 = pattern_buf;
makeRandomPattern(patchSize_, pattern_buf, npoints);
}

CV_Assert(WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4);

Mat h_pattern;

if (WTA_K_ == 2)
{
h_pattern.create(2, npoints, CV_32SC1);

int* pattern_x_ptr = h_pattern.ptr<int>(0);
int* pattern_y_ptr = h_pattern.ptr<int>(1);

for (int i = 0; i < npoints; ++i)
n_features_per_level_.resize(nLevels_);
size_t sum_n_features = 0;
for (int level = 0; level < nLevels_ - 1; ++level)
{
pattern_x_ptr[i] = pattern0[i].x;
pattern_y_ptr[i] = pattern0[i].y;
n_features_per_level_[level] = cvRound(n_desired_features_per_scale);
sum_n_features += n_features_per_level_[level];
n_desired_features_per_scale *= factor;
}
}
else
{
int ntuples = descriptorSize() * 4;
initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints);
}
n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features;

pattern_.upload(h_pattern);

blurFilter = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101);

blurForDescriptor = false;
}

namespace
{
inline float getScale(float scaleFactor, int firstLevel, int level)
{
return pow(scaleFactor, level - firstLevel);
}
}

void cv::cuda::ORB_CUDA::buildScalePyramids(const GpuMat& image, const GpuMat& mask)
{
CV_Assert(image.type() == CV_8UC1);
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));

imagePyr_.resize(nLevels_);
maskPyr_.resize(nLevels_);

for (int level = 0; level < nLevels_; ++level)
{
float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level);

Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale));

ensureSizeIsEnough(sz, image.type(), imagePyr_[level]);
ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]);
maskPyr_[level].setTo(Scalar::all(255));

// Compute the resized image
if (level != firstLevel_)
// pre-compute the end of a row in a circular patch
int half_patch_size = patchSize_ / 2;
std::vector<int> u_max(half_patch_size + 2);
for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v)
{
if (level < firstLevel_)
{
cuda::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR);
u_max[v] = cvRound(std::sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
}

if (!mask.empty())
cuda::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR);
}
else
{
cuda::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR);
// Make sure we are symmetric
for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v)
{
while (u_max[v_0] == u_max[v_0 + 1])
++v_0;
u_max[v] = v_0;
++v_0;
}
CV_Assert( u_max.size() < 32 );
cv::cuda::device::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));

if (!mask.empty())
{
cuda::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR);
cuda::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO);
}
// Calc pattern
const int npoints = 512;
Point pattern_buf[npoints];
const Point* pattern0 = (const Point*)bit_pattern_31_;
if (patchSize_ != 31)
{
pattern0 = pattern_buf;
makeRandomPattern(patchSize_, pattern_buf, npoints);
}

Mat h_pattern;
if (WTA_K_ == 2)
{
h_pattern.create(2, npoints, CV_32SC1);

int* pattern_x_ptr = h_pattern.ptr<int>(0);
int* pattern_y_ptr = h_pattern.ptr<int>(1);

for (int i = 0; i < npoints; ++i)
{
pattern_x_ptr[i] = pattern0[i].x;
pattern_y_ptr[i] = pattern0[i].y;
}
}
else
{
image.copyTo(imagePyr_[level]);

if (!mask.empty())
mask.copyTo(maskPyr_[level]);
int ntuples = descriptorSize() * 4;
initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints);
}

// Filter keypoints by image border
ensureSizeIsEnough(sz, CV_8UC1, buf_);
buf_.setTo(Scalar::all(0));
Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_);
buf_(inner).setTo(Scalar::all(255));
pattern_.upload(h_pattern);

cuda::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]);
blurFilter_ = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101);
}
}
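The per-level feature budget computed in the constructor is a geometric series. In notation of my own (not the code's), with $q = 1/\mathrm{scaleFactor}$ and $L = \mathrm{nlevels}$, level $l$ is assigned $n_l = n_0 q^l$ features, and $n_0$ is chosen so the levels sum to the requested total $N$:

$$N = \sum_{l=0}^{L-1} n_0 q^l = n_0 \frac{1 - q^L}{1 - q} \quad\Longrightarrow\quad n_0 = N \frac{1 - q}{1 - q^L}$$

The last level then takes the remainder, $N$ minus the rounded sum of the earlier levels, so per-level rounding never changes the total.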
namespace
{
//takes keypoints and culls them by the response
void cull(GpuMat& keypoints, int& count, int n_points)
void ORB_Impl::detectAndCompute(InputArray _image, InputArray _mask, std::vector<KeyPoint>& keypoints, OutputArray _descriptors, bool useProvidedKeypoints)
{
CV_Assert( useProvidedKeypoints == false );

detectAndComputeAsync(_image, _mask, d_keypoints_, _descriptors, false, Stream::Null());
convert(d_keypoints_, keypoints);
}

void ORB_Impl::detectAndComputeAsync(InputArray _image, InputArray _mask, OutputArray _keypoints, OutputArray _descriptors, bool useProvidedKeypoints, Stream& stream)
{
CV_Assert( useProvidedKeypoints == false );

buildScalePyramids(_image, _mask);
computeKeyPointsPyramid();
if (_descriptors.needed())
{
computeDescriptors(_descriptors);
}
mergeKeyPoints(_keypoints);
}

static float getScale(float scaleFactor, int firstLevel, int level)
{
return pow(scaleFactor, level - firstLevel);
}

void ORB_Impl::buildScalePyramids(InputArray _image, InputArray _mask)
{
const GpuMat image = _image.getGpuMat();
const GpuMat mask = _mask.getGpuMat();

CV_Assert( image.type() == CV_8UC1 );
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) );

imagePyr_.resize(nLevels_);
maskPyr_.resize(nLevels_);

for (int level = 0; level < nLevels_; ++level)
{
float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level);

Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale));

ensureSizeIsEnough(sz, image.type(), imagePyr_[level]);
ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]);
maskPyr_[level].setTo(Scalar::all(255));

// Compute the resized image
if (level != firstLevel_)
{
if (level < firstLevel_)
{
cuda::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR);

if (!mask.empty())
cuda::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR);
}
else
{
cuda::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR);

if (!mask.empty())
{
cuda::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR);
cuda::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO);
}
}
}
else
{
image.copyTo(imagePyr_[level]);

if (!mask.empty())
mask.copyTo(maskPyr_[level]);
}

// Filter keypoints by image border
ensureSizeIsEnough(sz, CV_8UC1, buf_);
buf_.setTo(Scalar::all(0));
Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_);
buf_(inner).setTo(Scalar::all(255));

cuda::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]);
}
}

// takes keypoints and culls them by the response
static void cull(GpuMat& keypoints, int& count, int n_points)
{
using namespace cv::cuda::device::orb;

@@ -554,222 +670,199 @@ namespace
return;
}

count = cull_gpu(keypoints.ptr<int>(FAST_CUDA::LOCATION_ROW), keypoints.ptr<float>(FAST_CUDA::RESPONSE_ROW), count, n_points);
count = cull_gpu(keypoints.ptr<int>(cuda::FastFeatureDetector::LOCATION_ROW), keypoints.ptr<float>(cuda::FastFeatureDetector::RESPONSE_ROW), count, n_points);
}
}
}

void cv::cuda::ORB_CUDA::computeKeyPointsPyramid()
{
using namespace cv::cuda::device::orb;

int half_patch_size = patchSize_ / 2;

keyPointsPyr_.resize(nLevels_);
keyPointsCount_.resize(nLevels_);

for (int level = 0; level < nLevels_; ++level)
void ORB_Impl::computeKeyPointsPyramid()
{
keyPointsCount_[level] = fastDetector_.calcKeyPointsLocation(imagePyr_[level], maskPyr_[level]);
using namespace cv::cuda::device::orb;

if (keyPointsCount_[level] == 0)
continue;
int half_patch_size = patchSize_ / 2;

ensureSizeIsEnough(3, keyPointsCount_[level], CV_32FC1, keyPointsPyr_[level]);
keyPointsPyr_.resize(nLevels_);
keyPointsCount_.resize(nLevels_);

GpuMat fastKpRange = keyPointsPyr_[level].rowRange(0, 2);
keyPointsCount_[level] = fastDetector_.getKeyPoints(fastKpRange);
fastDetector_->setThreshold(fastThreshold_);

if (keyPointsCount_[level] == 0)
continue;

int n_features = static_cast<int>(n_features_per_level_[level]);

if (scoreType_ == ORB::HARRIS_SCORE)
for (int level = 0; level < nLevels_; ++level)
{
// Keep more points than necessary as FAST does not give amazing corners
cull(keyPointsPyr_[level], keyPointsCount_[level], 2 * n_features);
fastDetector_->setMaxNumPoints(0.05 * imagePyr_[level].size().area());

// Compute the Harris cornerness (better scoring than FAST)
HarrisResponses_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(1), keyPointsCount_[level], 7, HARRIS_K, 0);
GpuMat fastKpRange;
fastDetector_->detectAsync(imagePyr_[level], fastKpRange, maskPyr_[level], Stream::Null());

keyPointsCount_[level] = fastKpRange.cols;

if (keyPointsCount_[level] == 0)
continue;

ensureSizeIsEnough(3, keyPointsCount_[level], fastKpRange.type(), keyPointsPyr_[level]);
fastKpRange.copyTo(keyPointsPyr_[level].rowRange(0, 2));

const int n_features = static_cast<int>(n_features_per_level_[level]);

if (scoreType_ == ORB::HARRIS_SCORE)
{
// Keep more points than necessary as FAST does not give amazing corners
cull(keyPointsPyr_[level], keyPointsCount_[level], 2 * n_features);

// Compute the Harris cornerness (better scoring than FAST)
HarrisResponses_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(1), keyPointsCount_[level], 7, HARRIS_K, 0);
}

//cull to the final desired level, using the new Harris scores or the original FAST scores.
cull(keyPointsPyr_[level], keyPointsCount_[level], n_features);

// Compute orientation
IC_Angle_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2), keyPointsCount_[level], half_patch_size, 0);
}

//cull to the final desired level, using the new Harris scores or the original FAST scores.
cull(keyPointsPyr_[level], keyPointsCount_[level], n_features);

// Compute orientation
IC_Angle_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2), keyPointsCount_[level], half_patch_size, 0);
}
}

void cv::cuda::ORB_CUDA::computeDescriptors(GpuMat& descriptors)
{
using namespace cv::cuda::device::orb;

int nAllkeypoints = 0;

for (int level = 0; level < nLevels_; ++level)
nAllkeypoints += keyPointsCount_[level];

if (nAllkeypoints == 0)
{
descriptors.release();
return;
}

ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, descriptors);

int offset = 0;

for (int level = 0; level < nLevels_; ++level)
void ORB_Impl::computeDescriptors(OutputArray _descriptors)
{
if (keyPointsCount_[level] == 0)
continue;
using namespace cv::cuda::device::orb;

GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]);
int nAllkeypoints = 0;

if (blurForDescriptor)
for (int level = 0; level < nLevels_; ++level)
nAllkeypoints += keyPointsCount_[level];

if (nAllkeypoints == 0)
{
// preprocess the resized image
ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_);
blurFilter->apply(imagePyr_[level], buf_);
_descriptors.release();
return;
}

computeOrbDescriptor_gpu(blurForDescriptor ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2),
keyPointsCount_[level], pattern_.ptr<int>(0), pattern_.ptr<int>(1), descRange, descriptorSize(), WTA_K_, 0);
ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, _descriptors);
GpuMat descriptors = _descriptors.getGpuMat();

offset += keyPointsCount_[level];
int offset = 0;

for (int level = 0; level < nLevels_; ++level)
{
if (keyPointsCount_[level] == 0)
continue;

GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]);

if (blurForDescriptor_)
{
// preprocess the resized image
ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_);
blurFilter_->apply(imagePyr_[level], buf_);
}

computeOrbDescriptor_gpu(blurForDescriptor_ ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2),
keyPointsCount_[level], pattern_.ptr<int>(0), pattern_.ptr<int>(1), descRange, descriptorSize(), WTA_K_, 0);

offset += keyPointsCount_[level];
}
}
}

void cv::cuda::ORB_CUDA::mergeKeyPoints(GpuMat& keypoints)
{
using namespace cv::cuda::device::orb;

int nAllkeypoints = 0;

for (int level = 0; level < nLevels_; ++level)
nAllkeypoints += keyPointsCount_[level];

if (nAllkeypoints == 0)
void ORB_Impl::mergeKeyPoints(OutputArray _keypoints)
{
keypoints.release();
return;
using namespace cv::cuda::device::orb;

int nAllkeypoints = 0;

for (int level = 0; level < nLevels_; ++level)
nAllkeypoints += keyPointsCount_[level];

if (nAllkeypoints == 0)
{
_keypoints.release();
return;
}

ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, _keypoints);
GpuMat& keypoints = _keypoints.getGpuMatRef();

int offset = 0;

for (int level = 0; level < nLevels_; ++level)
{
if (keyPointsCount_[level] == 0)
continue;

float sf = getScale(scaleFactor_, firstLevel_, level);

GpuMat keyPointsRange = keypoints.colRange(offset, offset + keyPointsCount_[level]);

float locScale = level != firstLevel_ ? sf : 1.0f;

mergeLocation_gpu(keyPointsPyr_[level].ptr<short2>(0), keyPointsRange.ptr<float>(0), keyPointsRange.ptr<float>(1), keyPointsCount_[level], locScale, 0);

GpuMat range = keyPointsRange.rowRange(2, 4);
keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range);

keyPointsRange.row(4).setTo(Scalar::all(level));
keyPointsRange.row(5).setTo(Scalar::all(patchSize_ * sf));

offset += keyPointsCount_[level];
}
}

ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, keypoints);

int offset = 0;

for (int level = 0; level < nLevels_; ++level)
void ORB_Impl::convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints)
{
if (keyPointsCount_[level] == 0)
continue;
if (_gpu_keypoints.empty())
{
keypoints.clear();
return;
}

float sf = getScale(scaleFactor_, firstLevel_, level);
Mat h_keypoints;
if (_gpu_keypoints.kind() == _InputArray::CUDA_GPU_MAT)
{
_gpu_keypoints.getGpuMat().download(h_keypoints);
}
else
{
h_keypoints = _gpu_keypoints.getMat();
}

GpuMat keyPointsRange = keypoints.colRange(offset, offset + keyPointsCount_[level]);
CV_Assert( h_keypoints.rows == ROWS_COUNT );
CV_Assert( h_keypoints.type() == CV_32FC1 );

float locScale = level != firstLevel_ ? sf : 1.0f;
const int npoints = h_keypoints.cols;

mergeLocation_gpu(keyPointsPyr_[level].ptr<short2>(0), keyPointsRange.ptr<float>(0), keyPointsRange.ptr<float>(1), keyPointsCount_[level], locScale, 0);
keypoints.resize(npoints);

GpuMat range = keyPointsRange.rowRange(2, 4);
keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range);
const float* x_ptr = h_keypoints.ptr<float>(X_ROW);
const float* y_ptr = h_keypoints.ptr<float>(Y_ROW);
const float* response_ptr = h_keypoints.ptr<float>(RESPONSE_ROW);
const float* angle_ptr = h_keypoints.ptr<float>(ANGLE_ROW);
const float* octave_ptr = h_keypoints.ptr<float>(OCTAVE_ROW);
const float* size_ptr = h_keypoints.ptr<float>(SIZE_ROW);

keyPointsRange.row(4).setTo(Scalar::all(level));
keyPointsRange.row(5).setTo(Scalar::all(patchSize_ * sf));
for (int i = 0; i < npoints; ++i)
{
KeyPoint kp;

offset += keyPointsCount_[level];
kp.pt.x = x_ptr[i];
kp.pt.y = y_ptr[i];
kp.response = response_ptr[i];
kp.angle = angle_ptr[i];
kp.octave = static_cast<int>(octave_ptr[i]);
kp.size = size_ptr[i];

keypoints[i] = kp;
}
}
}
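convert() above relies on the row layout this implementation uses for keypoints on the GPU: a ROWS_COUNT x N CV_32FC1 matrix, one row per field and one column per keypoint. Sketched as an enum mirroring the names this file indexes with (a reading aid, not a quote of the header):

    enum
    {
        X_ROW = 0,    // keypoint x coordinate
        Y_ROW,        // keypoint y coordinate
        RESPONSE_ROW, // FAST or Harris corner response
        ANGLE_ROW,    // orientation in degrees
        OCTAVE_ROW,   // pyramid level, stored as float
        SIZE_ROW,     // patch size scaled by the level's factor
        ROWS_COUNT    // six rows in total
    };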
void cv::cuda::ORB_CUDA::downloadKeyPoints(const GpuMat &d_keypoints, std::vector<KeyPoint>& keypoints)
Ptr<cv::cuda::ORB> cv::cuda::ORB::create(int nfeatures,
float scaleFactor,
int nlevels,
int edgeThreshold,
int firstLevel,
int WTA_K,
int scoreType,
int patchSize,
int fastThreshold,
bool blurForDescriptor)
{
if (d_keypoints.empty())
{
keypoints.clear();
return;
}

Mat h_keypoints(d_keypoints);

convertKeyPoints(h_keypoints, keypoints);
}

void cv::cuda::ORB_CUDA::convertKeyPoints(const Mat &d_keypoints, std::vector<KeyPoint>& keypoints)
{
if (d_keypoints.empty())
{
keypoints.clear();
return;
}

CV_Assert(d_keypoints.type() == CV_32FC1 && d_keypoints.rows == ROWS_COUNT);

const float* x_ptr = d_keypoints.ptr<float>(X_ROW);
const float* y_ptr = d_keypoints.ptr<float>(Y_ROW);
const float* response_ptr = d_keypoints.ptr<float>(RESPONSE_ROW);
const float* angle_ptr = d_keypoints.ptr<float>(ANGLE_ROW);
const float* octave_ptr = d_keypoints.ptr<float>(OCTAVE_ROW);
const float* size_ptr = d_keypoints.ptr<float>(SIZE_ROW);

keypoints.resize(d_keypoints.cols);

for (int i = 0; i < d_keypoints.cols; ++i)
{
KeyPoint kp;

kp.pt.x = x_ptr[i];
kp.pt.y = y_ptr[i];
kp.response = response_ptr[i];
kp.angle = angle_ptr[i];
kp.octave = static_cast<int>(octave_ptr[i]);
kp.size = size_ptr[i];

keypoints[i] = kp;
}
}

void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints)
{
buildScalePyramids(image, mask);
computeKeyPointsPyramid();
mergeKeyPoints(keypoints);
}

void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors)
{
buildScalePyramids(image, mask);
computeKeyPointsPyramid();
computeDescriptors(descriptors);
mergeKeyPoints(keypoints);
}

void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
{
(*this)(image, mask, d_keypoints_);
downloadKeyPoints(d_keypoints_, keypoints);
}

void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors)
{
(*this)(image, mask, d_keypoints_, descriptors);
downloadKeyPoints(d_keypoints_, keypoints);
}

void cv::cuda::ORB_CUDA::release()
{
imagePyr_.clear();
maskPyr_.clear();

buf_.release();

keyPointsPyr_.clear();

fastDetector_.release();

d_keypoints_.release();
return makePtr<ORB_Impl>(nfeatures, scaleFactor, nlevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize, fastThreshold, blurForDescriptor);
}

#endif /* !defined (HAVE_CUDA) */
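Putting the new interface together, ORB is now driven entirely through the factory; a hedged usage sketch (the parameter values shown are the stock defaults, and the input image is an illustrative 8-bit grayscale file):

    cv::Mat img = cv::imread("scene.png", cv::IMREAD_GRAYSCALE); // hypothetical input
    cv::cuda::GpuMat d_img(img);

    cv::Ptr<cv::cuda::ORB> orb = cv::cuda::ORB::create(
        500,    // nfeatures
        1.2f,   // scaleFactor
        8,      // nlevels
        31,     // edgeThreshold
        0,      // firstLevel
        2,      // WTA_K
        cv::ORB::HARRIS_SCORE,
        31,     // patchSize
        20,     // fastThreshold
        false); // blurForDescriptor

    std::vector<cv::KeyPoint> keypoints;
    cv::cuda::GpuMat d_descriptors;
    orb->detectAndCompute(d_img, cv::noArray(), keypoints, d_descriptors);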
@@ -76,15 +76,14 @@ CUDA_TEST_P(FAST, Accuracy)
cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(image.empty());

cv::cuda::FAST_CUDA fast(threshold);
fast.nonmaxSuppression = nonmaxSuppression;
cv::Ptr<cv::cuda::FastFeatureDetector> fast = cv::cuda::FastFeatureDetector::create(threshold, nonmaxSuppression);

if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
fast(loadMat(image), cv::cuda::GpuMat(), keypoints);
fast->detect(loadMat(image), keypoints);
}
catch (const cv::Exception& e)
{
@@ -94,7 +93,7 @@ CUDA_TEST_P(FAST, Accuracy)
else
{
std::vector<cv::KeyPoint> keypoints;
fast(loadMat(image), cv::cuda::GpuMat(), keypoints);
fast->detect(loadMat(image), keypoints);

std::vector<cv::KeyPoint> keypoints_gold;
cv::FAST(image, keypoints_gold, threshold, nonmaxSuppression);
@@ -123,7 +122,7 @@ namespace
IMPLEMENT_PARAM_CLASS(ORB_BlurForDescriptor, bool)
}

CV_ENUM(ORB_ScoreType, ORB::HARRIS_SCORE, ORB::FAST_SCORE)
CV_ENUM(ORB_ScoreType, cv::ORB::HARRIS_SCORE, cv::ORB::FAST_SCORE)

PARAM_TEST_CASE(ORB, cv::cuda::DeviceInfo, ORB_FeaturesCount, ORB_ScaleFactor, ORB_LevelsCount, ORB_EdgeThreshold, ORB_firstLevel, ORB_WTA_K, ORB_ScoreType, ORB_PatchSize, ORB_BlurForDescriptor)
{
@@ -163,8 +162,9 @@ CUDA_TEST_P(ORB, Accuracy)
cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));

cv::cuda::ORB_CUDA orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
orb.blurForDescriptor = blurForDescriptor;
cv::Ptr<cv::cuda::ORB> orb =
cv::cuda::ORB::create(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel,
WTA_K, scoreType, patchSize, 20, blurForDescriptor);

if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
{
@@ -172,7 +172,7 @@ CUDA_TEST_P(ORB, Accuracy)
{
std::vector<cv::KeyPoint> keypoints;
cv::cuda::GpuMat descriptors;
orb(loadMat(image), loadMat(mask), keypoints, descriptors);
orb->detectAndComputeAsync(loadMat(image), loadMat(mask), keypoints, descriptors);
}
catch (const cv::Exception& e)
{
@@ -183,7 +183,7 @@ CUDA_TEST_P(ORB, Accuracy)
{
std::vector<cv::KeyPoint> keypoints;
cv::cuda::GpuMat descriptors;
orb(loadMat(image), loadMat(mask), keypoints, descriptors);
orb->detectAndCompute(loadMat(image), loadMat(mask), keypoints, descriptors);

cv::Ptr<cv::ORB> orb_gold = cv::ORB::create(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);

@@ -208,7 +208,7 @@ INSTANTIATE_TEST_CASE_P(CUDA_Features2D, ORB, testing::Combine(
testing::Values(ORB_ScaleFactor(1.2f)),
testing::Values(ORB_LevelsCount(4), ORB_LevelsCount(8)),
testing::Values(ORB_EdgeThreshold(31)),
testing::Values(ORB_firstLevel(0), ORB_firstLevel(2)),
testing::Values(ORB_firstLevel(0)),
testing::Values(ORB_WTA_K(2), ORB_WTA_K(3), ORB_WTA_K(4)),
testing::Values(ORB_ScoreType(cv::ORB::HARRIS_SCORE)),
testing::Values(ORB_PatchSize(31), ORB_PatchSize(29)),
@@ -285,7 +285,8 @@ PARAM_TEST_CASE(BruteForceMatcher, cv::cuda::DeviceInfo, NormCode, DescriptorSiz

CUDA_TEST_P(BruteForceMatcher, Match_Single)
{
cv::cuda::BFMatcher_CUDA matcher(normCode);
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

cv::cuda::GpuMat mask;
if (useMask)
@@ -295,7 +296,7 @@ CUDA_TEST_P(BruteForceMatcher, Match_Single)
}

std::vector<cv::DMatch> matches;
matcher.match(loadMat(query), loadMat(train), matches, mask);
matcher->match(loadMat(query), loadMat(train), matches, mask);

ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());

@@ -312,13 +313,14 @@ CUDA_TEST_P(BruteForceMatcher, Match_Single)

CUDA_TEST_P(BruteForceMatcher, Match_Collection)
{
cv::cuda::BFMatcher_CUDA matcher(normCode);
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

cv::cuda::GpuMat d_train(train);

// make add() twice to test such case
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));

// prepare masks (make first nearest match illegal)
std::vector<cv::cuda::GpuMat> masks(2);
@@ -331,9 +333,9 @@ CUDA_TEST_P(BruteForceMatcher, Match_Collection)

std::vector<cv::DMatch> matches;
if (useMask)
matcher.match(cv::cuda::GpuMat(query), matches, masks);
matcher->match(cv::cuda::GpuMat(query), matches, masks);
else
matcher.match(cv::cuda::GpuMat(query), matches);
matcher->match(cv::cuda::GpuMat(query), matches);

ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());

@@ -366,7 +368,8 @@ CUDA_TEST_P(BruteForceMatcher, Match_Collection)

CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
{
cv::cuda::BFMatcher_CUDA matcher(normCode);
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

const int knn = 2;

@@ -378,7 +381,7 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)
}

std::vector< std::vector<cv::DMatch> > matches;
matcher.knnMatch(loadMat(query), loadMat(train), matches, knn, mask);
matcher->knnMatch(loadMat(query), loadMat(train), matches, knn, mask);

ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());

@@ -405,7 +408,8 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single)

CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
{
cv::cuda::BFMatcher_CUDA matcher(normCode);
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

const int knn = 3;

@@ -417,7 +421,7 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)
}

std::vector< std::vector<cv::DMatch> > matches;
matcher.knnMatch(loadMat(query), loadMat(train), matches, knn, mask);
matcher->knnMatch(loadMat(query), loadMat(train), matches, knn, mask);

ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());

@@ -444,15 +448,16 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single)

CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
{
cv::cuda::BFMatcher_CUDA matcher(normCode);
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

const int knn = 2;

cv::cuda::GpuMat d_train(train);

// make add() twice to test such case
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));

// prepare masks (make first nearest match illegal)
std::vector<cv::cuda::GpuMat> masks(2);
@@ -466,9 +471,9 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)
std::vector< std::vector<cv::DMatch> > matches;

if (useMask)
matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
else
matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn);
matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn);

ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());

@@ -506,15 +511,16 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection)

CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
{
cv::cuda::BFMatcher_CUDA matcher(normCode);
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

const int knn = 3;

cv::cuda::GpuMat d_train(train);

// make add() twice to test such case
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));

// prepare masks (make first nearest match illegal)
std::vector<cv::cuda::GpuMat> masks(2);
@@ -528,9 +534,9 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)
std::vector< std::vector<cv::DMatch> > matches;

if (useMask)
matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn, masks);
else
matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn);
matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn);

ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());

@@ -568,7 +574,8 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection)

CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
{
cv::cuda::BFMatcher_CUDA matcher(normCode);
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

const float radius = 1.f / countFactor;

@@ -577,7 +584,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
try
{
std::vector< std::vector<cv::DMatch> > matches;
matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
matcher->radiusMatch(loadMat(query), loadMat(train), matches, radius);
}
catch (const cv::Exception& e)
{
@@ -594,7 +601,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)
}

std::vector< std::vector<cv::DMatch> > matches;
matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius, mask);
matcher->radiusMatch(loadMat(query), loadMat(train), matches, radius, mask);

ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());

@@ -617,7 +624,8 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single)

CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
{
cv::cuda::BFMatcher_CUDA matcher(normCode);
cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
cv::cuda::DescriptorMatcher::createBFMatcher(normCode);

const int n = 3;
const float radius = 1.f / countFactor * n;
@@ -625,8 +633,8 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
cv::cuda::GpuMat d_train(train);

// make add() twice to test such case
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
matcher.add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));

// prepare masks (make first nearest match illegal)
std::vector<cv::cuda::GpuMat> masks(2);
@@ -642,7 +650,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
try
{
std::vector< std::vector<cv::DMatch> > matches;
matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
}
catch (const cv::Exception& e)
{
@@ -654,9 +662,9 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection)
std::vector< std::vector<cv::DMatch> > matches;

if (useMask)
matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks);
else
matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius);
matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius);

ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());

@@ -154,7 +154,7 @@ void CpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat

matches_info.matches.clear();

Ptr<DescriptorMatcher> matcher;
Ptr<cv::DescriptorMatcher> matcher;
#if 0 // TODO check this
if (ocl::useOpenCL())
{
@@ -220,13 +220,13 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat
descriptors1_.upload(features1.descriptors);
descriptors2_.upload(features2.descriptors);

BFMatcher_CUDA matcher(NORM_L2);
Ptr<cuda::DescriptorMatcher> matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2);

MatchesSet matches;

// Find 1->2 matches
pair_matches.clear();
matcher.knnMatchSingle(descriptors1_, descriptors2_, train_idx_, distance_, all_dist_, 2);
matcher.knnMatchDownload(train_idx_, distance_, pair_matches);
matcher->knnMatch(descriptors1_, descriptors2_, pair_matches, 2);
for (size_t i = 0; i < pair_matches.size(); ++i)
{
if (pair_matches[i].size() < 2)
@@ -242,8 +242,7 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat

// Find 2->1 matches
pair_matches.clear();
matcher.knnMatchSingle(descriptors2_, descriptors1_, train_idx_, distance_, all_dist_, 2);
matcher.knnMatchDownload(train_idx_, distance_, pair_matches);
matcher->knnMatch(descriptors2_, descriptors1_, pair_matches, 2);
for (size_t i = 0; i < pair_matches.size(); ++i)
{
if (pair_matches[i].size() < 2)
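The symmetric knnMatch calls above feed a best/second-best ratio filter; a hedged sketch of that filtering step, with match_conf standing in for the stitching matcher's own confidence field (the exact threshold value is illustrative):

    void filterByRatio(const std::vector< std::vector<cv::DMatch> >& pair_matches,
                       float match_conf, std::vector<cv::DMatch>& good)
    {
        for (size_t i = 0; i < pair_matches.size(); ++i)
        {
            if (pair_matches[i].size() < 2)
                continue; // need both a best and a second-best match

            const cv::DMatch& m0 = pair_matches[i][0];
            const cv::DMatch& m1 = pair_matches[i][1];

            // keep the match only if it clearly beats the runner-up
            if (m0.distance < (1.f - match_conf) * m1.distance)
                good.push_back(m0);
        }
    }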
@@ -322,14 +322,14 @@ TEST(FAST)
FAST(src, keypoints, 20);
CPU_OFF;

cuda::FAST_CUDA d_FAST(20);
cv::Ptr<cv::cuda::FastFeatureDetector> d_FAST = cv::cuda::FastFeatureDetector::create(20);
cuda::GpuMat d_src(src);
cuda::GpuMat d_keypoints;

d_FAST(d_src, cuda::GpuMat(), d_keypoints);
d_FAST->detectAsync(d_src, d_keypoints);

CUDA_ON;
d_FAST(d_src, cuda::GpuMat(), d_keypoints);
d_FAST->detectAsync(d_src, d_keypoints);
CUDA_OFF;
}

@@ -350,15 +350,15 @@ TEST(ORB)
orb->detectAndCompute(src, Mat(), keypoints, descriptors);
CPU_OFF;

cuda::ORB_CUDA d_orb;
Ptr<cuda::ORB> d_orb = cuda::ORB::create();
cuda::GpuMat d_src(src);
cuda::GpuMat d_keypoints;
cuda::GpuMat d_descriptors;

d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);

CUDA_ON;
d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
CUDA_OFF;
}

@@ -379,14 +379,14 @@ TEST(BruteForceMatcher)

// Init CUDA matcher

cuda::BFMatcher_CUDA d_matcher(NORM_L2);
Ptr<cuda::DescriptorMatcher> d_matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2);

cuda::GpuMat d_query(query);
cuda::GpuMat d_train(train);

// Output
vector< vector<DMatch> > matches(2);
cuda::GpuMat d_trainIdx, d_distance, d_allDist, d_nMatches;
cuda::GpuMat d_matches;

SUBTEST << "match";

@@ -396,10 +396,10 @@ TEST(BruteForceMatcher)
matcher.match(query, train, matches[0]);
CPU_OFF;

d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
d_matcher->matchAsync(d_query, d_train, d_matches);

CUDA_ON;
d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
d_matcher->matchAsync(d_query, d_train, d_matches);
CUDA_OFF;

SUBTEST << "knnMatch";
@@ -410,10 +410,10 @@ TEST(BruteForceMatcher)
matcher.knnMatch(query, train, matches, 2);
CPU_OFF;

d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);

CUDA_ON;
d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);
CUDA_OFF;

SUBTEST << "radiusMatch";
@@ -426,12 +426,10 @@ TEST(BruteForceMatcher)
matcher.radiusMatch(query, train, matches, max_distance);
CPU_OFF;

d_trainIdx.release();

d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);

CUDA_ON;
d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);
CUDA_OFF;
}
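Each *Async call above leaves its raw results in a single GpuMat; getting std::vector<cv::DMatch> back on the host goes through the matcher's convert helpers. A minimal sketch, assuming the d_matcher, d_query and d_train from the snippet above:

    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_matches;

    d_matcher->matchAsync(d_query, d_train, d_matches, cv::noArray(), stream);
    stream.waitForCompletion(); // the async call returns before the GPU work is done

    std::vector<cv::DMatch> matches;
    d_matcher->matchConvert(d_matches, matches); // download and repack on the host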