From 6f91b7f6a4605591f16a121058940481e5b5a2c7 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 12 Jan 2015 12:46:44 +0300 Subject: [PATCH 1/8] remove firstLevel=2 test case, because CPU implementation fails --- modules/cudafeatures2d/test/test_features2d.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cudafeatures2d/test/test_features2d.cpp b/modules/cudafeatures2d/test/test_features2d.cpp index 6e4479b7d..468024a5d 100644 --- a/modules/cudafeatures2d/test/test_features2d.cpp +++ b/modules/cudafeatures2d/test/test_features2d.cpp @@ -208,7 +208,7 @@ INSTANTIATE_TEST_CASE_P(CUDA_Features2D, ORB, testing::Combine( testing::Values(ORB_ScaleFactor(1.2f)), testing::Values(ORB_LevelsCount(4), ORB_LevelsCount(8)), testing::Values(ORB_EdgeThreshold(31)), - testing::Values(ORB_firstLevel(0), ORB_firstLevel(2)), + testing::Values(ORB_firstLevel(0)), testing::Values(ORB_WTA_K(2), ORB_WTA_K(3), ORB_WTA_K(4)), testing::Values(ORB_ScoreType(cv::ORB::HARRIS_SCORE)), testing::Values(ORB_PatchSize(31), ORB_PatchSize(29)), From 14ef62ed661893d44546a3c0b08a518bd39ee99d Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 12 Jan 2015 18:11:09 +0300 Subject: [PATCH 2/8] refactor CUDA FAST feature detector algorithm: use new FastFeatureDetector abstract interface and hidden implementation --- .../include/opencv2/cudafeatures2d.hpp | 115 +++------ .../cudafeatures2d/perf/perf_features2d.cpp | 9 +- modules/cudafeatures2d/src/cuda/fast.cu | 30 +-- modules/cudafeatures2d/src/fast.cpp | 238 ++++++++++-------- modules/cudafeatures2d/src/orb.cpp | 24 +- .../cudafeatures2d/test/test_features2d.cpp | 7 +- samples/gpu/performance/tests.cpp | 6 +- 7 files changed, 213 insertions(+), 216 deletions(-) diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp index f61d2dfd0..f6f674d2a 100644 --- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp +++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp @@ -48,6 +48,7 @@ #endif #include "opencv2/core/cuda.hpp" +#include "opencv2/features2d.hpp" #include "opencv2/cudafilters.hpp" /** @@ -228,91 +229,49 @@ private: std::vector trainDescCollection; }; -/** @brief Class used for corner detection using the FAST algorithm. : +// +// Feature2DAsync +// + +/** @brief Abstract base class for 2D image feature detectors and descriptor extractors. */ -class CV_EXPORTS FAST_CUDA +class CV_EXPORTS Feature2DAsync +{ +public: + virtual ~Feature2DAsync() {} + + virtual void detectAsync(InputArray image, OutputArray keypoints, + InputArray mask = noArray(), + Stream& stream = Stream::Null()) = 0; + + virtual void convert(InputArray gpu_keypoints, std::vector& keypoints) = 0; +}; + +// +// FastFeatureDetector +// + +/** @brief Wrapping class for feature detection using the FAST method. + */ +class CV_EXPORTS FastFeatureDetector : public cv::FastFeatureDetector, public Feature2DAsync { public: enum { LOCATION_ROW = 0, RESPONSE_ROW, - ROWS_COUNT + ROWS_COUNT, + + FEATURE_SIZE = 7 }; - //! all features have same size - static const int FEATURE_SIZE = 7; + static Ptr create(int threshold=10, + bool nonmaxSuppression=true, + int type=FastFeatureDetector::TYPE_9_16, + int max_npoints = 5000); - /** @brief Constructor. - - @param threshold Threshold on difference between intensity of the central pixel and pixels on a - circle around this pixel. - @param nonmaxSuppression If it is true, non-maximum suppression is applied to detected corners - (keypoints). 
- @param keypointsRatio Inner buffer size for keypoints store is determined as (keypointsRatio \* - image_width \* image_height). - */ - explicit FAST_CUDA(int threshold, bool nonmaxSuppression = true, double keypointsRatio = 0.05); - - /** @brief Finds the keypoints using FAST detector. - - @param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are - supported. - @param mask Optional input mask that marks the regions where we should detect features. - @param keypoints The output vector of keypoints. Can be stored both in CPU and GPU memory. For GPU - memory: - - keypoints.ptr\(LOCATION_ROW)[i] will contain location of i'th point - - keypoints.ptr\(RESPONSE_ROW)[i] will contain response of i'th point (if non-maximum - suppression is applied) - */ - void operator ()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints); - /** @overload */ - void operator ()(const GpuMat& image, const GpuMat& mask, std::vector& keypoints); - - /** @brief Download keypoints from GPU to CPU memory. - */ - static void downloadKeypoints(const GpuMat& d_keypoints, std::vector& keypoints); - - /** @brief Converts keypoints from CUDA representation to vector of KeyPoint. - */ - static void convertKeypoints(const Mat& h_keypoints, std::vector& keypoints); - - /** @brief Releases inner buffer memory. - */ - void release(); - - bool nonmaxSuppression; - - int threshold; - - //! max keypoints = keypointsRatio * img.size().area() - double keypointsRatio; - - /** @brief Find keypoints and compute it's response if nonmaxSuppression is true. - - @param image Image where keypoints (corners) are detected. Only 8-bit grayscale images are - supported. - @param mask Optional input mask that marks the regions where we should detect features. - - The function returns count of detected keypoints. - */ - int calcKeyPointsLocation(const GpuMat& image, const GpuMat& mask); - - /** @brief Gets final array of keypoints. - - @param keypoints The output vector of keypoints. - - The function performs non-max suppression if needed and returns final count of keypoints. - */ - int getKeyPoints(GpuMat& keypoints); - -private: - GpuMat kpLoc_; - int count_; - - GpuMat score_; - - GpuMat d_keypoints_; + virtual void setMaxNumPoints(int max_npoints) = 0; + virtual int getMaxNumPoints() const = 0; }; /** @brief Class for extracting ORB features and descriptors from an image. : @@ -388,8 +347,8 @@ public: inline void setFastParams(int threshold, bool nonmaxSuppression = true) { - fastDetector_.threshold = threshold; - fastDetector_.nonmaxSuppression = nonmaxSuppression; + fastDetector_->setThreshold(threshold); + fastDetector_->setNonmaxSuppression(nonmaxSuppression); } /** @brief Releases inner buffer memory. 
@@ -433,7 +392,7 @@ private: std::vector keyPointsPyr_; std::vector keyPointsCount_; - FAST_CUDA fastDetector_; + Ptr fastDetector_; Ptr blurFilter; diff --git a/modules/cudafeatures2d/perf/perf_features2d.cpp b/modules/cudafeatures2d/perf/perf_features2d.cpp index 26eb434f4..da3cd77db 100644 --- a/modules/cudafeatures2d/perf/perf_features2d.cpp +++ b/modules/cudafeatures2d/perf/perf_features2d.cpp @@ -64,15 +64,18 @@ PERF_TEST_P(Image_Threshold_NonMaxSuppression, FAST, if (PERF_RUN_CUDA()) { - cv::cuda::FAST_CUDA d_fast(threshold, nonMaxSuppersion, 0.5); + cv::Ptr d_fast = + cv::cuda::FastFeatureDetector::create(threshold, nonMaxSuppersion, + cv::FastFeatureDetector::TYPE_9_16, + 0.5 * img.size().area()); const cv::cuda::GpuMat d_img(img); cv::cuda::GpuMat d_keypoints; - TEST_CYCLE() d_fast(d_img, cv::cuda::GpuMat(), d_keypoints); + TEST_CYCLE() d_fast->detectAsync(d_img, d_keypoints); std::vector gpu_keypoints; - d_fast.downloadKeypoints(d_keypoints, gpu_keypoints); + d_fast->convert(d_keypoints, gpu_keypoints); sortKeyPoints(gpu_keypoints); diff --git a/modules/cudafeatures2d/src/cuda/fast.cu b/modules/cudafeatures2d/src/cuda/fast.cu index 7aa888ac3..72235d4e5 100644 --- a/modules/cudafeatures2d/src/cuda/fast.cu +++ b/modules/cudafeatures2d/src/cuda/fast.cu @@ -279,7 +279,7 @@ namespace cv { namespace cuda { namespace device #endif } - int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold) + int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold, cudaStream_t stream) { void* counter_ptr; cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) ); @@ -290,29 +290,29 @@ namespace cv { namespace cuda { namespace device grid.x = divUp(img.cols - 6, block.x); grid.y = divUp(img.rows - 6, block.y); - cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) ); + cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) ); if (score.data) { if (mask.data) - calcKeypoints<<>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold); + calcKeypoints<<>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold); else - calcKeypoints<<>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold); + calcKeypoints<<>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold); } else { if (mask.data) - calcKeypoints<<>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold); + calcKeypoints<<>>(img, SingleMask(mask), kpLoc, maxKeypoints, score, threshold); else - calcKeypoints<<>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold); + calcKeypoints<<>>(img, WithOutMask(), kpLoc, maxKeypoints, score, threshold); } cudaSafeCall( cudaGetLastError() ); - cudaSafeCall( cudaDeviceSynchronize() ); - unsigned int count; - cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) ); + cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream) ); + + cudaSafeCall( cudaStreamSynchronize(stream) ); return count; } @@ -356,7 +356,7 @@ namespace cv { namespace cuda { namespace device #endif } - int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response) + int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response, cudaStream_t stream) { void* counter_ptr; cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) ); @@ -366,15 +366,15 @@ namespace cv { namespace cuda 
{ namespace device dim3 grid; grid.x = divUp(count, block.x); - cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(unsigned int)) ); + cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(unsigned int), stream) ); - nonmaxSuppression<<>>(kpLoc, count, score, loc, response); + nonmaxSuppression<<>>(kpLoc, count, score, loc, response); cudaSafeCall( cudaGetLastError() ); - cudaSafeCall( cudaDeviceSynchronize() ); - unsigned int new_count; - cudaSafeCall( cudaMemcpy(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) ); + cudaSafeCall( cudaMemcpyAsync(&new_count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost, stream) ); + + cudaSafeCall( cudaStreamSynchronize(stream) ); return new_count; } diff --git a/modules/cudafeatures2d/src/fast.cpp b/modules/cudafeatures2d/src/fast.cpp index aa77aa87b..cb22ea54d 100644 --- a/modules/cudafeatures2d/src/fast.cpp +++ b/modules/cudafeatures2d/src/fast.cpp @@ -47,124 +47,162 @@ using namespace cv::cuda; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) -cv::cuda::FAST_CUDA::FAST_CUDA(int, bool, double) { throw_no_cuda(); } -void cv::cuda::FAST_CUDA::operator ()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); } -void cv::cuda::FAST_CUDA::operator ()(const GpuMat&, const GpuMat&, std::vector&) { throw_no_cuda(); } -void cv::cuda::FAST_CUDA::downloadKeypoints(const GpuMat&, std::vector&) { throw_no_cuda(); } -void cv::cuda::FAST_CUDA::convertKeypoints(const Mat&, std::vector&) { throw_no_cuda(); } -void cv::cuda::FAST_CUDA::release() { throw_no_cuda(); } -int cv::cuda::FAST_CUDA::calcKeyPointsLocation(const GpuMat&, const GpuMat&) { throw_no_cuda(); return 0; } -int cv::cuda::FAST_CUDA::getKeyPoints(GpuMat&) { throw_no_cuda(); return 0; } +Ptr cv::cuda::FastFeatureDetector::create(int, bool, int, int) { throw_no_cuda(); return Ptr(); } #else /* !defined (HAVE_CUDA) */ -cv::cuda::FAST_CUDA::FAST_CUDA(int _threshold, bool _nonmaxSuppression, double _keypointsRatio) : - nonmaxSuppression(_nonmaxSuppression), threshold(_threshold), keypointsRatio(_keypointsRatio), count_(0) -{ -} - -void cv::cuda::FAST_CUDA::operator ()(const GpuMat& image, const GpuMat& mask, std::vector& keypoints) -{ - if (image.empty()) - return; - - (*this)(image, mask, d_keypoints_); - downloadKeypoints(d_keypoints_, keypoints); -} - -void cv::cuda::FAST_CUDA::downloadKeypoints(const GpuMat& d_keypoints, std::vector& keypoints) -{ - if (d_keypoints.empty()) - return; - - Mat h_keypoints(d_keypoints); - convertKeypoints(h_keypoints, keypoints); -} - -void cv::cuda::FAST_CUDA::convertKeypoints(const Mat& h_keypoints, std::vector& keypoints) -{ - if (h_keypoints.empty()) - return; - - CV_Assert(h_keypoints.rows == ROWS_COUNT && h_keypoints.elemSize() == 4); - - int npoints = h_keypoints.cols; - - keypoints.resize(npoints); - - const short2* loc_row = h_keypoints.ptr(LOCATION_ROW); - const float* response_row = h_keypoints.ptr(RESPONSE_ROW); - - for (int i = 0; i < npoints; ++i) - { - KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast(FEATURE_SIZE), -1, response_row[i]); - keypoints[i] = kp; - } -} - -void cv::cuda::FAST_CUDA::operator ()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints) -{ - calcKeyPointsLocation(img, mask); - keypoints.cols = getKeyPoints(keypoints); -} - namespace cv { namespace cuda { namespace device { namespace fast { - int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold); - int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* 
loc, float* response); + int calcKeypoints_gpu(PtrStepSzb img, PtrStepSzb mask, short2* kpLoc, int maxKeypoints, PtrStepSzi score, int threshold, cudaStream_t stream); + int nonmaxSuppression_gpu(const short2* kpLoc, int count, PtrStepSzi score, short2* loc, float* response, cudaStream_t stream); } }}} -int cv::cuda::FAST_CUDA::calcKeyPointsLocation(const GpuMat& img, const GpuMat& mask) +namespace { - using namespace cv::cuda::device::fast; - - CV_Assert(img.type() == CV_8UC1); - CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size())); - - int maxKeypoints = static_cast(keypointsRatio * img.size().area()); - - ensureSizeIsEnough(1, maxKeypoints, CV_16SC2, kpLoc_); - - if (nonmaxSuppression) + class FAST_Impl : public cv::cuda::FastFeatureDetector + { + public: + FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints); + + virtual void detect(InputArray _image, std::vector& keypoints, InputArray _mask); + virtual void detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream); + + virtual void convert(InputArray _gpu_keypoints, std::vector& keypoints); + + virtual void setThreshold(int threshold) { threshold_ = threshold; } + virtual int getThreshold() const { return threshold_; } + + virtual void setNonmaxSuppression(bool f) { nonmaxSuppression_ = f; } + virtual bool getNonmaxSuppression() const { return nonmaxSuppression_; } + + virtual void setMaxNumPoints(int max_npoints) { max_npoints_ = max_npoints; } + virtual int getMaxNumPoints() const { return max_npoints_; } + + virtual void setType(int type) { CV_Assert( type == TYPE_9_16 ); } + virtual int getType() const { return TYPE_9_16; } + + private: + int threshold_; + bool nonmaxSuppression_; + int max_npoints_; + }; + + FAST_Impl::FAST_Impl(int threshold, bool nonmaxSuppression, int max_npoints) : + threshold_(threshold), nonmaxSuppression_(nonmaxSuppression), max_npoints_(max_npoints) { - ensureSizeIsEnough(img.size(), CV_32SC1, score_); - score_.setTo(Scalar::all(0)); } - count_ = calcKeypoints_gpu(img, mask, kpLoc_.ptr(), maxKeypoints, nonmaxSuppression ? 
score_ : PtrStepSzi(), threshold); - count_ = std::min(count_, maxKeypoints); + void FAST_Impl::detect(InputArray _image, std::vector& keypoints, InputArray _mask) + { + if (_image.empty()) + { + keypoints.clear(); + return; + } - return count_; + BufferPool pool(Stream::Null()); + GpuMat d_keypoints = pool.getBuffer(ROWS_COUNT, max_npoints_, CV_16SC2); + + detectAsync(_image, d_keypoints, _mask, Stream::Null()); + convert(d_keypoints, keypoints); + } + + void FAST_Impl::detectAsync(InputArray _image, OutputArray _keypoints, InputArray _mask, Stream& stream) + { + using namespace cv::cuda::device::fast; + + const GpuMat img = _image.getGpuMat(); + const GpuMat mask = _mask.getGpuMat(); + + CV_Assert( img.type() == CV_8UC1 ); + CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()) ); + + BufferPool pool(stream); + + GpuMat kpLoc = pool.getBuffer(1, max_npoints_, CV_16SC2); + + GpuMat score; + if (nonmaxSuppression_) + { + score = pool.getBuffer(img.size(), CV_32SC1); + score.setTo(Scalar::all(0), stream); + } + + int count = calcKeypoints_gpu(img, mask, kpLoc.ptr(), max_npoints_, score, threshold_, StreamAccessor::getStream(stream)); + count = std::min(count, max_npoints_); + + if (count == 0) + { + _keypoints.release(); + return; + } + + ensureSizeIsEnough(ROWS_COUNT, count, CV_32FC1, _keypoints); + GpuMat& keypoints = _keypoints.getGpuMatRef(); + + if (nonmaxSuppression_) + { + count = nonmaxSuppression_gpu(kpLoc.ptr(), count, score, keypoints.ptr(LOCATION_ROW), keypoints.ptr(RESPONSE_ROW), StreamAccessor::getStream(stream)); + if (count == 0) + { + keypoints.release(); + } + else + { + keypoints.cols = count; + } + } + else + { + GpuMat locRow(1, count, kpLoc.type(), keypoints.ptr(0)); + kpLoc.colRange(0, count).copyTo(locRow, stream); + keypoints.row(1).setTo(Scalar::all(0), stream); + } + } + + void FAST_Impl::convert(InputArray _gpu_keypoints, std::vector& keypoints) + { + if (_gpu_keypoints.empty()) + { + keypoints.clear(); + return; + } + + Mat h_keypoints; + if (_gpu_keypoints.kind() == _InputArray::CUDA_GPU_MAT) + { + _gpu_keypoints.getGpuMat().download(h_keypoints); + } + else + { + h_keypoints = _gpu_keypoints.getMat(); + } + + CV_Assert( h_keypoints.rows == ROWS_COUNT ); + CV_Assert( h_keypoints.elemSize() == 4 ); + + const int npoints = h_keypoints.cols; + + keypoints.resize(npoints); + + const short2* loc_row = h_keypoints.ptr(LOCATION_ROW); + const float* response_row = h_keypoints.ptr(RESPONSE_ROW); + + for (int i = 0; i < npoints; ++i) + { + KeyPoint kp(loc_row[i].x, loc_row[i].y, static_cast(FEATURE_SIZE), -1, response_row[i]); + keypoints[i] = kp; + } + } } -int cv::cuda::FAST_CUDA::getKeyPoints(GpuMat& keypoints) +Ptr cv::cuda::FastFeatureDetector::create(int threshold, bool nonmaxSuppression, int type, int max_npoints) { - using namespace cv::cuda::device::fast; - - if (count_ == 0) - return 0; - - ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints); - - if (nonmaxSuppression) - return nonmaxSuppression_gpu(kpLoc_.ptr(), count_, score_, keypoints.ptr(LOCATION_ROW), keypoints.ptr(RESPONSE_ROW)); - - GpuMat locRow(1, count_, kpLoc_.type(), keypoints.ptr(0)); - kpLoc_.colRange(0, count_).copyTo(locRow); - keypoints.row(1).setTo(Scalar::all(0)); - - return count_; -} - -void cv::cuda::FAST_CUDA::release() -{ - kpLoc_.release(); - score_.release(); - - d_keypoints_.release(); + CV_Assert( type == TYPE_9_16 ); + return makePtr(threshold, nonmaxSuppression, max_npoints); } #endif /* !defined (HAVE_CUDA) */ diff --git 
a/modules/cudafeatures2d/src/orb.cpp b/modules/cudafeatures2d/src/orb.cpp index 8d8afe8f0..c04649b1f 100644 --- a/modules/cudafeatures2d/src/orb.cpp +++ b/modules/cudafeatures2d/src/orb.cpp @@ -398,7 +398,7 @@ namespace cv::cuda::ORB_CUDA::ORB_CUDA(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize) : nFeatures_(nFeatures), scaleFactor_(scaleFactor), nLevels_(nLevels), edgeThreshold_(edgeThreshold), firstLevel_(firstLevel), WTA_K_(WTA_K), scoreType_(scoreType), patchSize_(patchSize), - fastDetector_(DEFAULT_FAST_THRESHOLD) + fastDetector_(cuda::FastFeatureDetector::create(DEFAULT_FAST_THRESHOLD)) { CV_Assert(patchSize_ >= 2); @@ -554,7 +554,7 @@ namespace return; } - count = cull_gpu(keypoints.ptr(FAST_CUDA::LOCATION_ROW), keypoints.ptr(FAST_CUDA::RESPONSE_ROW), count, n_points); + count = cull_gpu(keypoints.ptr(cuda::FastFeatureDetector::LOCATION_ROW), keypoints.ptr(cuda::FastFeatureDetector::RESPONSE_ROW), count, n_points); } } } @@ -570,20 +570,20 @@ void cv::cuda::ORB_CUDA::computeKeyPointsPyramid() for (int level = 0; level < nLevels_; ++level) { - keyPointsCount_[level] = fastDetector_.calcKeyPointsLocation(imagePyr_[level], maskPyr_[level]); + fastDetector_->setMaxNumPoints(0.05 * imagePyr_[level].size().area()); + + GpuMat fastKpRange; + fastDetector_->detectAsync(imagePyr_[level], fastKpRange, maskPyr_[level], Stream::Null()); + + keyPointsCount_[level] = fastKpRange.cols; if (keyPointsCount_[level] == 0) continue; - ensureSizeIsEnough(3, keyPointsCount_[level], CV_32FC1, keyPointsPyr_[level]); + ensureSizeIsEnough(3, keyPointsCount_[level], fastKpRange.type(), keyPointsPyr_[level]); + fastKpRange.copyTo(keyPointsPyr_[level].rowRange(0, 2)); - GpuMat fastKpRange = keyPointsPyr_[level].rowRange(0, 2); - keyPointsCount_[level] = fastDetector_.getKeyPoints(fastKpRange); - - if (keyPointsCount_[level] == 0) - continue; - - int n_features = static_cast(n_features_per_level_[level]); + const int n_features = static_cast(n_features_per_level_[level]); if (scoreType_ == ORB::HARRIS_SCORE) { @@ -767,8 +767,6 @@ void cv::cuda::ORB_CUDA::release() keyPointsPyr_.clear(); - fastDetector_.release(); - d_keypoints_.release(); } diff --git a/modules/cudafeatures2d/test/test_features2d.cpp b/modules/cudafeatures2d/test/test_features2d.cpp index 468024a5d..9a8d76ce3 100644 --- a/modules/cudafeatures2d/test/test_features2d.cpp +++ b/modules/cudafeatures2d/test/test_features2d.cpp @@ -76,15 +76,14 @@ CUDA_TEST_P(FAST, Accuracy) cv::Mat image = readImage("features2d/aloe.png", cv::IMREAD_GRAYSCALE); ASSERT_FALSE(image.empty()); - cv::cuda::FAST_CUDA fast(threshold); - fast.nonmaxSuppression = nonmaxSuppression; + cv::Ptr fast = cv::cuda::FastFeatureDetector::create(threshold, nonmaxSuppression); if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS)) { try { std::vector keypoints; - fast(loadMat(image), cv::cuda::GpuMat(), keypoints); + fast->detect(loadMat(image), keypoints); } catch (const cv::Exception& e) { @@ -94,7 +93,7 @@ CUDA_TEST_P(FAST, Accuracy) else { std::vector keypoints; - fast(loadMat(image), cv::cuda::GpuMat(), keypoints); + fast->detect(loadMat(image), keypoints); std::vector keypoints_gold; cv::FAST(image, keypoints_gold, threshold, nonmaxSuppression); diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index 2e7faa334..8869a1b66 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -322,14 +322,14 @@ TEST(FAST) FAST(src, keypoints, 20); CPU_OFF; 
- cuda::FAST_CUDA d_FAST(20); + cv::Ptr d_FAST = cv::cuda::FastFeatureDetector::create(20); cuda::GpuMat d_src(src); cuda::GpuMat d_keypoints; - d_FAST(d_src, cuda::GpuMat(), d_keypoints); + d_FAST->detectAsync(d_src, d_keypoints); CUDA_ON; - d_FAST(d_src, cuda::GpuMat(), d_keypoints); + d_FAST->detectAsync(d_src, d_keypoints); CUDA_OFF; } From 9f77ffb03f443331080bfb7d59c4f10d0dd08815 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 12 Jan 2015 18:26:41 +0300 Subject: [PATCH 3/8] extend Feature2DAsync interface --- .../include/opencv2/cudafeatures2d.hpp | 26 ++++-- .../cudafeatures2d/src/feature2d_async.cpp | 85 +++++++++++++++++++ 2 files changed, 103 insertions(+), 8 deletions(-) create mode 100644 modules/cudafeatures2d/src/feature2d_async.cpp diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp index f6f674d2a..a193eb6f8 100644 --- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp +++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp @@ -233,26 +233,36 @@ private: // Feature2DAsync // -/** @brief Abstract base class for 2D image feature detectors and descriptor extractors. - */ class CV_EXPORTS Feature2DAsync { public: - virtual ~Feature2DAsync() {} + virtual ~Feature2DAsync(); - virtual void detectAsync(InputArray image, OutputArray keypoints, + virtual void detectAsync(InputArray image, + OutputArray keypoints, InputArray mask = noArray(), - Stream& stream = Stream::Null()) = 0; + Stream& stream = Stream::Null()); - virtual void convert(InputArray gpu_keypoints, std::vector& keypoints) = 0; + virtual void computeAsync(InputArray image, + OutputArray keypoints, + OutputArray descriptors, + Stream& stream = Stream::Null()); + + virtual void detectAndComputeAsync(InputArray image, + InputArray mask, + OutputArray keypoints, + OutputArray descriptors, + bool useProvidedKeypoints=false, + Stream& stream = Stream::Null()); + + virtual void convert(InputArray gpu_keypoints, + std::vector& keypoints) = 0; }; // // FastFeatureDetector // -/** @brief Wrapping class for feature detection using the FAST method. - */ class CV_EXPORTS FastFeatureDetector : public cv::FastFeatureDetector, public Feature2DAsync { public: diff --git a/modules/cudafeatures2d/src/feature2d_async.cpp b/modules/cudafeatures2d/src/feature2d_async.cpp new file mode 100644 index 000000000..202a72537 --- /dev/null +++ b/modules/cudafeatures2d/src/feature2d_async.cpp @@ -0,0 +1,85 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +cv::cuda::Feature2DAsync::~Feature2DAsync() +{ +} + +void cv::cuda::Feature2DAsync::detectAsync(InputArray image, + OutputArray keypoints, + InputArray mask, + Stream& stream) +{ + if (image.empty()) + { + keypoints.clear(); + return; + } + + detectAndComputeAsync(image, mask, keypoints, noArray(), false, stream); +} + +void cv::cuda::Feature2DAsync::computeAsync(InputArray image, + OutputArray keypoints, + OutputArray descriptors, + Stream& stream) +{ + if (image.empty()) + { + descriptors.release(); + return; + } + + detectAndComputeAsync(image, noArray(), keypoints, descriptors, true, stream); +} + +void cv::cuda::Feature2DAsync::detectAndComputeAsync(InputArray /*image*/, + InputArray /*mask*/, + OutputArray /*keypoints*/, + OutputArray /*descriptors*/, + bool /*useProvidedKeypoints*/, + Stream& /*stream*/) +{ + CV_Error(Error::StsNotImplemented, ""); +} From 554ddd2ec49f4df46c7b23ac257c21fda0a449d0 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 13 Jan 2015 10:25:34 +0300 Subject: [PATCH 4/8] fix compilation without CUDA --- modules/cudafeatures2d/src/fast.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cudafeatures2d/src/fast.cpp b/modules/cudafeatures2d/src/fast.cpp index cb22ea54d..2095ef7cf 100644 --- a/modules/cudafeatures2d/src/fast.cpp +++ b/modules/cudafeatures2d/src/fast.cpp @@ -47,7 +47,7 @@ using namespace cv::cuda; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) -Ptr cv::cuda::FastFeatureDetector::create(int, bool, int, int) { throw_no_cuda(); return Ptr(); } +Ptr cv::cuda::FastFeatureDetector::create(int, bool, int, int) { throw_no_cuda(); return Ptr(); } #else /* !defined (HAVE_CUDA) */ From f960a5707d99d9a55da8f2b12e96bcad65fd9b90 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 13 Jan 2015 10:40:58 +0300 Subject: [PATCH 5/8] refactor CUDA ORB feature detector/extractor algorithm: use new abstract interface and hidden implementation --- .../include/opencv2/cudafeatures2d.hpp | 125 +-- .../cudafeatures2d/perf/perf_features2d.cpp | 6 +- modules/cudafeatures2d/src/orb.cpp | 741 ++++++++++-------- .../cudafeatures2d/test/test_features2d.cpp | 11 +- samples/gpu/performance/tests.cpp | 6 +- 5 files changed, 447 insertions(+), 442 deletions(-) diff --git 
a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp index a193eb6f8..4a78d50e6 100644 --- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp +++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp @@ -284,9 +284,11 @@ public: virtual int getMaxNumPoints() const = 0; }; -/** @brief Class for extracting ORB features and descriptors from an image. : - */ -class CV_EXPORTS ORB_CUDA +// +// ORB +// + +class CV_EXPORTS ORB : public cv::ORB, public Feature2DAsync { public: enum @@ -300,113 +302,20 @@ public: ROWS_COUNT }; - enum - { - DEFAULT_FAST_THRESHOLD = 20 - }; - - /** @brief Constructor. - - @param nFeatures The number of desired features. - @param scaleFactor Coefficient by which we divide the dimensions from one scale pyramid level to - the next. - @param nLevels The number of levels in the scale pyramid. - @param edgeThreshold How far from the boundary the points should be. - @param firstLevel The level at which the image is given. If 1, that means we will also look at the - image scaleFactor times bigger. - @param WTA_K - @param scoreType - @param patchSize - */ - explicit ORB_CUDA(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31, - int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31); - - /** @overload */ - void operator()(const GpuMat& image, const GpuMat& mask, std::vector& keypoints); - /** @overload */ - void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints); - - /** @brief Detects keypoints and computes descriptors for them. - - @param image Input 8-bit grayscale image. - @param mask Optional input mask that marks the regions where we should detect features. - @param keypoints The input/output vector of keypoints. Can be stored both in CPU and GPU memory. - For GPU memory: - - keypoints.ptr\(X_ROW)[i] contains x coordinate of the i'th feature. - - keypoints.ptr\(Y_ROW)[i] contains y coordinate of the i'th feature. - - keypoints.ptr\(RESPONSE_ROW)[i] contains the response of the i'th feature. - - keypoints.ptr\(ANGLE_ROW)[i] contains orientation of the i'th feature. - - keypoints.ptr\(OCTAVE_ROW)[i] contains the octave of the i'th feature. - - keypoints.ptr\(SIZE_ROW)[i] contains the size of the i'th feature. - @param descriptors Computed descriptors. if blurForDescriptor is true, image will be blurred - before descriptors calculation. - */ - void operator()(const GpuMat& image, const GpuMat& mask, std::vector& keypoints, GpuMat& descriptors); - /** @overload */ - void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors); - - /** @brief Download keypoints from GPU to CPU memory. - */ - static void downloadKeyPoints(const GpuMat& d_keypoints, std::vector& keypoints); - /** @brief Converts keypoints from CUDA representation to vector of KeyPoint. - */ - static void convertKeyPoints(const Mat& d_keypoints, std::vector& keypoints); - - //! returns the descriptor size in bytes - inline int descriptorSize() const { return kBytes; } - - inline void setFastParams(int threshold, bool nonmaxSuppression = true) - { - fastDetector_->setThreshold(threshold); - fastDetector_->setNonmaxSuppression(nonmaxSuppression); - } - - /** @brief Releases inner buffer memory. 
- */ - void release(); + static Ptr create(int nfeatures=500, + float scaleFactor=1.2f, + int nlevels=8, + int edgeThreshold=31, + int firstLevel=0, + int WTA_K=2, + int scoreType=ORB::HARRIS_SCORE, + int patchSize=31, + int fastThreshold=20, + bool blurForDescriptor=false); //! if true, image will be blurred before descriptors calculation - bool blurForDescriptor; - -private: - enum { kBytes = 32 }; - - void buildScalePyramids(const GpuMat& image, const GpuMat& mask); - - void computeKeyPointsPyramid(); - - void computeDescriptors(GpuMat& descriptors); - - void mergeKeyPoints(GpuMat& keypoints); - - int nFeatures_; - float scaleFactor_; - int nLevels_; - int edgeThreshold_; - int firstLevel_; - int WTA_K_; - int scoreType_; - int patchSize_; - - //! The number of desired features per scale - std::vector n_features_per_level_; - - //! Points to compute BRIEF descriptors from - GpuMat pattern_; - - std::vector imagePyr_; - std::vector maskPyr_; - - GpuMat buf_; - - std::vector keyPointsPyr_; - std::vector keyPointsCount_; - - Ptr fastDetector_; - - Ptr blurFilter; - - GpuMat d_keypoints_; + virtual void setBlurForDescriptor(bool blurForDescriptor) = 0; + virtual bool getBlurForDescriptor() const = 0; }; //! @} diff --git a/modules/cudafeatures2d/perf/perf_features2d.cpp b/modules/cudafeatures2d/perf/perf_features2d.cpp index da3cd77db..0dcb0434f 100644 --- a/modules/cudafeatures2d/perf/perf_features2d.cpp +++ b/modules/cudafeatures2d/perf/perf_features2d.cpp @@ -109,15 +109,15 @@ PERF_TEST_P(Image_NFeatures, ORB, if (PERF_RUN_CUDA()) { - cv::cuda::ORB_CUDA d_orb(nFeatures); + cv::Ptr d_orb = cv::cuda::ORB::create(nFeatures); const cv::cuda::GpuMat d_img(img); cv::cuda::GpuMat d_keypoints, d_descriptors; - TEST_CYCLE() d_orb(d_img, cv::cuda::GpuMat(), d_keypoints, d_descriptors); + TEST_CYCLE() d_orb->detectAndComputeAsync(d_img, cv::noArray(), d_keypoints, d_descriptors); std::vector gpu_keypoints; - d_orb.downloadKeyPoints(d_keypoints, gpu_keypoints); + d_orb->convert(d_keypoints, gpu_keypoints); cv::Mat gpu_descriptors(d_descriptors); diff --git a/modules/cudafeatures2d/src/orb.cpp b/modules/cudafeatures2d/src/orb.cpp index c04649b1f..6bfdd5ac4 100644 --- a/modules/cudafeatures2d/src/orb.cpp +++ b/modules/cudafeatures2d/src/orb.cpp @@ -47,18 +47,7 @@ using namespace cv::cuda; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) -cv::cuda::ORB_CUDA::ORB_CUDA(int, float, int, int, int, int, int, int) : fastDetector_(20) { throw_no_cuda(); } -void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, std::vector&) { throw_no_cuda(); } -void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); } -void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, std::vector&, GpuMat&) { throw_no_cuda(); } -void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); } -void cv::cuda::ORB_CUDA::downloadKeyPoints(const GpuMat&, std::vector&) { throw_no_cuda(); } -void cv::cuda::ORB_CUDA::convertKeyPoints(const Mat&, std::vector&) { throw_no_cuda(); } -void cv::cuda::ORB_CUDA::release() { throw_no_cuda(); } -void cv::cuda::ORB_CUDA::buildScalePyramids(const GpuMat&, const GpuMat&) { throw_no_cuda(); } -void cv::cuda::ORB_CUDA::computeKeyPointsPyramid() { throw_no_cuda(); } -void cv::cuda::ORB_CUDA::computeDescriptors(GpuMat&) { throw_no_cuda(); } -void cv::cuda::ORB_CUDA::mergeKeyPoints(GpuMat&) { throw_no_cuda(); } +Ptr cv::cuda::ORB::create(int, float, int, int, int, int, int, int, int, bool) { throw_no_cuda(); 
return Ptr(); } #else /* !defined (HAVE_CUDA) */ @@ -346,7 +335,100 @@ namespace -1,-6, 0,-11/*mean (0.127148), correlation (0.547401)*/ }; - void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize) + class ORB_Impl : public cv::cuda::ORB + { + public: + ORB_Impl(int nfeatures, + float scaleFactor, + int nlevels, + int edgeThreshold, + int firstLevel, + int WTA_K, + int scoreType, + int patchSize, + int fastThreshold, + bool blurForDescriptor); + + virtual void detectAndCompute(InputArray _image, InputArray _mask, std::vector& keypoints, OutputArray _descriptors, bool useProvidedKeypoints); + virtual void detectAndComputeAsync(InputArray _image, InputArray _mask, OutputArray _keypoints, OutputArray _descriptors, bool useProvidedKeypoints, Stream& stream); + + virtual void convert(InputArray _gpu_keypoints, std::vector& keypoints); + + virtual int descriptorSize() const { return kBytes; } + virtual int descriptorType() const { return CV_8U; } + virtual int defaultNorm() const { return NORM_HAMMING; } + + virtual void setMaxFeatures(int maxFeatures) { nFeatures_ = maxFeatures; } + virtual int getMaxFeatures() const { return nFeatures_; } + + virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; } + virtual double getScaleFactor() const { return scaleFactor_; } + + virtual void setNLevels(int nlevels) { nLevels_ = nlevels; } + virtual int getNLevels() const { return nLevels_; } + + virtual void setEdgeThreshold(int edgeThreshold) { edgeThreshold_ = edgeThreshold; } + virtual int getEdgeThreshold() const { return edgeThreshold_; } + + virtual void setFirstLevel(int firstLevel) { firstLevel_ = firstLevel; } + virtual int getFirstLevel() const { return firstLevel_; } + + virtual void setWTA_K(int wta_k) { WTA_K_ = wta_k; } + virtual int getWTA_K() const { return WTA_K_; } + + virtual void setScoreType(int scoreType) { scoreType_ = scoreType; } + virtual int getScoreType() const { return scoreType_; } + + virtual void setPatchSize(int patchSize) { patchSize_ = patchSize; } + virtual int getPatchSize() const { return patchSize_; } + + virtual void setFastThreshold(int fastThreshold) { fastThreshold_ = fastThreshold; } + virtual int getFastThreshold() const { return fastThreshold_; } + + virtual void setBlurForDescriptor(bool blurForDescriptor) { blurForDescriptor_ = blurForDescriptor; } + virtual bool getBlurForDescriptor() const { return blurForDescriptor_; } + + private: + int nFeatures_; + float scaleFactor_; + int nLevels_; + int edgeThreshold_; + int firstLevel_; + int WTA_K_; + int scoreType_; + int patchSize_; + int fastThreshold_; + bool blurForDescriptor_; + + private: + void buildScalePyramids(InputArray _image, InputArray _mask); + void computeKeyPointsPyramid(); + void computeDescriptors(OutputArray _descriptors); + void mergeKeyPoints(OutputArray _keypoints); + + private: + Ptr fastDetector_; + + //! The number of desired features per scale + std::vector n_features_per_level_; + + //! 
Points to compute BRIEF descriptors from + GpuMat pattern_; + + std::vector imagePyr_; + std::vector maskPyr_; + + GpuMat buf_; + + std::vector keyPointsPyr_; + std::vector keyPointsCount_; + + Ptr blurFilter_; + + GpuMat d_keypoints_; + }; + + static void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize) { RNG rng(0x12345678); @@ -381,7 +463,7 @@ namespace } } - void makeRandomPattern(int patchSize, Point* pattern, int npoints) + static void makeRandomPattern(int patchSize, Point* pattern, int npoints) { // we always start with a fixed seed, // to make patterns the same on each run @@ -393,155 +475,189 @@ namespace pattern[i].y = rng.uniform(-patchSize / 2, patchSize / 2 + 1); } } -} -cv::cuda::ORB_CUDA::ORB_CUDA(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize) : - nFeatures_(nFeatures), scaleFactor_(scaleFactor), nLevels_(nLevels), edgeThreshold_(edgeThreshold), firstLevel_(firstLevel), WTA_K_(WTA_K), - scoreType_(scoreType), patchSize_(patchSize), - fastDetector_(cuda::FastFeatureDetector::create(DEFAULT_FAST_THRESHOLD)) -{ - CV_Assert(patchSize_ >= 2); - - // fill the extractors and descriptors for the corresponding scales - float factor = 1.0f / scaleFactor_; - float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_)); - - n_features_per_level_.resize(nLevels_); - size_t sum_n_features = 0; - for (int level = 0; level < nLevels_ - 1; ++level) + ORB_Impl::ORB_Impl(int nFeatures, + float scaleFactor, + int nLevels, + int edgeThreshold, + int firstLevel, + int WTA_K, + int scoreType, + int patchSize, + int fastThreshold, + bool blurForDescriptor) : + nFeatures_(nFeatures), + scaleFactor_(scaleFactor), + nLevels_(nLevels), + edgeThreshold_(edgeThreshold), + firstLevel_(firstLevel), + WTA_K_(WTA_K), + scoreType_(scoreType), + patchSize_(patchSize), + fastThreshold_(fastThreshold), + blurForDescriptor_(blurForDescriptor) { - n_features_per_level_[level] = cvRound(n_desired_features_per_scale); - sum_n_features += n_features_per_level_[level]; - n_desired_features_per_scale *= factor; - } - n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features; + CV_Assert( patchSize_ >= 2 ); + CV_Assert( WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4 ); - // pre-compute the end of a row in a circular patch - int half_patch_size = patchSize_ / 2; - std::vector u_max(half_patch_size + 2); - for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v) - u_max[v] = cvRound(std::sqrt(static_cast(half_patch_size * half_patch_size - v * v))); + fastDetector_ = cuda::FastFeatureDetector::create(fastThreshold_); - // Make sure we are symmetric - for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v) - { - while (u_max[v_0] == u_max[v_0 + 1]) - ++v_0; - u_max[v] = v_0; - ++v_0; - } - CV_Assert(u_max.size() < 32); - cv::cuda::device::orb::loadUMax(&u_max[0], static_cast(u_max.size())); + // fill the extractors and descriptors for the corresponding scales + float factor = 1.0f / scaleFactor_; + float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_)); - // Calc pattern - const int npoints = 512; - Point pattern_buf[npoints]; - const Point* pattern0 = (const Point*)bit_pattern_31_; - if (patchSize_ != 31) - { - pattern0 = pattern_buf; - makeRandomPattern(patchSize_, pattern_buf, npoints); - } - - CV_Assert(WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4); - - Mat 
h_pattern; - - if (WTA_K_ == 2) - { - h_pattern.create(2, npoints, CV_32SC1); - - int* pattern_x_ptr = h_pattern.ptr(0); - int* pattern_y_ptr = h_pattern.ptr(1); - - for (int i = 0; i < npoints; ++i) + n_features_per_level_.resize(nLevels_); + size_t sum_n_features = 0; + for (int level = 0; level < nLevels_ - 1; ++level) { - pattern_x_ptr[i] = pattern0[i].x; - pattern_y_ptr[i] = pattern0[i].y; + n_features_per_level_[level] = cvRound(n_desired_features_per_scale); + sum_n_features += n_features_per_level_[level]; + n_desired_features_per_scale *= factor; } - } - else - { - int ntuples = descriptorSize() * 4; - initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints); - } + n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features; - pattern_.upload(h_pattern); - - blurFilter = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101); - - blurForDescriptor = false; -} - -namespace -{ - inline float getScale(float scaleFactor, int firstLevel, int level) - { - return pow(scaleFactor, level - firstLevel); - } -} - -void cv::cuda::ORB_CUDA::buildScalePyramids(const GpuMat& image, const GpuMat& mask) -{ - CV_Assert(image.type() == CV_8UC1); - CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size())); - - imagePyr_.resize(nLevels_); - maskPyr_.resize(nLevels_); - - for (int level = 0; level < nLevels_; ++level) - { - float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level); - - Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale)); - - ensureSizeIsEnough(sz, image.type(), imagePyr_[level]); - ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]); - maskPyr_[level].setTo(Scalar::all(255)); - - // Compute the resized image - if (level != firstLevel_) + // pre-compute the end of a row in a circular patch + int half_patch_size = patchSize_ / 2; + std::vector u_max(half_patch_size + 2); + for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v) { - if (level < firstLevel_) - { - cuda::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR); + u_max[v] = cvRound(std::sqrt(static_cast(half_patch_size * half_patch_size - v * v))); + } - if (!mask.empty()) - cuda::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR); - } - else - { - cuda::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR); + // Make sure we are symmetric + for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v) + { + while (u_max[v_0] == u_max[v_0 + 1]) + ++v_0; + u_max[v] = v_0; + ++v_0; + } + CV_Assert( u_max.size() < 32 ); + cv::cuda::device::orb::loadUMax(&u_max[0], static_cast(u_max.size())); - if (!mask.empty()) - { - cuda::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR); - cuda::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO); - } + // Calc pattern + const int npoints = 512; + Point pattern_buf[npoints]; + const Point* pattern0 = (const Point*)bit_pattern_31_; + if (patchSize_ != 31) + { + pattern0 = pattern_buf; + makeRandomPattern(patchSize_, pattern_buf, npoints); + } + + Mat h_pattern; + if (WTA_K_ == 2) + { + h_pattern.create(2, npoints, CV_32SC1); + + int* pattern_x_ptr = h_pattern.ptr(0); + int* pattern_y_ptr = h_pattern.ptr(1); + + for (int i = 0; i < npoints; ++i) + { + pattern_x_ptr[i] = pattern0[i].x; + pattern_y_ptr[i] = pattern0[i].y; } } else { - image.copyTo(imagePyr_[level]); - - if (!mask.empty()) - mask.copyTo(maskPyr_[level]); + int ntuples = descriptorSize() * 4; + initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints); 
} - // Filter keypoints by image border - ensureSizeIsEnough(sz, CV_8UC1, buf_); - buf_.setTo(Scalar::all(0)); - Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_); - buf_(inner).setTo(Scalar::all(255)); + pattern_.upload(h_pattern); - cuda::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]); + blurFilter_ = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101); } -} -namespace -{ - //takes keypoints and culls them by the response - void cull(GpuMat& keypoints, int& count, int n_points) + void ORB_Impl::detectAndCompute(InputArray _image, InputArray _mask, std::vector& keypoints, OutputArray _descriptors, bool useProvidedKeypoints) + { + CV_Assert( useProvidedKeypoints == false ); + + detectAndComputeAsync(_image, _mask, d_keypoints_, _descriptors, false, Stream::Null()); + convert(d_keypoints_, keypoints); + } + + void ORB_Impl::detectAndComputeAsync(InputArray _image, InputArray _mask, OutputArray _keypoints, OutputArray _descriptors, bool useProvidedKeypoints, Stream& stream) + { + CV_Assert( useProvidedKeypoints == false ); + + buildScalePyramids(_image, _mask); + computeKeyPointsPyramid(); + if (_descriptors.needed()) + { + computeDescriptors(_descriptors); + } + mergeKeyPoints(_keypoints); + } + + static float getScale(float scaleFactor, int firstLevel, int level) + { + return pow(scaleFactor, level - firstLevel); + } + + void ORB_Impl::buildScalePyramids(InputArray _image, InputArray _mask) + { + const GpuMat image = _image.getGpuMat(); + const GpuMat mask = _mask.getGpuMat(); + + CV_Assert( image.type() == CV_8UC1 ); + CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) ); + + imagePyr_.resize(nLevels_); + maskPyr_.resize(nLevels_); + + for (int level = 0; level < nLevels_; ++level) + { + float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level); + + Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale)); + + ensureSizeIsEnough(sz, image.type(), imagePyr_[level]); + ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]); + maskPyr_[level].setTo(Scalar::all(255)); + + // Compute the resized image + if (level != firstLevel_) + { + if (level < firstLevel_) + { + cuda::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR); + + if (!mask.empty()) + cuda::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR); + } + else + { + cuda::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR); + + if (!mask.empty()) + { + cuda::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR); + cuda::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO); + } + } + } + else + { + image.copyTo(imagePyr_[level]); + + if (!mask.empty()) + mask.copyTo(maskPyr_[level]); + } + + // Filter keypoints by image border + ensureSizeIsEnough(sz, CV_8UC1, buf_); + buf_.setTo(Scalar::all(0)); + Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_); + buf_(inner).setTo(Scalar::all(255)); + + cuda::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]); + } + } + + // takes keypoints and culls them by the response + static void cull(GpuMat& keypoints, int& count, int n_points) { using namespace cv::cuda::device::orb; @@ -557,217 +673,196 @@ namespace count = cull_gpu(keypoints.ptr(cuda::FastFeatureDetector::LOCATION_ROW), keypoints.ptr(cuda::FastFeatureDetector::RESPONSE_ROW), count, n_points); } } -} -void cv::cuda::ORB_CUDA::computeKeyPointsPyramid() -{ - using namespace cv::cuda::device::orb; - - int 
half_patch_size = patchSize_ / 2; - - keyPointsPyr_.resize(nLevels_); - keyPointsCount_.resize(nLevels_); - - for (int level = 0; level < nLevels_; ++level) + void ORB_Impl::computeKeyPointsPyramid() { - fastDetector_->setMaxNumPoints(0.05 * imagePyr_[level].size().area()); + using namespace cv::cuda::device::orb; - GpuMat fastKpRange; - fastDetector_->detectAsync(imagePyr_[level], fastKpRange, maskPyr_[level], Stream::Null()); + int half_patch_size = patchSize_ / 2; - keyPointsCount_[level] = fastKpRange.cols; + keyPointsPyr_.resize(nLevels_); + keyPointsCount_.resize(nLevels_); - if (keyPointsCount_[level] == 0) - continue; + fastDetector_->setThreshold(fastThreshold_); - ensureSizeIsEnough(3, keyPointsCount_[level], fastKpRange.type(), keyPointsPyr_[level]); - fastKpRange.copyTo(keyPointsPyr_[level].rowRange(0, 2)); - - const int n_features = static_cast(n_features_per_level_[level]); - - if (scoreType_ == ORB::HARRIS_SCORE) + for (int level = 0; level < nLevels_; ++level) { - // Keep more points than necessary as FAST does not give amazing corners - cull(keyPointsPyr_[level], keyPointsCount_[level], 2 * n_features); + fastDetector_->setMaxNumPoints(0.05 * imagePyr_[level].size().area()); - // Compute the Harris cornerness (better scoring than FAST) - HarrisResponses_gpu(imagePyr_[level], keyPointsPyr_[level].ptr(0), keyPointsPyr_[level].ptr(1), keyPointsCount_[level], 7, HARRIS_K, 0); + GpuMat fastKpRange; + fastDetector_->detectAsync(imagePyr_[level], fastKpRange, maskPyr_[level], Stream::Null()); + + keyPointsCount_[level] = fastKpRange.cols; + + if (keyPointsCount_[level] == 0) + continue; + + ensureSizeIsEnough(3, keyPointsCount_[level], fastKpRange.type(), keyPointsPyr_[level]); + fastKpRange.copyTo(keyPointsPyr_[level].rowRange(0, 2)); + + const int n_features = static_cast(n_features_per_level_[level]); + + if (scoreType_ == ORB::HARRIS_SCORE) + { + // Keep more points than necessary as FAST does not give amazing corners + cull(keyPointsPyr_[level], keyPointsCount_[level], 2 * n_features); + + // Compute the Harris cornerness (better scoring than FAST) + HarrisResponses_gpu(imagePyr_[level], keyPointsPyr_[level].ptr(0), keyPointsPyr_[level].ptr(1), keyPointsCount_[level], 7, HARRIS_K, 0); + } + + //cull to the final desired level, using the new Harris scores or the original FAST scores. + cull(keyPointsPyr_[level], keyPointsCount_[level], n_features); + + // Compute orientation + IC_Angle_gpu(imagePyr_[level], keyPointsPyr_[level].ptr(0), keyPointsPyr_[level].ptr(2), keyPointsCount_[level], half_patch_size, 0); + } + } + + void ORB_Impl::computeDescriptors(OutputArray _descriptors) + { + using namespace cv::cuda::device::orb; + + int nAllkeypoints = 0; + + for (int level = 0; level < nLevels_; ++level) + nAllkeypoints += keyPointsCount_[level]; + + if (nAllkeypoints == 0) + { + _descriptors.release(); + return; } - //cull to the final desired level, using the new Harris scores or the original FAST scores. 
- cull(keyPointsPyr_[level], keyPointsCount_[level], n_features); + ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, _descriptors); + GpuMat descriptors = _descriptors.getGpuMat(); - // Compute orientation - IC_Angle_gpu(imagePyr_[level], keyPointsPyr_[level].ptr(0), keyPointsPyr_[level].ptr(2), keyPointsCount_[level], half_patch_size, 0); - } -} + int offset = 0; -void cv::cuda::ORB_CUDA::computeDescriptors(GpuMat& descriptors) -{ - using namespace cv::cuda::device::orb; - - int nAllkeypoints = 0; - - for (int level = 0; level < nLevels_; ++level) - nAllkeypoints += keyPointsCount_[level]; - - if (nAllkeypoints == 0) - { - descriptors.release(); - return; - } - - ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, descriptors); - - int offset = 0; - - for (int level = 0; level < nLevels_; ++level) - { - if (keyPointsCount_[level] == 0) - continue; - - GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]); - - if (blurForDescriptor) + for (int level = 0; level < nLevels_; ++level) { - // preprocess the resized image - ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_); - blurFilter->apply(imagePyr_[level], buf_); + if (keyPointsCount_[level] == 0) + continue; + + GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]); + + if (blurForDescriptor_) + { + // preprocess the resized image + ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_); + blurFilter_->apply(imagePyr_[level], buf_); + } + + computeOrbDescriptor_gpu(blurForDescriptor_ ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr(0), keyPointsPyr_[level].ptr(2), + keyPointsCount_[level], pattern_.ptr(0), pattern_.ptr(1), descRange, descriptorSize(), WTA_K_, 0); + + offset += keyPointsCount_[level]; + } + } + + void ORB_Impl::mergeKeyPoints(OutputArray _keypoints) + { + using namespace cv::cuda::device::orb; + + int nAllkeypoints = 0; + + for (int level = 0; level < nLevels_; ++level) + nAllkeypoints += keyPointsCount_[level]; + + if (nAllkeypoints == 0) + { + _keypoints.release(); + return; } - computeOrbDescriptor_gpu(blurForDescriptor ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr(0), keyPointsPyr_[level].ptr(2), - keyPointsCount_[level], pattern_.ptr(0), pattern_.ptr(1), descRange, descriptorSize(), WTA_K_, 0); + ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, _keypoints); + GpuMat& keypoints = _keypoints.getGpuMatRef(); - offset += keyPointsCount_[level]; + int offset = 0; + + for (int level = 0; level < nLevels_; ++level) + { + if (keyPointsCount_[level] == 0) + continue; + + float sf = getScale(scaleFactor_, firstLevel_, level); + + GpuMat keyPointsRange = keypoints.colRange(offset, offset + keyPointsCount_[level]); + + float locScale = level != firstLevel_ ? 
sf : 1.0f; + + mergeLocation_gpu(keyPointsPyr_[level].ptr(0), keyPointsRange.ptr(0), keyPointsRange.ptr(1), keyPointsCount_[level], locScale, 0); + + GpuMat range = keyPointsRange.rowRange(2, 4); + keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range); + + keyPointsRange.row(4).setTo(Scalar::all(level)); + keyPointsRange.row(5).setTo(Scalar::all(patchSize_ * sf)); + + offset += keyPointsCount_[level]; + } } -} -void cv::cuda::ORB_CUDA::mergeKeyPoints(GpuMat& keypoints) -{ - using namespace cv::cuda::device::orb; - - int nAllkeypoints = 0; - - for (int level = 0; level < nLevels_; ++level) - nAllkeypoints += keyPointsCount_[level]; - - if (nAllkeypoints == 0) + void ORB_Impl::convert(InputArray _gpu_keypoints, std::vector& keypoints) { - keypoints.release(); - return; - } + if (_gpu_keypoints.empty()) + { + keypoints.clear(); + return; + } - ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, keypoints); + Mat h_keypoints; + if (_gpu_keypoints.kind() == _InputArray::CUDA_GPU_MAT) + { + _gpu_keypoints.getGpuMat().download(h_keypoints); + } + else + { + h_keypoints = _gpu_keypoints.getMat(); + } - int offset = 0; + CV_Assert( h_keypoints.rows == ROWS_COUNT ); + CV_Assert( h_keypoints.type() == CV_32FC1 ); - for (int level = 0; level < nLevels_; ++level) - { - if (keyPointsCount_[level] == 0) - continue; + const int npoints = h_keypoints.cols; - float sf = getScale(scaleFactor_, firstLevel_, level); + keypoints.resize(npoints); - GpuMat keyPointsRange = keypoints.colRange(offset, offset + keyPointsCount_[level]); + const float* x_ptr = h_keypoints.ptr(X_ROW); + const float* y_ptr = h_keypoints.ptr(Y_ROW); + const float* response_ptr = h_keypoints.ptr(RESPONSE_ROW); + const float* angle_ptr = h_keypoints.ptr(ANGLE_ROW); + const float* octave_ptr = h_keypoints.ptr(OCTAVE_ROW); + const float* size_ptr = h_keypoints.ptr(SIZE_ROW); - float locScale = level != firstLevel_ ? 
sf : 1.0f; + for (int i = 0; i < npoints; ++i) + { + KeyPoint kp; - mergeLocation_gpu(keyPointsPyr_[level].ptr(0), keyPointsRange.ptr(0), keyPointsRange.ptr(1), keyPointsCount_[level], locScale, 0); + kp.pt.x = x_ptr[i]; + kp.pt.y = y_ptr[i]; + kp.response = response_ptr[i]; + kp.angle = angle_ptr[i]; + kp.octave = static_cast(octave_ptr[i]); + kp.size = size_ptr[i]; - GpuMat range = keyPointsRange.rowRange(2, 4); - keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range); - - keyPointsRange.row(4).setTo(Scalar::all(level)); - keyPointsRange.row(5).setTo(Scalar::all(patchSize_ * sf)); - - offset += keyPointsCount_[level]; + keypoints[i] = kp; + } } } -void cv::cuda::ORB_CUDA::downloadKeyPoints(const GpuMat &d_keypoints, std::vector& keypoints) +Ptr cv::cuda::ORB::create(int nfeatures, + float scaleFactor, + int nlevels, + int edgeThreshold, + int firstLevel, + int WTA_K, + int scoreType, + int patchSize, + int fastThreshold, + bool blurForDescriptor) { - if (d_keypoints.empty()) - { - keypoints.clear(); - return; - } - - Mat h_keypoints(d_keypoints); - - convertKeyPoints(h_keypoints, keypoints); -} - -void cv::cuda::ORB_CUDA::convertKeyPoints(const Mat &d_keypoints, std::vector& keypoints) -{ - if (d_keypoints.empty()) - { - keypoints.clear(); - return; - } - - CV_Assert(d_keypoints.type() == CV_32FC1 && d_keypoints.rows == ROWS_COUNT); - - const float* x_ptr = d_keypoints.ptr(X_ROW); - const float* y_ptr = d_keypoints.ptr(Y_ROW); - const float* response_ptr = d_keypoints.ptr(RESPONSE_ROW); - const float* angle_ptr = d_keypoints.ptr(ANGLE_ROW); - const float* octave_ptr = d_keypoints.ptr(OCTAVE_ROW); - const float* size_ptr = d_keypoints.ptr(SIZE_ROW); - - keypoints.resize(d_keypoints.cols); - - for (int i = 0; i < d_keypoints.cols; ++i) - { - KeyPoint kp; - - kp.pt.x = x_ptr[i]; - kp.pt.y = y_ptr[i]; - kp.response = response_ptr[i]; - kp.angle = angle_ptr[i]; - kp.octave = static_cast(octave_ptr[i]); - kp.size = size_ptr[i]; - - keypoints[i] = kp; - } -} - -void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints) -{ - buildScalePyramids(image, mask); - computeKeyPointsPyramid(); - mergeKeyPoints(keypoints); -} - -void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors) -{ - buildScalePyramids(image, mask); - computeKeyPointsPyramid(); - computeDescriptors(descriptors); - mergeKeyPoints(keypoints); -} - -void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, std::vector& keypoints) -{ - (*this)(image, mask, d_keypoints_); - downloadKeyPoints(d_keypoints_, keypoints); -} - -void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, std::vector& keypoints, GpuMat& descriptors) -{ - (*this)(image, mask, d_keypoints_, descriptors); - downloadKeyPoints(d_keypoints_, keypoints); -} - -void cv::cuda::ORB_CUDA::release() -{ - imagePyr_.clear(); - maskPyr_.clear(); - - buf_.release(); - - keyPointsPyr_.clear(); - - d_keypoints_.release(); + return makePtr(nfeatures, scaleFactor, nlevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize, fastThreshold, blurForDescriptor); } #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/cudafeatures2d/test/test_features2d.cpp b/modules/cudafeatures2d/test/test_features2d.cpp index 9a8d76ce3..25ba48faf 100644 --- a/modules/cudafeatures2d/test/test_features2d.cpp +++ b/modules/cudafeatures2d/test/test_features2d.cpp @@ -122,7 +122,7 @@ namespace IMPLEMENT_PARAM_CLASS(ORB_BlurForDescriptor, 
bool) } -CV_ENUM(ORB_ScoreType, ORB::HARRIS_SCORE, ORB::FAST_SCORE) +CV_ENUM(ORB_ScoreType, cv::ORB::HARRIS_SCORE, cv::ORB::FAST_SCORE) PARAM_TEST_CASE(ORB, cv::cuda::DeviceInfo, ORB_FeaturesCount, ORB_ScaleFactor, ORB_LevelsCount, ORB_EdgeThreshold, ORB_firstLevel, ORB_WTA_K, ORB_ScoreType, ORB_PatchSize, ORB_BlurForDescriptor) { @@ -162,8 +162,9 @@ CUDA_TEST_P(ORB, Accuracy) cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1)); mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0)); - cv::cuda::ORB_CUDA orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize); - orb.blurForDescriptor = blurForDescriptor; + cv::Ptr orb = + cv::cuda::ORB::create(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, + WTA_K, scoreType, patchSize, 20, blurForDescriptor); if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS)) { @@ -171,7 +172,7 @@ CUDA_TEST_P(ORB, Accuracy) { std::vector keypoints; cv::cuda::GpuMat descriptors; - orb(loadMat(image), loadMat(mask), keypoints, descriptors); + orb->detectAndComputeAsync(loadMat(image), loadMat(mask), keypoints, descriptors); } catch (const cv::Exception& e) { @@ -182,7 +183,7 @@ CUDA_TEST_P(ORB, Accuracy) { std::vector keypoints; cv::cuda::GpuMat descriptors; - orb(loadMat(image), loadMat(mask), keypoints, descriptors); + orb->detectAndCompute(loadMat(image), loadMat(mask), keypoints, descriptors); cv::Ptr orb_gold = cv::ORB::create(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize); diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index 8869a1b66..0d083e5bd 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -350,15 +350,15 @@ TEST(ORB) orb->detectAndCompute(src, Mat(), keypoints, descriptors); CPU_OFF; - cuda::ORB_CUDA d_orb; + Ptr d_orb = cuda::ORB::create(); cuda::GpuMat d_src(src); cuda::GpuMat d_keypoints; cuda::GpuMat d_descriptors; - d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors); + d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors); CUDA_ON; - d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors); + d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors); CUDA_OFF; } From 764d55b81df438ff218c861d4e47459f89a9f467 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 13 Jan 2015 17:57:09 +0300 Subject: [PATCH 6/8] add extended documentation for Features2DAsync --- .../include/opencv2/cudafeatures2d.hpp | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp index 4a78d50e6..c7ab6e392 100644 --- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp +++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp @@ -233,28 +233,47 @@ private: // Feature2DAsync // +/** @brief Abstract base class for CUDA asynchronous 2D image feature detectors and descriptor extractors. + */ class CV_EXPORTS Feature2DAsync { public: virtual ~Feature2DAsync(); + /** @brief Detects keypoints in an image. + + @param image Image. + @param keypoints The detected keypoints. + @param mask Mask specifying where to look for keypoints (optional). It must be a 8-bit integer + matrix with non-zero values in the region of interest. + @param stream CUDA stream. 
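    A minimal sketch of the intended call sequence, assuming the cuda::FastFeatureDetector::create
    factory declared later in this header and a CUDA-capable device (the image file name, threshold
    and stream below are placeholder values):
    @code
    cv::Ptr<cv::cuda::FastFeatureDetector> fast =
        cv::cuda::FastFeatureDetector::create(20, true);

    cv::cuda::GpuMat d_image(cv::imread("scene.png", cv::IMREAD_GRAYSCALE)); // placeholder file name

    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_keypoints;
    fast->detectAsync(d_image, d_keypoints, cv::noArray(), stream);

    stream.waitForCompletion();                  // results are valid only after synchronization
    std::vector<cv::KeyPoint> keypoints;
    fast->convert(d_keypoints, keypoints);       // internal GPU layout -> std::vector<KeyPoint>
    @endcode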
+ */ virtual void detectAsync(InputArray image, OutputArray keypoints, InputArray mask = noArray(), Stream& stream = Stream::Null()); + /** @brief Computes the descriptors for a set of keypoints detected in an image. + + @param image Image. + @param keypoints Input collection of keypoints. + @param descriptors Computed descriptors. Row j is the descriptor for j-th keypoint. + @param stream CUDA stream. + */ virtual void computeAsync(InputArray image, OutputArray keypoints, OutputArray descriptors, Stream& stream = Stream::Null()); + /** Detects keypoints and computes the descriptors. */ virtual void detectAndComputeAsync(InputArray image, InputArray mask, OutputArray keypoints, OutputArray descriptors, - bool useProvidedKeypoints=false, + bool useProvidedKeypoints = false, Stream& stream = Stream::Null()); + /** Converts keypoints array from internal representation to standard vector. */ virtual void convert(InputArray gpu_keypoints, std::vector& keypoints) = 0; }; @@ -263,6 +282,8 @@ public: // FastFeatureDetector // +/** @brief Wrapping class for feature detection using the FAST method. + */ class CV_EXPORTS FastFeatureDetector : public cv::FastFeatureDetector, public Feature2DAsync { public: @@ -288,6 +309,10 @@ public: // ORB // +/** @brief Class implementing the ORB (*oriented BRIEF*) keypoint detector and descriptor extractor + * + * @sa cv::ORB + */ class CV_EXPORTS ORB : public cv::ORB, public Feature2DAsync { public: From 8a178da1a42d6ec2a26eed1c998889377d1576ae Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 13 Jan 2015 17:57:30 +0300 Subject: [PATCH 7/8] refactor CUDA BFMatcher algorithm: use new abstract interface and hidden implementation --- .../include/opencv2/cudafeatures2d.hpp | 411 ++-- .../cudafeatures2d/perf/perf_features2d.cpp | 24 +- .../src/brute_force_matcher.cpp | 1790 +++++++++-------- .../cudafeatures2d/test/test_features2d.cpp | 68 +- modules/stitching/src/matchers.cpp | 11 +- samples/gpu/performance/tests.cpp | 18 +- 6 files changed, 1269 insertions(+), 1053 deletions(-) diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp index c7ab6e392..975726973 100644 --- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp +++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp @@ -63,170 +63,315 @@ namespace cv { namespace cuda { //! @addtogroup cudafeatures2d //! @{ -/** @brief Brute-force descriptor matcher. +// +// DescriptorMatcher +// -For each descriptor in the first set, this matcher finds the closest descriptor in the second set -by trying each one. This descriptor matcher supports masking permissible matches between descriptor -sets. +/** @brief Abstract base class for matching keypoint descriptors. -The class BFMatcher_CUDA has an interface similar to the class DescriptorMatcher. It has two groups -of match methods: for matching descriptors of one image with another image or with an image set. -Also, all functions have an alternative to save results either to the GPU memory or to the CPU -memory. - -@sa DescriptorMatcher, BFMatcher +It has two groups of match methods: for matching descriptors of an image with another image or with +an image set. */ -class CV_EXPORTS BFMatcher_CUDA +class CV_EXPORTS DescriptorMatcher : public cv::Algorithm { public: - explicit BFMatcher_CUDA(int norm = cv::NORM_L2); + // + // Factories + // - //! 
Add descriptors to train descriptor collection - void add(const std::vector& descCollection); + /** @brief Brute-force descriptor matcher. - //! Get train descriptors collection - const std::vector& getTrainDescriptors() const; + For each descriptor in the first set, this matcher finds the closest descriptor in the second set + by trying each one. This descriptor matcher supports masking permissible matches of descriptor + sets. - //! Clear train descriptors collection - void clear(); + @param normType One of NORM_L1, NORM_L2, NORM_HAMMING. L1 and L2 norms are + preferable choices for SIFT and SURF descriptors, NORM_HAMMING should be used with ORB, BRISK and + BRIEF). + */ + static Ptr createBFMatcher(int norm = cv::NORM_L2); - //! Return true if there are not train descriptors in collection - bool empty() const; + // + // Utility + // - //! Return true if the matcher supports mask in match methods - bool isMaskSupported() const; + /** @brief Returns true if the descriptor matcher supports masking permissible matches. + */ + virtual bool isMaskSupported() const = 0; - //! Find one best match for each query descriptor - void matchSingle(const GpuMat& query, const GpuMat& train, - GpuMat& trainIdx, GpuMat& distance, - const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null()); + // + // Descriptor collection + // - //! Download trainIdx and distance and convert it to CPU vector with DMatch - static void matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector& matches); - //! Convert trainIdx and distance to vector with DMatch - static void matchConvert(const Mat& trainIdx, const Mat& distance, std::vector& matches); + /** @brief Adds descriptors to train a descriptor collection. - //! Find one best match for each query descriptor - void match(const GpuMat& query, const GpuMat& train, std::vector& matches, const GpuMat& mask = GpuMat()); + If the collection is not empty, the new descriptors are added to existing train descriptors. - //! Make gpu collection of trains and masks in suitable format for matchCollection function - void makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection, const std::vector& masks = std::vector()); + @param descriptors Descriptors to add. Each descriptors[i] is a set of descriptors from the same + train image. + */ + virtual void add(const std::vector& descriptors) = 0; - //! Find one best match from train collection for each query descriptor - void matchCollection(const GpuMat& query, const GpuMat& trainCollection, - GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, - const GpuMat& masks = GpuMat(), Stream& stream = Stream::Null()); + /** @brief Returns a constant link to the train descriptor collection. + */ + virtual const std::vector& getTrainDescriptors() const = 0; - //! Download trainIdx, imgIdx and distance and convert it to vector with DMatch - static void matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector& matches); - //! Convert trainIdx, imgIdx and distance to vector with DMatch - static void matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector& matches); + /** @brief Clears the train descriptor collection. + */ + virtual void clear() = 0; - //! Find one best match from train collection for each query descriptor. - void match(const GpuMat& query, std::vector& matches, const std::vector& masks = std::vector()); + /** @brief Returns true if there are no train descriptors in the collection. + */ + virtual bool empty() const = 0; - //! 
Find k best matches for each query descriptor (in increasing order of distances) - void knnMatchSingle(const GpuMat& query, const GpuMat& train, - GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k, - const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null()); + /** @brief Trains a descriptor matcher. - //! Download trainIdx and distance and convert it to vector with DMatch - //! compactResult is used when mask is not empty. If compactResult is false matches - //! vector will have the same size as queryDescriptors rows. If compactResult is true - //! matches vector will not contain matches for fully masked out query descriptors. - static void knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, - std::vector< std::vector >& matches, bool compactResult = false); - //! Convert trainIdx and distance to vector with DMatch - static void knnMatchConvert(const Mat& trainIdx, const Mat& distance, - std::vector< std::vector >& matches, bool compactResult = false); + Trains a descriptor matcher (for example, the flann index). In all methods to match, the method + train() is run every time before matching. + */ + virtual void train() = 0; - //! Find k best matches for each query descriptor (in increasing order of distances). - //! compactResult is used when mask is not empty. If compactResult is false matches - //! vector will have the same size as queryDescriptors rows. If compactResult is true - //! matches vector will not contain matches for fully masked out query descriptors. - void knnMatch(const GpuMat& query, const GpuMat& train, - std::vector< std::vector >& matches, int k, const GpuMat& mask = GpuMat(), - bool compactResult = false); + // + // 1 to 1 match + // - //! Find k best matches from train collection for each query descriptor (in increasing order of distances) - void knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection, - GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, - const GpuMat& maskCollection = GpuMat(), Stream& stream = Stream::Null()); + /** @brief Finds the best match for each descriptor from a query set (blocking version). - //! Download trainIdx and distance and convert it to vector with DMatch - //! compactResult is used when mask is not empty. If compactResult is false matches - //! vector will have the same size as queryDescriptors rows. If compactResult is true - //! matches vector will not contain matches for fully masked out query descriptors. - //! @see BFMatcher_CUDA::knnMatchDownload - static void knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, - std::vector< std::vector >& matches, bool compactResult = false); - //! Convert trainIdx and distance to vector with DMatch - //! @see BFMatcher_CUDA::knnMatchConvert - static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, - std::vector< std::vector >& matches, bool compactResult = false); + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Matches. If a query descriptor is masked out in mask , no match is added for this + descriptor. So, matches size may be smaller than the query descriptors count. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. - //! Find k best matches for each query descriptor (in increasing order of distances). - //! compactResult is used when mask is not empty. 
If compactResult is false matches - //! vector will have the same size as queryDescriptors rows. If compactResult is true - //! matches vector will not contain matches for fully masked out query descriptors. - void knnMatch(const GpuMat& query, std::vector< std::vector >& matches, int k, - const std::vector& masks = std::vector(), bool compactResult = false); + In the first variant of this method, the train descriptors are passed as an input argument. In the + second variant of the method, train descriptors collection that was set by DescriptorMatcher::add is + used. Optional mask (or masks) can be passed to specify which query and training descriptors can be + matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if + mask.at\(i,j) is non-zero. + */ + virtual void match(InputArray queryDescriptors, InputArray trainDescriptors, + std::vector& matches, + InputArray mask = noArray()) = 0; - //! Find best matches for each query descriptor which have distance less than maxDistance. - //! nMatches.at(0, queryIdx) will contain matches count for queryIdx. - //! carefully nMatches can be greater than trainIdx.cols - it means that matcher didn't find all matches, - //! because it didn't have enough memory. - //! If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10), - //! otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches - //! Matches doesn't sorted. - void radiusMatchSingle(const GpuMat& query, const GpuMat& train, - GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance, - const GpuMat& mask = GpuMat(), Stream& stream = Stream::Null()); + /** @overload + */ + virtual void match(InputArray queryDescriptors, + std::vector& matches, + const std::vector& masks = std::vector()) = 0; - //! Download trainIdx, nMatches and distance and convert it to vector with DMatch. - //! matches will be sorted in increasing order of distances. - //! compactResult is used when mask is not empty. If compactResult is false matches - //! vector will have the same size as queryDescriptors rows. If compactResult is true - //! matches vector will not contain matches for fully masked out query descriptors. - static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches, - std::vector< std::vector >& matches, bool compactResult = false); - //! Convert trainIdx, nMatches and distance to vector with DMatch. - static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches, - std::vector< std::vector >& matches, bool compactResult = false); + /** @brief Finds the best match for each descriptor from a query set (asynchronous version). - //! Find best matches for each query descriptor which have distance less than maxDistance - //! in increasing order of distances). - void radiusMatch(const GpuMat& query, const GpuMat& train, - std::vector< std::vector >& matches, float maxDistance, - const GpuMat& mask = GpuMat(), bool compactResult = false); + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Matches array stored in GPU memory. Internal representation is not defined. + Use DescriptorMatcher::matchConvert method to retrieve results in standard representation. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. 
+ @param stream CUDA stream. - //! Find best matches for each query descriptor which have distance less than maxDistance. - //! If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10), - //! otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches - //! Matches doesn't sorted. - void radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance, - const std::vector& masks = std::vector(), Stream& stream = Stream::Null()); + In the first variant of this method, the train descriptors are passed as an input argument. In the + second variant of the method, train descriptors collection that was set by DescriptorMatcher::add is + used. Optional mask (or masks) can be passed to specify which query and training descriptors can be + matched. Namely, queryDescriptors[i] can be matched with trainDescriptors[j] only if + mask.at\(i,j) is non-zero. + */ + virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors, + OutputArray matches, + InputArray mask = noArray(), + Stream& stream = Stream::Null()) = 0; - //! Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch. - //! matches will be sorted in increasing order of distances. - //! compactResult is used when mask is not empty. If compactResult is false matches - //! vector will have the same size as queryDescriptors rows. If compactResult is true - //! matches vector will not contain matches for fully masked out query descriptors. - static void radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches, - std::vector< std::vector >& matches, bool compactResult = false); - //! Convert trainIdx, nMatches and distance to vector with DMatch. - static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, - std::vector< std::vector >& matches, bool compactResult = false); + /** @overload + */ + virtual void matchAsync(InputArray queryDescriptors, + OutputArray matches, + const std::vector& masks = std::vector(), + Stream& stream = Stream::Null()) = 0; - //! Find best matches from train collection for each query descriptor which have distance less than - //! maxDistance (in increasing order of distances). - void radiusMatch(const GpuMat& query, std::vector< std::vector >& matches, float maxDistance, - const std::vector& masks = std::vector(), bool compactResult = false); + /** @brief Converts matches array from internal representation to standard matches vector. - int norm; + The method is supposed to be used with DescriptorMatcher::matchAsync to get final result. + Call this method only after DescriptorMatcher::matchAsync is completed (ie. after synchronization). -private: - std::vector trainDescCollection; + @param gpu_matches Matches, returned from DescriptorMatcher::matchAsync. + @param matches Vector of DMatch objects. + */ + virtual void matchConvert(InputArray gpu_matches, + std::vector& matches) = 0; + + // + // knn match + // + + /** @brief Finds the k best matches for each descriptor from a query set (blocking version). + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Matches. Each matches[i] is k or less matches for the same query descriptor. 
+ @param k Count of best matches found per each query descriptor or less if a query descriptor has + less than k possible matches in total. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + + These extended variants of DescriptorMatcher::match methods find several best matches for each query + descriptor. The matches are returned in the distance increasing order. See DescriptorMatcher::match + for the details about query and train descriptors. + */ + virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors, + std::vector >& matches, + int k, + InputArray mask = noArray(), + bool compactResult = false) = 0; + + /** @overload + */ + virtual void knnMatch(InputArray queryDescriptors, + std::vector >& matches, + int k, + const std::vector& masks = std::vector(), + bool compactResult = false) = 0; + + /** @brief Finds the k best matches for each descriptor from a query set (asynchronous version). + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Matches array stored in GPU memory. Internal representation is not defined. + Use DescriptorMatcher::knnMatchConvert method to retrieve results in standard representation. + @param k Count of best matches found per each query descriptor or less if a query descriptor has + less than k possible matches in total. + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + @param stream CUDA stream. + + These extended variants of DescriptorMatcher::matchAsync methods find several best matches for each query + descriptor. The matches are returned in the distance increasing order. See DescriptorMatcher::matchAsync + for the details about query and train descriptors. + */ + virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors, + OutputArray matches, + int k, + InputArray mask = noArray(), + Stream& stream = Stream::Null()) = 0; + + /** @overload + */ + virtual void knnMatchAsync(InputArray queryDescriptors, + OutputArray matches, + int k, + const std::vector& masks = std::vector(), + Stream& stream = Stream::Null()) = 0; + + /** @brief Converts matches array from internal representation to standard matches vector. + + The method is supposed to be used with DescriptorMatcher::knnMatchAsync to get final result. + Call this method only after DescriptorMatcher::knnMatchAsync is completed (ie. after synchronization). + + @param gpu_matches Matches, returned from DescriptorMatcher::knnMatchAsync. + @param matches Vector of DMatch objects. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. 
If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + */ + virtual void knnMatchConvert(InputArray gpu_matches, + std::vector< std::vector >& matches, + bool compactResult = false) = 0; + + // + // radius match + // + + /** @brief For each query descriptor, finds the training descriptors not farther than the specified distance (blocking version). + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Found matches. + @param maxDistance Threshold for the distance between matched descriptors. Distance means here + metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured + in Pixels)! + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + + For each query descriptor, the methods find such training descriptors that the distance between the + query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are + returned in the distance increasing order. + */ + virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors, + std::vector >& matches, + float maxDistance, + InputArray mask = noArray(), + bool compactResult = false) = 0; + + /** @overload + */ + virtual void radiusMatch(InputArray queryDescriptors, + std::vector >& matches, + float maxDistance, + const std::vector& masks = std::vector(), + bool compactResult = false) = 0; + + /** @brief For each query descriptor, finds the training descriptors not farther than the specified distance (asynchronous version). + + @param queryDescriptors Query set of descriptors. + @param trainDescriptors Train set of descriptors. This set is not added to the train descriptors + collection stored in the class object. + @param matches Matches array stored in GPU memory. Internal representation is not defined. + Use DescriptorMatcher::radiusMatchConvert method to retrieve results in standard representation. + @param maxDistance Threshold for the distance between matched descriptors. Distance means here + metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured + in Pixels)! + @param mask Mask specifying permissible matches between an input query and train matrices of + descriptors. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + @param stream CUDA stream. + + For each query descriptor, the methods find such training descriptors that the distance between the + query descriptor and the training descriptor is equal or smaller than maxDistance. Found matches are + returned in the distance increasing order. 
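    A minimal sketch of the asynchronous workflow described above (the descriptor matrices and the
    distance threshold are placeholders; the matcher is assumed to come from
    DescriptorMatcher::createBFMatcher):
    @code
    cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
        cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_HAMMING);

    cv::cuda::GpuMat d_query, d_train;   // assumed to already hold binary descriptors (e.g. ORB)
    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_matches;

    matcher->radiusMatchAsync(d_query, d_train, d_matches, /*maxDistance*/ 50.0f,
                              cv::noArray(), stream);

    stream.waitForCompletion();          // GPU results are valid only after synchronization
    std::vector< std::vector<cv::DMatch> > matches;
    matcher->radiusMatchConvert(d_matches, matches);
    @endcode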
+ */ + virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors, + OutputArray matches, + float maxDistance, + InputArray mask = noArray(), + Stream& stream = Stream::Null()) = 0; + + /** @overload + */ + virtual void radiusMatchAsync(InputArray queryDescriptors, + OutputArray matches, + float maxDistance, + const std::vector& masks = std::vector(), + Stream& stream = Stream::Null()) = 0; + + /** @brief Converts matches array from internal representation to standard matches vector. + + The method is supposed to be used with DescriptorMatcher::radiusMatchAsync to get final result. + Call this method only after DescriptorMatcher::radiusMatchAsync is completed (ie. after synchronization). + + @param gpu_matches Matches, returned from DescriptorMatcher::radiusMatchAsync. + @param matches Vector of DMatch objects. + @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is + false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, + the matches vector does not contain matches for fully masked-out query descriptors. + */ + virtual void radiusMatchConvert(InputArray gpu_matches, + std::vector< std::vector >& matches, + bool compactResult = false) = 0; }; // diff --git a/modules/cudafeatures2d/perf/perf_features2d.cpp b/modules/cudafeatures2d/perf/perf_features2d.cpp index 0dcb0434f..9d8134816 100644 --- a/modules/cudafeatures2d/perf/perf_features2d.cpp +++ b/modules/cudafeatures2d/perf/perf_features2d.cpp @@ -167,16 +167,16 @@ PERF_TEST_P(DescSize_Norm, BFMatch, if (PERF_RUN_CUDA()) { - cv::cuda::BFMatcher_CUDA d_matcher(normType); + cv::Ptr d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType); const cv::cuda::GpuMat d_query(query); const cv::cuda::GpuMat d_train(train); - cv::cuda::GpuMat d_trainIdx, d_distance; + cv::cuda::GpuMat d_matches; - TEST_CYCLE() d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); + TEST_CYCLE() d_matcher->matchAsync(d_query, d_train, d_matches); std::vector gpu_matches; - d_matcher.matchDownload(d_trainIdx, d_distance, gpu_matches); + d_matcher->matchConvert(d_matches, gpu_matches); SANITY_CHECK_MATCHES(gpu_matches); } @@ -226,16 +226,16 @@ PERF_TEST_P(DescSize_K_Norm, BFKnnMatch, if (PERF_RUN_CUDA()) { - cv::cuda::BFMatcher_CUDA d_matcher(normType); + cv::Ptr d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType); const cv::cuda::GpuMat d_query(query); const cv::cuda::GpuMat d_train(train); - cv::cuda::GpuMat d_trainIdx, d_distance, d_allDist; + cv::cuda::GpuMat d_matches; - TEST_CYCLE() d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, k); + TEST_CYCLE() d_matcher->knnMatchAsync(d_query, d_train, d_matches, k); std::vector< std::vector > matchesTbl; - d_matcher.knnMatchDownload(d_trainIdx, d_distance, matchesTbl); + d_matcher->knnMatchConvert(d_matches, matchesTbl); std::vector gpu_matches; toOneRowMatches(matchesTbl, gpu_matches); @@ -280,16 +280,16 @@ PERF_TEST_P(DescSize_Norm, BFRadiusMatch, if (PERF_RUN_CUDA()) { - cv::cuda::BFMatcher_CUDA d_matcher(normType); + cv::Ptr d_matcher = cv::cuda::DescriptorMatcher::createBFMatcher(normType); const cv::cuda::GpuMat d_query(query); const cv::cuda::GpuMat d_train(train); - cv::cuda::GpuMat d_trainIdx, d_nMatches, d_distance; + cv::cuda::GpuMat d_matches; - TEST_CYCLE() d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, maxDistance); + TEST_CYCLE() d_matcher->radiusMatchAsync(d_query, d_train, d_matches, maxDistance); 
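        // convert the GPU-side result of radiusMatchAsync into the standard
        // std::vector< std::vector<DMatch> > layout before the sanity check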
std::vector< std::vector > matchesTbl; - d_matcher.radiusMatchDownload(d_trainIdx, d_distance, d_nMatches, matchesTbl); + d_matcher->radiusMatchConvert(d_matches, matchesTbl); std::vector gpu_matches; toOneRowMatches(matchesTbl, gpu_matches); diff --git a/modules/cudafeatures2d/src/brute_force_matcher.cpp b/modules/cudafeatures2d/src/brute_force_matcher.cpp index 5de0b06e3..a00537c8e 100644 --- a/modules/cudafeatures2d/src/brute_force_matcher.cpp +++ b/modules/cudafeatures2d/src/brute_force_matcher.cpp @@ -47,37 +47,7 @@ using namespace cv::cuda; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) -cv::cuda::BFMatcher_CUDA::BFMatcher_CUDA(int) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::add(const std::vector&) { throw_no_cuda(); } -const std::vector& cv::cuda::BFMatcher_CUDA::getTrainDescriptors() const { throw_no_cuda(); return trainDescCollection; } -void cv::cuda::BFMatcher_CUDA::clear() { throw_no_cuda(); } -bool cv::cuda::BFMatcher_CUDA::empty() const { throw_no_cuda(); return true; } -bool cv::cuda::BFMatcher_CUDA::isMaskSupported() const { throw_no_cuda(); return true; } -void cv::cuda::BFMatcher_CUDA::matchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::matchDownload(const GpuMat&, const GpuMat&, std::vector&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::matchConvert(const Mat&, const Mat&, std::vector&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::match(const GpuMat&, const GpuMat&, std::vector&, const GpuMat&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::makeGpuCollection(GpuMat&, GpuMat&, const std::vector&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::matchCollection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::matchDownload(const GpuMat&, const GpuMat&, const GpuMat&, std::vector&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::matchConvert(const Mat&, const Mat&, const Mat&, std::vector&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::match(const GpuMat&, std::vector&, const std::vector&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, int, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatchDownload(const GpuMat&, const GpuMat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatchConvert(const Mat&, const Mat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat&, const GpuMat&, std::vector< std::vector >&, int, const GpuMat&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatch2Collection(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatch2Download(const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatch2Convert(const Mat&, const Mat&, const Mat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat&, std::vector< std::vector >&, int, const std::vector&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatchSingle(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const 
GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat&, const Mat&, const Mat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat&, const GpuMat&, std::vector< std::vector >&, float, const GpuMat&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatchCollection(const GpuMat&, GpuMat&, GpuMat&, GpuMat&, GpuMat&, float, const std::vector&, Stream&) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat&, const GpuMat&, const GpuMat&, const GpuMat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat&, const Mat&, const Mat&, const Mat&, std::vector< std::vector >&, bool) { throw_no_cuda(); } -void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat&, std::vector< std::vector >&, float, const std::vector&, bool) { throw_no_cuda(); } +Ptr cv::cuda::DescriptorMatcher::createBFMatcher(int) { throw_no_cuda(); return Ptr(); } #else /* !defined (HAVE_CUDA) */ @@ -155,857 +125,953 @@ namespace cv { namespace cuda { namespace device } }}} -//////////////////////////////////////////////////////////////////// -// Train collection - -cv::cuda::BFMatcher_CUDA::BFMatcher_CUDA(int norm_) : norm(norm_) -{ -} - -void cv::cuda::BFMatcher_CUDA::add(const std::vector& descCollection) -{ - trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end()); -} - -const std::vector& cv::cuda::BFMatcher_CUDA::getTrainDescriptors() const -{ - return trainDescCollection; -} - -void cv::cuda::BFMatcher_CUDA::clear() -{ - trainDescCollection.clear(); -} - -bool cv::cuda::BFMatcher_CUDA::empty() const -{ - return trainDescCollection.empty(); -} - -bool cv::cuda::BFMatcher_CUDA::isMaskSupported() const -{ - return true; -} - -//////////////////////////////////////////////////////////////////// -// Match - -void cv::cuda::BFMatcher_CUDA::matchSingle(const GpuMat& query, const GpuMat& train, - GpuMat& trainIdx, GpuMat& distance, - const GpuMat& mask, Stream& stream) -{ - if (query.empty() || train.empty()) - return; - - using namespace cv::cuda::device::bf_match; - - typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, - const PtrStepSzi& trainIdx, const PtrStepSzf& distance, - cudaStream_t stream); - - static const caller_t callersL1[] = - { - matchL1_gpu, 0/*matchL1_gpu*/, - matchL1_gpu, matchL1_gpu, - matchL1_gpu, matchL1_gpu - }; - static const caller_t callersL2[] = - { - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, matchL2_gpu - }; - - static const caller_t callersHamming[] = - { - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/ - }; - - CV_Assert(query.channels() == 1 && query.depth() < CV_64F); - CV_Assert(train.cols == query.cols && train.type() == query.type()); - CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING); - - const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? 
callersL2 : callersHamming; - - const int nQuery = query.rows; - - ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx); - ensureSizeIsEnough(1, nQuery, CV_32F, distance); - - caller_t func = callers[query.depth()]; - CV_Assert(func != 0); - - func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream)); -} - -void cv::cuda::BFMatcher_CUDA::matchDownload(const GpuMat& trainIdx, const GpuMat& distance, std::vector& matches) -{ - if (trainIdx.empty() || distance.empty()) - return; - - Mat trainIdxCPU(trainIdx); - Mat distanceCPU(distance); - - matchConvert(trainIdxCPU, distanceCPU, matches); -} - -void cv::cuda::BFMatcher_CUDA::matchConvert(const Mat& trainIdx, const Mat& distance, std::vector& matches) -{ - if (trainIdx.empty() || distance.empty()) - return; - - CV_Assert(trainIdx.type() == CV_32SC1); - CV_Assert(distance.type() == CV_32FC1 && distance.cols == trainIdx.cols); - - const int nQuery = trainIdx.cols; - - matches.clear(); - matches.reserve(nQuery); - - const int* trainIdx_ptr = trainIdx.ptr(); - const float* distance_ptr = distance.ptr(); - for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++distance_ptr) - { - int train_idx = *trainIdx_ptr; - - if (train_idx == -1) - continue; - - float distance_local = *distance_ptr; - - DMatch m(queryIdx, train_idx, 0, distance_local); - - matches.push_back(m); - } -} - -void cv::cuda::BFMatcher_CUDA::match(const GpuMat& query, const GpuMat& train, - std::vector& matches, const GpuMat& mask) -{ - GpuMat trainIdx, distance; - matchSingle(query, train, trainIdx, distance, mask); - matchDownload(trainIdx, distance, matches); -} - -void cv::cuda::BFMatcher_CUDA::makeGpuCollection(GpuMat& trainCollection, GpuMat& maskCollection, - const std::vector& masks) -{ - if (empty()) - return; - - if (masks.empty()) - { - Mat trainCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb))); - - PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr(); - - for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr) - *trainCollectionCPU_ptr = trainDescCollection[i]; - - trainCollection.upload(trainCollectionCPU); - maskCollection.release(); - } - else - { - CV_Assert(masks.size() == trainDescCollection.size()); - - Mat trainCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb))); - Mat maskCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb))); - - PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr(); - PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr(); - - for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr) - { - const GpuMat& train = trainDescCollection[i]; - const GpuMat& mask = masks[i]; - - CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows)); - - *trainCollectionCPU_ptr = train; - *maskCollectionCPU_ptr = mask; - } - - trainCollection.upload(trainCollectionCPU); - maskCollection.upload(maskCollectionCPU); - } -} - -void cv::cuda::BFMatcher_CUDA::matchCollection(const GpuMat& query, const GpuMat& trainCollection, - GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, - const GpuMat& masks, Stream& stream) -{ - if (query.empty() || trainCollection.empty()) - return; - - using namespace cv::cuda::device::bf_match; - - typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, - const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& 
distance, - cudaStream_t stream); - - static const caller_t callersL1[] = - { - matchL1_gpu, 0/*matchL1_gpu*/, - matchL1_gpu, matchL1_gpu, - matchL1_gpu, matchL1_gpu - }; - static const caller_t callersL2[] = - { - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, matchL2_gpu - }; - static const caller_t callersHamming[] = - { - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/ - }; - - CV_Assert(query.channels() == 1 && query.depth() < CV_64F); - CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING); - - const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming; - - const int nQuery = query.rows; - - ensureSizeIsEnough(1, nQuery, CV_32S, trainIdx); - ensureSizeIsEnough(1, nQuery, CV_32S, imgIdx); - ensureSizeIsEnough(1, nQuery, CV_32F, distance); - - caller_t func = callers[query.depth()]; - CV_Assert(func != 0); - - func(query, trainCollection, masks, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream)); -} - -void cv::cuda::BFMatcher_CUDA::matchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, std::vector& matches) -{ - if (trainIdx.empty() || imgIdx.empty() || distance.empty()) - return; - - Mat trainIdxCPU(trainIdx); - Mat imgIdxCPU(imgIdx); - Mat distanceCPU(distance); - - matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches); -} - -void cv::cuda::BFMatcher_CUDA::matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector& matches) -{ - if (trainIdx.empty() || imgIdx.empty() || distance.empty()) - return; - - CV_Assert(trainIdx.type() == CV_32SC1); - CV_Assert(imgIdx.type() == CV_32SC1 && imgIdx.cols == trainIdx.cols); - CV_Assert(distance.type() == CV_32FC1 && distance.cols == trainIdx.cols); - - const int nQuery = trainIdx.cols; - - matches.clear(); - matches.reserve(nQuery); - - const int* trainIdx_ptr = trainIdx.ptr(); - const int* imgIdx_ptr = imgIdx.ptr(); - const float* distance_ptr = distance.ptr(); - for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr) - { - int _trainIdx = *trainIdx_ptr; - - if (_trainIdx == -1) - continue; - - int _imgIdx = *imgIdx_ptr; - - float _distance = *distance_ptr; - - DMatch m(queryIdx, _trainIdx, _imgIdx, _distance); - - matches.push_back(m); - } -} - -void cv::cuda::BFMatcher_CUDA::match(const GpuMat& query, std::vector& matches, const std::vector& masks) -{ - GpuMat trainCollection; - GpuMat maskCollection; - - makeGpuCollection(trainCollection, maskCollection, masks); - - GpuMat trainIdx, imgIdx, distance; - - matchCollection(query, trainCollection, trainIdx, imgIdx, distance, maskCollection); - matchDownload(trainIdx, imgIdx, distance, matches); -} - -//////////////////////////////////////////////////////////////////// -// KnnMatch - -void cv::cuda::BFMatcher_CUDA::knnMatchSingle(const GpuMat& query, const GpuMat& train, - GpuMat& trainIdx, GpuMat& distance, GpuMat& allDist, int k, - const GpuMat& mask, Stream& stream) -{ - if (query.empty() || train.empty()) - return; - - using namespace cv::cuda::device::bf_knnmatch; - - typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, - const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, - cudaStream_t stream); - - static const caller_t callersL1[] = - { - matchL1_gpu, 0/*matchL1_gpu*/, - matchL1_gpu, matchL1_gpu, - matchL1_gpu, 
matchL1_gpu - }; - static const caller_t callersL2[] = - { - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, matchL2_gpu - }; - static const caller_t callersHamming[] = - { - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/ - }; - - CV_Assert(query.channels() == 1 && query.depth() < CV_64F); - CV_Assert(train.type() == query.type() && train.cols == query.cols); - CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING); - - const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming; - - const int nQuery = query.rows; - const int nTrain = train.rows; - - if (k == 2) - { - ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx); - ensureSizeIsEnough(1, nQuery, CV_32FC2, distance); - } - else - { - ensureSizeIsEnough(nQuery, k, CV_32S, trainIdx); - ensureSizeIsEnough(nQuery, k, CV_32F, distance); - ensureSizeIsEnough(nQuery, nTrain, CV_32FC1, allDist); - } - - trainIdx.setTo(Scalar::all(-1), stream); - - caller_t func = callers[query.depth()]; - CV_Assert(func != 0); - - func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream)); -} - -void cv::cuda::BFMatcher_CUDA::knnMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || distance.empty()) - return; - - Mat trainIdxCPU(trainIdx); - Mat distanceCPU(distance); - - knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult); -} - -void cv::cuda::BFMatcher_CUDA::knnMatchConvert(const Mat& trainIdx, const Mat& distance, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || distance.empty()) - return; - - CV_Assert(trainIdx.type() == CV_32SC2 || trainIdx.type() == CV_32SC1); - CV_Assert(distance.type() == CV_32FC2 || distance.type() == CV_32FC1); - CV_Assert(distance.size() == trainIdx.size()); - CV_Assert(trainIdx.isContinuous() && distance.isContinuous()); - - const int nQuery = trainIdx.type() == CV_32SC2 ? trainIdx.cols : trainIdx.rows; - const int k = trainIdx.type() == CV_32SC2 ? 
2 :trainIdx.cols; - - matches.clear(); - matches.reserve(nQuery); - - const int* trainIdx_ptr = trainIdx.ptr(); - const float* distance_ptr = distance.ptr(); - - for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) - { - matches.push_back(std::vector()); - std::vector& curMatches = matches.back(); - curMatches.reserve(k); - - for (int i = 0; i < k; ++i, ++trainIdx_ptr, ++distance_ptr) - { - int _trainIdx = *trainIdx_ptr; - - if (_trainIdx != -1) - { - float _distance = *distance_ptr; - - DMatch m(queryIdx, _trainIdx, 0, _distance); - - curMatches.push_back(m); - } - } - - if (compactResult && curMatches.empty()) - matches.pop_back(); - } -} - -void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat& query, const GpuMat& train, - std::vector< std::vector >& matches, int k, const GpuMat& mask, bool compactResult) -{ - GpuMat trainIdx, distance, allDist; - knnMatchSingle(query, train, trainIdx, distance, allDist, k, mask); - knnMatchDownload(trainIdx, distance, matches, compactResult); -} - -void cv::cuda::BFMatcher_CUDA::knnMatch2Collection(const GpuMat& query, const GpuMat& trainCollection, - GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, - const GpuMat& maskCollection, Stream& stream) -{ - if (query.empty() || trainCollection.empty()) - return; - - using namespace cv::cuda::device::bf_knnmatch; - - typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, - const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, - cudaStream_t stream); - - static const caller_t callersL1[] = - { - match2L1_gpu, 0/*match2L1_gpu*/, - match2L1_gpu, match2L1_gpu, - match2L1_gpu, match2L1_gpu - }; - static const caller_t callersL2[] = - { - 0/*match2L2_gpu*/, 0/*match2L2_gpu*/, - 0/*match2L2_gpu*/, 0/*match2L2_gpu*/, - 0/*match2L2_gpu*/, match2L2_gpu - }; - static const caller_t callersHamming[] = - { - match2Hamming_gpu, 0/*match2Hamming_gpu*/, - match2Hamming_gpu, 0/*match2Hamming_gpu*/, - match2Hamming_gpu, 0/*match2Hamming_gpu*/ - }; - - CV_Assert(query.channels() == 1 && query.depth() < CV_64F); - CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING); - - const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? 
callersL2 : callersHamming; - - const int nQuery = query.rows; - - ensureSizeIsEnough(1, nQuery, CV_32SC2, trainIdx); - ensureSizeIsEnough(1, nQuery, CV_32SC2, imgIdx); - ensureSizeIsEnough(1, nQuery, CV_32FC2, distance); - - trainIdx.setTo(Scalar::all(-1), stream); - - caller_t func = callers[query.depth()]; - CV_Assert(func != 0); - - func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream)); -} - -void cv::cuda::BFMatcher_CUDA::knnMatch2Download(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || imgIdx.empty() || distance.empty()) - return; - - Mat trainIdxCPU(trainIdx); - Mat imgIdxCPU(imgIdx); - Mat distanceCPU(distance); - - knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult); -} - -void cv::cuda::BFMatcher_CUDA::knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || imgIdx.empty() || distance.empty()) - return; - - CV_Assert(trainIdx.type() == CV_32SC2); - CV_Assert(imgIdx.type() == CV_32SC2 && imgIdx.cols == trainIdx.cols); - CV_Assert(distance.type() == CV_32FC2 && distance.cols == trainIdx.cols); - - const int nQuery = trainIdx.cols; - - matches.clear(); - matches.reserve(nQuery); - - const int* trainIdx_ptr = trainIdx.ptr(); - const int* imgIdx_ptr = imgIdx.ptr(); - const float* distance_ptr = distance.ptr(); - - for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) - { - matches.push_back(std::vector()); - std::vector& curMatches = matches.back(); - curMatches.reserve(2); - - for (int i = 0; i < 2; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr) - { - int _trainIdx = *trainIdx_ptr; - - if (_trainIdx != -1) - { - int _imgIdx = *imgIdx_ptr; - - float _distance = *distance_ptr; - - DMatch m(queryIdx, _trainIdx, _imgIdx, _distance); - - curMatches.push_back(m); - } - } - - if (compactResult && curMatches.empty()) - matches.pop_back(); - } -} - namespace { - struct ImgIdxSetter + static void makeGpuCollection(const std::vector& trainDescCollection, + const std::vector& masks, + GpuMat& trainCollection, + GpuMat& maskCollection) { - explicit inline ImgIdxSetter(int imgIdx_) : imgIdx(imgIdx_) {} - inline void operator()(DMatch& m) const {m.imgIdx = imgIdx;} - int imgIdx; - }; -} - -void cv::cuda::BFMatcher_CUDA::knnMatch(const GpuMat& query, std::vector< std::vector >& matches, int k, - const std::vector& masks, bool compactResult) -{ - if (k == 2) - { - GpuMat trainCollection; - GpuMat maskCollection; - - makeGpuCollection(trainCollection, maskCollection, masks); - - GpuMat trainIdx, imgIdx, distance; - - knnMatch2Collection(query, trainCollection, trainIdx, imgIdx, distance, maskCollection); - knnMatch2Download(trainIdx, imgIdx, distance, matches); - } - else - { - if (query.empty() || empty()) + if (trainDescCollection.empty()) return; - std::vector< std::vector > curMatches; - std::vector temp; - temp.reserve(2 * k); - - matches.resize(query.rows); - for_each(matches.begin(), matches.end(), bind2nd(mem_fun_ref(&std::vector::reserve), k)); - - for (size_t imgIdx = 0, size = trainDescCollection.size(); imgIdx < size; ++imgIdx) + if (masks.empty()) { - knnMatch(query, trainDescCollection[imgIdx], curMatches, k, masks.empty() ? 
GpuMat() : masks[imgIdx]); + Mat trainCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb))); - for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx) + PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr(); + + for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr) + *trainCollectionCPU_ptr = trainDescCollection[i]; + + trainCollection.upload(trainCollectionCPU); + maskCollection.release(); + } + else + { + CV_Assert( masks.size() == trainDescCollection.size() ); + + Mat trainCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb))); + Mat maskCollectionCPU(1, static_cast(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb))); + + PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr(); + PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr(); + + for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr) { - std::vector& localMatch = curMatches[queryIdx]; - std::vector& globalMatch = matches[queryIdx]; + const GpuMat& train = trainDescCollection[i]; + const GpuMat& mask = masks[i]; - for_each(localMatch.begin(), localMatch.end(), ImgIdxSetter(static_cast(imgIdx))); + CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows) ); - temp.clear(); - merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), back_inserter(temp)); + *trainCollectionCPU_ptr = train; + *maskCollectionCPU_ptr = mask; + } - globalMatch.clear(); - const size_t count = std::min((size_t)k, temp.size()); - copy(temp.begin(), temp.begin() + count, back_inserter(globalMatch)); + trainCollection.upload(trainCollectionCPU); + maskCollection.upload(maskCollectionCPU); + } + } + + class BFMatcher_Impl : public cv::cuda::DescriptorMatcher + { + public: + explicit BFMatcher_Impl(int norm) : norm_(norm) + { + CV_Assert( norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING ); + } + + virtual bool isMaskSupported() const { return true; } + + virtual void add(const std::vector& descriptors) + { + trainDescCollection_.insert(trainDescCollection_.end(), descriptors.begin(), descriptors.end()); + } + + virtual const std::vector& getTrainDescriptors() const + { + return trainDescCollection_; + } + + virtual void clear() + { + trainDescCollection_.clear(); + } + + virtual bool empty() const + { + return trainDescCollection_.empty(); + } + + virtual void train() + { + } + + virtual void match(InputArray queryDescriptors, InputArray trainDescriptors, + std::vector& matches, + InputArray mask = noArray()); + + virtual void match(InputArray queryDescriptors, + std::vector& matches, + const std::vector& masks = std::vector()); + + virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors, + OutputArray matches, + InputArray mask = noArray(), + Stream& stream = Stream::Null()); + + virtual void matchAsync(InputArray queryDescriptors, + OutputArray matches, + const std::vector& masks = std::vector(), + Stream& stream = Stream::Null()); + + virtual void matchConvert(InputArray gpu_matches, + std::vector& matches); + + virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors, + std::vector >& matches, + int k, + InputArray mask = noArray(), + bool compactResult = false); + + virtual void knnMatch(InputArray queryDescriptors, + std::vector >& matches, + int k, + const std::vector& masks = std::vector(), + bool compactResult = false); + + virtual void 
knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors, + OutputArray matches, + int k, + InputArray mask = noArray(), + Stream& stream = Stream::Null()); + + virtual void knnMatchAsync(InputArray queryDescriptors, + OutputArray matches, + int k, + const std::vector& masks = std::vector(), + Stream& stream = Stream::Null()); + + virtual void knnMatchConvert(InputArray gpu_matches, + std::vector< std::vector >& matches, + bool compactResult = false); + + virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors, + std::vector >& matches, + float maxDistance, + InputArray mask = noArray(), + bool compactResult = false); + + virtual void radiusMatch(InputArray queryDescriptors, + std::vector >& matches, + float maxDistance, + const std::vector& masks = std::vector(), + bool compactResult = false); + + virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors, + OutputArray matches, + float maxDistance, + InputArray mask = noArray(), + Stream& stream = Stream::Null()); + + virtual void radiusMatchAsync(InputArray queryDescriptors, + OutputArray matches, + float maxDistance, + const std::vector& masks = std::vector(), + Stream& stream = Stream::Null()); + + virtual void radiusMatchConvert(InputArray gpu_matches, + std::vector< std::vector >& matches, + bool compactResult = false); + + private: + int norm_; + std::vector trainDescCollection_; + }; + + // + // 1 to 1 match + // + + void BFMatcher_Impl::match(InputArray _queryDescriptors, InputArray _trainDescriptors, + std::vector& matches, + InputArray _mask) + { + GpuMat d_matches; + matchAsync(_queryDescriptors, _trainDescriptors, d_matches, _mask); + matchConvert(d_matches, matches); + } + + void BFMatcher_Impl::match(InputArray _queryDescriptors, + std::vector& matches, + const std::vector& masks) + { + GpuMat d_matches; + matchAsync(_queryDescriptors, d_matches, masks); + matchConvert(d_matches, matches); + } + + void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors, + OutputArray _matches, + InputArray _mask, + Stream& stream) + { + using namespace cv::cuda::device::bf_match; + + const GpuMat query = _queryDescriptors.getGpuMat(); + const GpuMat train = _trainDescriptors.getGpuMat(); + const GpuMat mask = _mask.getGpuMat(); + + if (query.empty() || train.empty()) + { + _matches.release(); + return; + } + + CV_Assert( query.channels() == 1 && query.depth() < CV_64F ); + CV_Assert( train.cols == query.cols && train.type() == query.type() ); + CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) ); + + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, + cudaStream_t stream); + + static const caller_t callersL1[] = + { + matchL1_gpu, 0/*matchL1_gpu*/, + matchL1_gpu, matchL1_gpu, + matchL1_gpu, matchL1_gpu + }; + static const caller_t callersL2[] = + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, matchL2_gpu + }; + static const caller_t callersHamming[] = + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/ + }; + + const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? 
callersL2 : callersHamming; + + const caller_t func = callers[query.depth()]; + if (func == 0) + { + CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm"); + } + + const int nQuery = query.rows; + + _matches.create(2, nQuery, CV_32SC1); + GpuMat matches = _matches.getGpuMat(); + + GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0)); + GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(1)); + + func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream)); + } + + void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors, + OutputArray _matches, + const std::vector& masks, + Stream& stream) + { + using namespace cv::cuda::device::bf_match; + + const GpuMat query = _queryDescriptors.getGpuMat(); + + if (query.empty() || trainDescCollection_.empty()) + { + _matches.release(); + return; + } + + CV_Assert( query.channels() == 1 && query.depth() < CV_64F ); + + GpuMat trainCollection, maskCollection; + makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection); + + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, + cudaStream_t stream); + + static const caller_t callersL1[] = + { + matchL1_gpu, 0/*matchL1_gpu*/, + matchL1_gpu, matchL1_gpu, + matchL1_gpu, matchL1_gpu + }; + static const caller_t callersL2[] = + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, matchL2_gpu + }; + static const caller_t callersHamming[] = + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/ + }; + + const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming; + + const caller_t func = callers[query.depth()]; + if (func == 0) + { + CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm"); + } + + const int nQuery = query.rows; + + _matches.create(3, nQuery, CV_32SC1); + GpuMat matches = _matches.getGpuMat(); + + GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0)); + GpuMat imgIdx(1, nQuery, CV_32SC1, matches.ptr(1)); + GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(2)); + + func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream)); + } + + void BFMatcher_Impl::matchConvert(InputArray _gpu_matches, + std::vector& matches) + { + Mat gpu_matches; + if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT) + { + _gpu_matches.getGpuMat().download(gpu_matches); + } + else + { + gpu_matches = _gpu_matches.getMat(); + } + + if (gpu_matches.empty()) + { + matches.clear(); + return; + } + + CV_Assert( (gpu_matches.type() == CV_32SC1) && (gpu_matches.rows == 2 || gpu_matches.rows == 3) ); + + const int nQuery = gpu_matches.cols; + + matches.clear(); + matches.reserve(nQuery); + + const int* trainIdxPtr = NULL; + const int* imgIdxPtr = NULL; + const float* distancePtr = NULL; + + if (gpu_matches.rows == 2) + { + trainIdxPtr = gpu_matches.ptr(0); + distancePtr = gpu_matches.ptr(1); + } + else + { + trainIdxPtr = gpu_matches.ptr(0); + imgIdxPtr = gpu_matches.ptr(1); + distancePtr = gpu_matches.ptr(2); + } + + for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) + { + const int trainIdx = trainIdxPtr[queryIdx]; + if (trainIdx == -1) + continue; + + const int imgIdx = imgIdxPtr ? 
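    // -----------------------------------------------------------------------
    // Illustrative sketch (not part of this patch): matching one query
    // descriptor set against a collection of train sets through the new
    // interface above. The descriptor matrices d_query, d_train1 and
    // d_train2 are assumed to already reside on the GPU.
    //
    //     cv::Ptr<cv::cuda::DescriptorMatcher> matcher =
    //         cv::cuda::DescriptorMatcher::createBFMatcher(cv::NORM_HAMMING);
    //
    //     matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train1));
    //     matcher->add(std::vector<cv::cuda::GpuMat>(1, d_train2));
    //
    //     std::vector<cv::DMatch> matches;
    //     matcher->match(d_query, matches);   // collection overload
    //     // matches[i].imgIdx tells which of the added train sets was hit.
    // -----------------------------------------------------------------------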
imgIdxPtr[queryIdx] : 0; + const float distance = distancePtr[queryIdx]; + + DMatch m(queryIdx, trainIdx, imgIdx, distance); + + matches.push_back(m); + } + } + + // + // knn match + // + + void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors, InputArray _trainDescriptors, + std::vector >& matches, + int k, + InputArray _mask, + bool compactResult) + { + GpuMat d_matches; + knnMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, k, _mask); + knnMatchConvert(d_matches, matches, compactResult); + } + + void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors, + std::vector >& matches, + int k, + const std::vector& masks, + bool compactResult) + { + if (k == 2) + { + GpuMat d_matches; + knnMatchAsync(_queryDescriptors, d_matches, k, masks); + knnMatchConvert(d_matches, matches, compactResult); + } + else + { + const GpuMat query = _queryDescriptors.getGpuMat(); + + if (query.empty() || trainDescCollection_.empty()) + { + matches.clear(); + return; + } + + CV_Assert( query.channels() == 1 && query.depth() < CV_64F ); + + std::vector< std::vector > curMatches; + std::vector temp; + temp.reserve(2 * k); + + matches.resize(query.rows); + for (size_t i = 0; i < matches.size(); ++i) + matches[i].reserve(k); + + for (size_t imgIdx = 0; imgIdx < trainDescCollection_.size(); ++imgIdx) + { + knnMatch(query, trainDescCollection_[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]); + + for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx) + { + std::vector& localMatch = curMatches[queryIdx]; + std::vector& globalMatch = matches[queryIdx]; + + for (size_t i = 0; i < localMatch.size(); ++i) + localMatch[i].imgIdx = imgIdx; + + temp.clear(); + std::merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), std::back_inserter(temp)); + + globalMatch.clear(); + const size_t count = std::min(static_cast(k), temp.size()); + std::copy(temp.begin(), temp.begin() + count, std::back_inserter(globalMatch)); + } + } + + if (compactResult) + { + std::vector< std::vector >::iterator new_end = std::remove_if(matches.begin(), matches.end(), std::mem_fun_ref(&std::vector::empty)); + matches.erase(new_end, matches.end()); } } + } - if (compactResult) + void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors, + OutputArray _matches, + int k, + InputArray _mask, + Stream& stream) + { + using namespace cv::cuda::device::bf_knnmatch; + + const GpuMat query = _queryDescriptors.getGpuMat(); + const GpuMat train = _trainDescriptors.getGpuMat(); + const GpuMat mask = _mask.getGpuMat(); + + if (query.empty() || train.empty()) { - std::vector< std::vector >::iterator new_end = remove_if(matches.begin(), matches.end(), mem_fun_ref(&std::vector::empty)); - matches.erase(new_end, matches.end()); + _matches.release(); + return; + } + + CV_Assert( query.channels() == 1 && query.depth() < CV_64F ); + CV_Assert( train.cols == query.cols && train.type() == query.type() ); + CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) ); + + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask, + const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist, + cudaStream_t stream); + + static const caller_t callersL1[] = + { + matchL1_gpu, 0/*matchL1_gpu*/, + matchL1_gpu, matchL1_gpu, + matchL1_gpu, matchL1_gpu + }; + static const caller_t callersL2[] = + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, 
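    // -----------------------------------------------------------------------
    // Illustrative sketch (not part of this patch): the asynchronous 1-to-1
    // path. matchAsync writes a packed 2 x nQuery (3 x nQuery for the
    // collection overload) CV_32SC1 GpuMat and matchConvert downloads and
    // unpacks it; the stream and variable names below are assumptions.
    //
    //     cv::cuda::GpuMat d_matches;
    //     cv::cuda::Stream stream;
    //
    //     matcher->matchAsync(d_query, d_train, d_matches, cv::noArray(), stream);
    //     // ... enqueue more work on the same stream ...
    //     stream.waitForCompletion();
    //
    //     std::vector<cv::DMatch> matches;
    //     matcher->matchConvert(d_matches, matches);
    // -----------------------------------------------------------------------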
+ 0/*matchL2_gpu*/, matchL2_gpu + }; + static const caller_t callersHamming[] = + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/ + }; + + const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming; + + const caller_t func = callers[query.depth()]; + if (func == 0) + { + CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm"); + } + + const int nQuery = query.rows; + const int nTrain = train.rows; + + GpuMat trainIdx, distance, allDist; + if (k == 2) + { + _matches.create(2, nQuery, CV_32SC2); + GpuMat matches = _matches.getGpuMat(); + + trainIdx = GpuMat(1, nQuery, CV_32SC2, matches.ptr(0)); + distance = GpuMat(1, nQuery, CV_32FC2, matches.ptr(1)); + } + else + { + _matches.create(2 * nQuery, k, CV_32SC1); + GpuMat matches = _matches.getGpuMat(); + + trainIdx = GpuMat(nQuery, k, CV_32SC1, matches.ptr(0), matches.step); + distance = GpuMat(nQuery, k, CV_32FC1, matches.ptr(nQuery), matches.step); + + BufferPool pool(stream); + allDist = pool.getBuffer(nQuery, nTrain, CV_32FC1); + } + + trainIdx.setTo(Scalar::all(-1), stream); + + func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream)); + } + + void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors, + OutputArray _matches, + int k, + const std::vector& masks, + Stream& stream) + { + using namespace cv::cuda::device::bf_knnmatch; + + if (k != 2) + { + CV_Error(Error::StsNotImplemented, "only k=2 mode is supported for now"); + } + + const GpuMat query = _queryDescriptors.getGpuMat(); + + if (query.empty() || trainDescCollection_.empty()) + { + _matches.release(); + return; + } + + CV_Assert( query.channels() == 1 && query.depth() < CV_64F ); + + GpuMat trainCollection, maskCollection; + makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection); + + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz& masks, + const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance, + cudaStream_t stream); + + static const caller_t callersL1[] = + { + match2L1_gpu, 0/*match2L1_gpu*/, + match2L1_gpu, match2L1_gpu, + match2L1_gpu, match2L1_gpu + }; + static const caller_t callersL2[] = + { + 0/*match2L2_gpu*/, 0/*match2L2_gpu*/, + 0/*match2L2_gpu*/, 0/*match2L2_gpu*/, + 0/*match2L2_gpu*/, match2L2_gpu + }; + static const caller_t callersHamming[] = + { + match2Hamming_gpu, 0/*match2Hamming_gpu*/, + match2Hamming_gpu, 0/*match2Hamming_gpu*/, + match2Hamming_gpu, 0/*match2Hamming_gpu*/ + }; + + const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? 
callersL2 : callersHamming; + + const caller_t func = callers[query.depth()]; + if (func == 0) + { + CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm"); + } + + const int nQuery = query.rows; + + _matches.create(3, nQuery, CV_32SC2); + GpuMat matches = _matches.getGpuMat(); + + GpuMat trainIdx(1, nQuery, CV_32SC2, matches.ptr(0)); + GpuMat imgIdx(1, nQuery, CV_32SC2, matches.ptr(1)); + GpuMat distance(1, nQuery, CV_32FC2, matches.ptr(2)); + + trainIdx.setTo(Scalar::all(-1), stream); + + func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream)); + } + + void BFMatcher_Impl::knnMatchConvert(InputArray _gpu_matches, + std::vector< std::vector >& matches, + bool compactResult) + { + Mat gpu_matches; + if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT) + { + _gpu_matches.getGpuMat().download(gpu_matches); + } + else + { + gpu_matches = _gpu_matches.getMat(); + } + + if (gpu_matches.empty()) + { + matches.clear(); + return; + } + + CV_Assert( ((gpu_matches.type() == CV_32SC2) && (gpu_matches.rows == 2 || gpu_matches.rows == 3)) || + (gpu_matches.type() == CV_32SC1) ); + + int nQuery = -1, k = -1; + + const int* trainIdxPtr = NULL; + const int* imgIdxPtr = NULL; + const float* distancePtr = NULL; + + if (gpu_matches.type() == CV_32SC2) + { + nQuery = gpu_matches.cols; + k = 2; + + if (gpu_matches.rows == 2) + { + trainIdxPtr = gpu_matches.ptr(0); + distancePtr = gpu_matches.ptr(1); + } + else + { + trainIdxPtr = gpu_matches.ptr(0); + imgIdxPtr = gpu_matches.ptr(1); + distancePtr = gpu_matches.ptr(2); + } + } + else + { + nQuery = gpu_matches.rows / 2; + k = gpu_matches.cols; + + trainIdxPtr = gpu_matches.ptr(0); + distancePtr = gpu_matches.ptr(nQuery); + } + + matches.clear(); + matches.reserve(nQuery); + + for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) + { + matches.push_back(std::vector()); + std::vector& curMatches = matches.back(); + curMatches.reserve(k); + + for (int i = 0; i < k; ++i) + { + const int trainIdx = *trainIdxPtr; + if (trainIdx == -1) + continue; + + const int imgIdx = imgIdxPtr ? 
*imgIdxPtr : 0; + const float distance = *distancePtr; + + DMatch m(queryIdx, trainIdx, imgIdx, distance); + + curMatches.push_back(m); + + ++trainIdxPtr; + ++distancePtr; + if (imgIdxPtr) + ++imgIdxPtr; + } + + if (compactResult && curMatches.empty()) + { + matches.pop_back(); + } + } + } + + // + // radius match + // + + void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors, InputArray _trainDescriptors, + std::vector >& matches, + float maxDistance, + InputArray _mask, + bool compactResult) + { + GpuMat d_matches; + radiusMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, maxDistance, _mask); + radiusMatchConvert(d_matches, matches, compactResult); + } + + void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors, + std::vector >& matches, + float maxDistance, + const std::vector& masks, + bool compactResult) + { + GpuMat d_matches; + radiusMatchAsync(_queryDescriptors, d_matches, maxDistance, masks); + radiusMatchConvert(d_matches, matches, compactResult); + } + + void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors, + OutputArray _matches, + float maxDistance, + InputArray _mask, + Stream& stream) + { + using namespace cv::cuda::device::bf_radius_match; + + const GpuMat query = _queryDescriptors.getGpuMat(); + const GpuMat train = _trainDescriptors.getGpuMat(); + const GpuMat mask = _mask.getGpuMat(); + + if (query.empty() || train.empty()) + { + _matches.release(); + return; + } + + CV_Assert( query.channels() == 1 && query.depth() < CV_64F ); + CV_Assert( train.cols == query.cols && train.type() == query.type() ); + CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) ); + + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, + const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, + cudaStream_t stream); + + static const caller_t callersL1[] = + { + matchL1_gpu, 0/*matchL1_gpu*/, + matchL1_gpu, matchL1_gpu, + matchL1_gpu, matchL1_gpu + }; + static const caller_t callersL2[] = + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, matchL2_gpu + }; + static const caller_t callersHamming[] = + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/ + }; + + const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? 
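    // -----------------------------------------------------------------------
    // Illustrative sketch (not part of this patch): the k = 2 k-NN path that
    // the stitching matcher further down in this series combines with a
    // ratio test. The 0.75f threshold is an assumption, not something the
    // patch prescribes.
    //
    //     std::vector< std::vector<cv::DMatch> > knn;
    //     matcher->knnMatch(d_query, d_train, knn, 2);
    //
    //     std::vector<cv::DMatch> good;
    //     for (size_t i = 0; i < knn.size(); ++i)
    //         if (knn[i].size() == 2 &&
    //             knn[i][0].distance < 0.75f * knn[i][1].distance)
    //             good.push_back(knn[i][0]);
    // -----------------------------------------------------------------------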
callersL2 : callersHamming; + + const caller_t func = callers[query.depth()]; + if (func == 0) + { + CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm"); + } + + const int nQuery = query.rows; + const int nTrain = train.rows; + + const int cols = std::max((nTrain / 100), nQuery); + + _matches.create(2 * nQuery + 1, cols, CV_32SC1); + GpuMat matches = _matches.getGpuMat(); + + GpuMat trainIdx(nQuery, cols, CV_32SC1, matches.ptr(0), matches.step); + GpuMat distance(nQuery, cols, CV_32FC1, matches.ptr(nQuery), matches.step); + GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(2 * nQuery)); + + nMatches.setTo(Scalar::all(0), stream); + + func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream)); + } + + void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors, + OutputArray _matches, + float maxDistance, + const std::vector& masks, + Stream& stream) + { + using namespace cv::cuda::device::bf_radius_match; + + const GpuMat query = _queryDescriptors.getGpuMat(); + + if (query.empty() || trainDescCollection_.empty()) + { + _matches.release(); + return; + } + + CV_Assert( query.channels() == 1 && query.depth() < CV_64F ); + + GpuMat trainCollection, maskCollection; + makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection); + + typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, + const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, + cudaStream_t stream); + + static const caller_t callersL1[] = + { + matchL1_gpu, 0/*matchL1_gpu*/, + matchL1_gpu, matchL1_gpu, + matchL1_gpu, matchL1_gpu + }; + static const caller_t callersL2[] = + { + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, + 0/*matchL2_gpu*/, matchL2_gpu + }; + static const caller_t callersHamming[] = + { + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/, + matchHamming_gpu, 0/*matchHamming_gpu*/ + }; + + const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming; + + const caller_t func = callers[query.depth()]; + if (func == 0) + { + CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm"); + } + + const int nQuery = query.rows; + + _matches.create(3 * nQuery + 1, nQuery, CV_32FC1); + GpuMat matches = _matches.getGpuMat(); + + GpuMat trainIdx(nQuery, nQuery, CV_32SC1, matches.ptr(0), matches.step); + GpuMat imgIdx(nQuery, nQuery, CV_32SC1, matches.ptr(nQuery), matches.step); + GpuMat distance(nQuery, nQuery, CV_32FC1, matches.ptr(2 * nQuery), matches.step); + GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(3 * nQuery)); + + nMatches.setTo(Scalar::all(0), stream); + + std::vector trains_(trainDescCollection_.begin(), trainDescCollection_.end()); + std::vector masks_(masks.begin(), masks.end()); + + func(query, &trains_[0], static_cast(trains_.size()), maxDistance, masks_.size() == 0 ? 
0 : &masks_[0], + trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream)); + } + + void BFMatcher_Impl::radiusMatchConvert(InputArray _gpu_matches, + std::vector< std::vector >& matches, + bool compactResult) + { + Mat gpu_matches; + if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT) + { + _gpu_matches.getGpuMat().download(gpu_matches); + } + else + { + gpu_matches = _gpu_matches.getMat(); + } + + if (gpu_matches.empty()) + { + matches.clear(); + return; + } + + CV_Assert( gpu_matches.type() == CV_32SC1 || gpu_matches.type() == CV_32FC1 ); + + int nQuery = -1; + + const int* trainIdxPtr = NULL; + const int* imgIdxPtr = NULL; + const float* distancePtr = NULL; + const int* nMatchesPtr = NULL; + + if (gpu_matches.type() == CV_32SC1) + { + nQuery = (gpu_matches.rows - 1) / 2; + + trainIdxPtr = gpu_matches.ptr(0); + distancePtr = gpu_matches.ptr(nQuery); + nMatchesPtr = gpu_matches.ptr(2 * nQuery); + } + else + { + nQuery = (gpu_matches.rows - 1) / 3; + + trainIdxPtr = gpu_matches.ptr(0); + imgIdxPtr = gpu_matches.ptr(nQuery); + distancePtr = gpu_matches.ptr(2 * nQuery); + nMatchesPtr = gpu_matches.ptr(3 * nQuery); + } + + matches.clear(); + matches.reserve(nQuery); + + for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) + { + const int nMatched = std::min(nMatchesPtr[queryIdx], gpu_matches.cols); + + if (nMatched == 0) + { + if (!compactResult) + { + matches.push_back(std::vector()); + } + } + else + { + matches.push_back(std::vector(nMatched)); + std::vector& curMatches = matches.back(); + + for (int i = 0; i < nMatched; ++i) + { + const int trainIdx = trainIdxPtr[i]; + + const int imgIdx = imgIdxPtr ? imgIdxPtr[i] : 0; + const float distance = distancePtr[i]; + + DMatch m(queryIdx, trainIdx, imgIdx, distance); + + curMatches[i] = m; + } + + std::sort(curMatches.begin(), curMatches.end()); + } + + trainIdxPtr += gpu_matches.cols; + distancePtr += gpu_matches.cols; + if (imgIdxPtr) + imgIdxPtr += gpu_matches.cols; } } } -//////////////////////////////////////////////////////////////////// -// RadiusMatch - -void cv::cuda::BFMatcher_CUDA::radiusMatchSingle(const GpuMat& query, const GpuMat& train, - GpuMat& trainIdx, GpuMat& distance, GpuMat& nMatches, float maxDistance, - const GpuMat& mask, Stream& stream) +Ptr cv::cuda::DescriptorMatcher::createBFMatcher(int norm) { - if (query.empty() || train.empty()) - return; - - using namespace cv::cuda::device::bf_radius_match; - - typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask, - const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, - cudaStream_t stream); - - static const caller_t callersL1[] = - { - matchL1_gpu, 0/*matchL1_gpu*/, - matchL1_gpu, matchL1_gpu, - matchL1_gpu, matchL1_gpu - }; - static const caller_t callersL2[] = - { - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, matchL2_gpu - }; - static const caller_t callersHamming[] = - { - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/ - }; - - const int nQuery = query.rows; - const int nTrain = train.rows; - - CV_Assert(query.channels() == 1 && query.depth() < CV_64F); - CV_Assert(train.type() == query.type() && train.cols == query.cols); - CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size())); - CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING); - - const caller_t* callers = norm == NORM_L1 
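    // -----------------------------------------------------------------------
    // Illustrative sketch (not part of this patch): the radius-match path
    // implemented above. maxDistance is a descriptor-space threshold (e.g. a
    // Hamming distance), not a pixel distance; the value 0.4f is an
    // arbitrary assumption.
    //
    //     std::vector< std::vector<cv::DMatch> > matches;
    //     matcher->radiusMatch(d_query, d_train, matches, 0.4f);
    //     // matches[q] holds every train descriptor within maxDistance of
    //     // query row q, sorted by increasing distance
    //     // (see radiusMatchConvert above).
    // -----------------------------------------------------------------------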
? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming; - - ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches); - if (trainIdx.empty()) - { - ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32SC1, trainIdx); - ensureSizeIsEnough(nQuery, std::max((nTrain / 100), 10), CV_32FC1, distance); - } - - nMatches.setTo(Scalar::all(0), stream); - - caller_t func = callers[query.depth()]; - CV_Assert(func != 0); - - func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream)); -} - -void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& distance, const GpuMat& nMatches, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || distance.empty() || nMatches.empty()) - return; - - Mat trainIdxCPU(trainIdx); - Mat distanceCPU(distance); - Mat nMatchesCPU(nMatches); - - radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult); -} - -void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || distance.empty() || nMatches.empty()) - return; - - CV_Assert(trainIdx.type() == CV_32SC1); - CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size()); - CV_Assert(nMatches.type() == CV_32SC1 && nMatches.cols == trainIdx.rows); - - const int nQuery = trainIdx.rows; - - matches.clear(); - matches.reserve(nQuery); - - const int* nMatches_ptr = nMatches.ptr(); - - for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) - { - const int* trainIdx_ptr = trainIdx.ptr(queryIdx); - const float* distance_ptr = distance.ptr(queryIdx); - - const int nMatched = std::min(nMatches_ptr[queryIdx], trainIdx.cols); - - if (nMatched == 0) - { - if (!compactResult) - matches.push_back(std::vector()); - continue; - } - - matches.push_back(std::vector(nMatched)); - std::vector& curMatches = matches.back(); - - for (int i = 0; i < nMatched; ++i, ++trainIdx_ptr, ++distance_ptr) - { - int _trainIdx = *trainIdx_ptr; - - float _distance = *distance_ptr; - - DMatch m(queryIdx, _trainIdx, 0, _distance); - - curMatches[i] = m; - } - - sort(curMatches.begin(), curMatches.end()); - } -} - -void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat& query, const GpuMat& train, - std::vector< std::vector >& matches, float maxDistance, const GpuMat& mask, bool compactResult) -{ - GpuMat trainIdx, distance, nMatches; - radiusMatchSingle(query, train, trainIdx, distance, nMatches, maxDistance, mask); - radiusMatchDownload(trainIdx, distance, nMatches, matches, compactResult); -} - -void cv::cuda::BFMatcher_CUDA::radiusMatchCollection(const GpuMat& query, GpuMat& trainIdx, GpuMat& imgIdx, GpuMat& distance, GpuMat& nMatches, - float maxDistance, const std::vector& masks, Stream& stream) -{ - if (query.empty() || empty()) - return; - - using namespace cv::cuda::device::bf_radius_match; - - typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks, - const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz& nMatches, - cudaStream_t stream); - - static const caller_t callersL1[] = - { - matchL1_gpu, 0/*matchL1_gpu*/, - matchL1_gpu, matchL1_gpu, - matchL1_gpu, matchL1_gpu - }; - static const caller_t callersL2[] = - { - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, 0/*matchL2_gpu*/, - 0/*matchL2_gpu*/, matchL2_gpu - }; - static const caller_t 
callersHamming[] = - { - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/, - matchHamming_gpu, 0/*matchHamming_gpu*/ - }; - - const int nQuery = query.rows; - - CV_Assert(query.channels() == 1 && query.depth() < CV_64F); - CV_Assert(trainIdx.empty() || (trainIdx.rows == nQuery && trainIdx.size() == distance.size() && trainIdx.size() == imgIdx.size())); - CV_Assert(norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING); - - const caller_t* callers = norm == NORM_L1 ? callersL1 : norm == NORM_L2 ? callersL2 : callersHamming; - - ensureSizeIsEnough(1, nQuery, CV_32SC1, nMatches); - if (trainIdx.empty()) - { - ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32SC1, trainIdx); - ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32SC1, imgIdx); - ensureSizeIsEnough(nQuery, std::max((nQuery / 100), 10), CV_32FC1, distance); - } - - nMatches.setTo(Scalar::all(0), stream); - - caller_t func = callers[query.depth()]; - CV_Assert(func != 0); - - std::vector trains_(trainDescCollection.begin(), trainDescCollection.end()); - std::vector masks_(masks.begin(), masks.end()); - - func(query, &trains_[0], static_cast(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0], - trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream)); -} - -void cv::cuda::BFMatcher_CUDA::radiusMatchDownload(const GpuMat& trainIdx, const GpuMat& imgIdx, const GpuMat& distance, const GpuMat& nMatches, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty()) - return; - - Mat trainIdxCPU(trainIdx); - Mat imgIdxCPU(imgIdx); - Mat distanceCPU(distance); - Mat nMatchesCPU(nMatches); - - radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult); -} - -void cv::cuda::BFMatcher_CUDA::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, - std::vector< std::vector >& matches, bool compactResult) -{ - if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty()) - return; - - CV_Assert(trainIdx.type() == CV_32SC1); - CV_Assert(imgIdx.type() == CV_32SC1 && imgIdx.size() == trainIdx.size()); - CV_Assert(distance.type() == CV_32FC1 && distance.size() == trainIdx.size()); - CV_Assert(nMatches.type() == CV_32SC1 && nMatches.cols == trainIdx.rows); - - const int nQuery = trainIdx.rows; - - matches.clear(); - matches.reserve(nQuery); - - const int* nMatches_ptr = nMatches.ptr(); - - for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx) - { - const int* trainIdx_ptr = trainIdx.ptr(queryIdx); - const int* imgIdx_ptr = imgIdx.ptr(queryIdx); - const float* distance_ptr = distance.ptr(queryIdx); - - const int nMatched = std::min(nMatches_ptr[queryIdx], trainIdx.cols); - - if (nMatched == 0) - { - if (!compactResult) - matches.push_back(std::vector()); - continue; - } - - matches.push_back(std::vector()); - std::vector& curMatches = matches.back(); - curMatches.reserve(nMatched); - - for (int i = 0; i < nMatched; ++i, ++trainIdx_ptr, ++imgIdx_ptr, ++distance_ptr) - { - int _trainIdx = *trainIdx_ptr; - int _imgIdx = *imgIdx_ptr; - float _distance = *distance_ptr; - - DMatch m(queryIdx, _trainIdx, _imgIdx, _distance); - - curMatches.push_back(m); - } - - sort(curMatches.begin(), curMatches.end()); - } -} - -void cv::cuda::BFMatcher_CUDA::radiusMatch(const GpuMat& query, std::vector< std::vector >& matches, - float maxDistance, const std::vector& masks, bool compactResult) -{ - GpuMat 
trainIdx, imgIdx, distance, nMatches; - radiusMatchCollection(query, trainIdx, imgIdx, distance, nMatches, maxDistance, masks); - radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, matches, compactResult); + return makePtr(norm); } #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/cudafeatures2d/test/test_features2d.cpp b/modules/cudafeatures2d/test/test_features2d.cpp index 25ba48faf..3046a604b 100644 --- a/modules/cudafeatures2d/test/test_features2d.cpp +++ b/modules/cudafeatures2d/test/test_features2d.cpp @@ -285,7 +285,8 @@ PARAM_TEST_CASE(BruteForceMatcher, cv::cuda::DeviceInfo, NormCode, DescriptorSiz CUDA_TEST_P(BruteForceMatcher, Match_Single) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); cv::cuda::GpuMat mask; if (useMask) @@ -295,7 +296,7 @@ CUDA_TEST_P(BruteForceMatcher, Match_Single) } std::vector matches; - matcher.match(loadMat(query), loadMat(train), matches, mask); + matcher->match(loadMat(query), loadMat(train), matches, mask); ASSERT_EQ(static_cast(queryDescCount), matches.size()); @@ -312,13 +313,14 @@ CUDA_TEST_P(BruteForceMatcher, Match_Single) CUDA_TEST_P(BruteForceMatcher, Match_Collection) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); cv::cuda::GpuMat d_train(train); // make add() twice to test such case - matcher.add(std::vector(1, d_train.rowRange(0, train.rows / 2))); - matcher.add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); + matcher->add(std::vector(1, d_train.rowRange(0, train.rows / 2))); + matcher->add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); // prepare masks (make first nearest match illegal) std::vector masks(2); @@ -331,9 +333,9 @@ CUDA_TEST_P(BruteForceMatcher, Match_Collection) std::vector matches; if (useMask) - matcher.match(cv::cuda::GpuMat(query), matches, masks); + matcher->match(cv::cuda::GpuMat(query), matches, masks); else - matcher.match(cv::cuda::GpuMat(query), matches); + matcher->match(cv::cuda::GpuMat(query), matches); ASSERT_EQ(static_cast(queryDescCount), matches.size()); @@ -366,7 +368,8 @@ CUDA_TEST_P(BruteForceMatcher, Match_Collection) CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); const int knn = 2; @@ -378,7 +381,7 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single) } std::vector< std::vector > matches; - matcher.knnMatch(loadMat(query), loadMat(train), matches, knn, mask); + matcher->knnMatch(loadMat(query), loadMat(train), matches, knn, mask); ASSERT_EQ(static_cast(queryDescCount), matches.size()); @@ -405,7 +408,8 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Single) CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); const int knn = 3; @@ -417,7 +421,7 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single) } std::vector< std::vector > matches; - matcher.knnMatch(loadMat(query), loadMat(train), matches, knn, mask); + matcher->knnMatch(loadMat(query), loadMat(train), matches, knn, mask); ASSERT_EQ(static_cast(queryDescCount), matches.size()); @@ -444,15 +448,16 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Single) CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + 
cv::cuda::DescriptorMatcher::createBFMatcher(normCode); const int knn = 2; cv::cuda::GpuMat d_train(train); // make add() twice to test such case - matcher.add(std::vector(1, d_train.rowRange(0, train.rows / 2))); - matcher.add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); + matcher->add(std::vector(1, d_train.rowRange(0, train.rows / 2))); + matcher->add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); // prepare masks (make first nearest match illegal) std::vector masks(2); @@ -466,9 +471,9 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection) std::vector< std::vector > matches; if (useMask) - matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks); + matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn, masks); else - matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn); + matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn); ASSERT_EQ(static_cast(queryDescCount), matches.size()); @@ -506,15 +511,16 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_2_Collection) CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); const int knn = 3; cv::cuda::GpuMat d_train(train); // make add() twice to test such case - matcher.add(std::vector(1, d_train.rowRange(0, train.rows / 2))); - matcher.add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); + matcher->add(std::vector(1, d_train.rowRange(0, train.rows / 2))); + matcher->add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); // prepare masks (make first nearest match illegal) std::vector masks(2); @@ -528,9 +534,9 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection) std::vector< std::vector > matches; if (useMask) - matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn, masks); + matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn, masks); else - matcher.knnMatch(cv::cuda::GpuMat(query), matches, knn); + matcher->knnMatch(cv::cuda::GpuMat(query), matches, knn); ASSERT_EQ(static_cast(queryDescCount), matches.size()); @@ -568,7 +574,8 @@ CUDA_TEST_P(BruteForceMatcher, KnnMatch_3_Collection) CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); const float radius = 1.f / countFactor; @@ -577,7 +584,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single) try { std::vector< std::vector > matches; - matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius); + matcher->radiusMatch(loadMat(query), loadMat(train), matches, radius); } catch (const cv::Exception& e) { @@ -594,7 +601,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single) } std::vector< std::vector > matches; - matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius, mask); + matcher->radiusMatch(loadMat(query), loadMat(train), matches, radius, mask); ASSERT_EQ(static_cast(queryDescCount), matches.size()); @@ -617,7 +624,8 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Single) CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection) { - cv::cuda::BFMatcher_CUDA matcher(normCode); + cv::Ptr matcher = + cv::cuda::DescriptorMatcher::createBFMatcher(normCode); const int n = 3; const float radius = 1.f / countFactor * n; @@ -625,8 +633,8 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection) cv::cuda::GpuMat d_train(train); // make add() twice to test such case - matcher.add(std::vector(1, d_train.rowRange(0, train.rows / 2))); - 
matcher.add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); + matcher->add(std::vector(1, d_train.rowRange(0, train.rows / 2))); + matcher->add(std::vector(1, d_train.rowRange(train.rows / 2, train.rows))); // prepare masks (make first nearest match illegal) std::vector masks(2); @@ -642,7 +650,7 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection) try { std::vector< std::vector > matches; - matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks); + matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks); } catch (const cv::Exception& e) { @@ -654,9 +662,9 @@ CUDA_TEST_P(BruteForceMatcher, RadiusMatch_Collection) std::vector< std::vector > matches; if (useMask) - matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks); + matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius, masks); else - matcher.radiusMatch(cv::cuda::GpuMat(query), matches, radius); + matcher->radiusMatch(cv::cuda::GpuMat(query), matches, radius); ASSERT_EQ(static_cast(queryDescCount), matches.size()); diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp index 49ee0f474..ee05268d7 100644 --- a/modules/stitching/src/matchers.cpp +++ b/modules/stitching/src/matchers.cpp @@ -154,7 +154,7 @@ void CpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat matches_info.matches.clear(); - Ptr matcher; + Ptr matcher; #if 0 // TODO check this if (ocl::useOpenCL()) { @@ -220,13 +220,13 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat descriptors1_.upload(features1.descriptors); descriptors2_.upload(features2.descriptors); - BFMatcher_CUDA matcher(NORM_L2); + Ptr matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2); + MatchesSet matches; // Find 1->2 matches pair_matches.clear(); - matcher.knnMatchSingle(descriptors1_, descriptors2_, train_idx_, distance_, all_dist_, 2); - matcher.knnMatchDownload(train_idx_, distance_, pair_matches); + matcher->knnMatch(descriptors1_, descriptors2_, pair_matches, 2); for (size_t i = 0; i < pair_matches.size(); ++i) { if (pair_matches[i].size() < 2) @@ -242,8 +242,7 @@ void GpuMatcher::match(const ImageFeatures &features1, const ImageFeatures &feat // Find 2->1 matches pair_matches.clear(); - matcher.knnMatchSingle(descriptors2_, descriptors1_, train_idx_, distance_, all_dist_, 2); - matcher.knnMatchDownload(train_idx_, distance_, pair_matches); + matcher->knnMatch(descriptors2_, descriptors1_, pair_matches, 2); for (size_t i = 0; i < pair_matches.size(); ++i) { if (pair_matches[i].size() < 2) diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index 0d083e5bd..09094282f 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -379,14 +379,14 @@ TEST(BruteForceMatcher) // Init CUDA matcher - cuda::BFMatcher_CUDA d_matcher(NORM_L2); + Ptr d_matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2); cuda::GpuMat d_query(query); cuda::GpuMat d_train(train); // Output vector< vector > matches(2); - cuda::GpuMat d_trainIdx, d_distance, d_allDist, d_nMatches; + cuda::GpuMat d_matches; SUBTEST << "match"; @@ -396,10 +396,10 @@ TEST(BruteForceMatcher) matcher.match(query, train, matches[0]); CPU_OFF; - d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); + d_matcher->matchAsync(d_query, d_train, d_matches); CUDA_ON; - d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); + d_matcher->matchAsync(d_query, d_train, d_matches); CUDA_OFF; SUBTEST << "knnMatch"; @@ 
-410,10 +410,10 @@ TEST(BruteForceMatcher) matcher.knnMatch(query, train, matches, 2); CPU_OFF; - d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); + d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2); CUDA_ON; - d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); + d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2); CUDA_OFF; SUBTEST << "radiusMatch"; @@ -426,12 +426,10 @@ TEST(BruteForceMatcher) matcher.radiusMatch(query, train, matches, max_distance); CPU_OFF; - d_trainIdx.release(); - - d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); + d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance); CUDA_ON; - d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); + d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance); CUDA_OFF; } From 5f1282afdb0b3d137b0cf2161d5691bc877fc3f3 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 13 Jan 2015 18:23:59 +0300 Subject: [PATCH 8/8] fix documentation warnings --- modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp index 975726973..1d7f4e4e4 100644 --- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp +++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp @@ -89,7 +89,7 @@ public: preferable choices for SIFT and SURF descriptors, NORM_HAMMING should be used with ORB, BRISK and BRIEF). */ - static Ptr createBFMatcher(int norm = cv::NORM_L2); + static Ptr createBFMatcher(int normType = cv::NORM_L2); // // Utility @@ -248,9 +248,6 @@ public: less than k possible matches in total. @param mask Mask specifying permissible matches between an input query and train matrices of descriptors. - @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is - false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, - the matches vector does not contain matches for fully masked-out query descriptors. @param stream CUDA stream. These extended variants of DescriptorMatcher::matchAsync methods find several best matches for each query @@ -335,9 +332,6 @@ public: in Pixels)! @param mask Mask specifying permissible matches between an input query and train matrices of descriptors. - @param compactResult Parameter used when the mask (or masks) is not empty. If compactResult is - false, the matches vector has the same size as queryDescriptors rows. If compactResult is true, - the matches vector does not contain matches for fully masked-out query descriptors. @param stream CUDA stream. For each query descriptor, the methods find such training descriptors that the distance between the