diff --git a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
index a193eb6f8..4a78d50e6 100644
--- a/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
+++ b/modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp
@@ -284,9 +284,11 @@ public:
     virtual int getMaxNumPoints() const = 0;
 };

-/** @brief Class for extracting ORB features and descriptors from an image. :
- */
-class CV_EXPORTS ORB_CUDA
+//
+// ORB
+//
+
+class CV_EXPORTS ORB : public cv::ORB, public Feature2DAsync
 {
 public:
     enum
@@ -300,113 +302,20 @@ public:
         ROWS_COUNT
     };

-    enum
-    {
-        DEFAULT_FAST_THRESHOLD = 20
-    };
-
-    /** @brief Constructor.
-
-    @param nFeatures The number of desired features.
-    @param scaleFactor Coefficient by which we divide the dimensions from one scale pyramid level to
-    the next.
-    @param nLevels The number of levels in the scale pyramid.
-    @param edgeThreshold How far from the boundary the points should be.
-    @param firstLevel The level at which the image is given. If 1, that means we will also look at the
-    image scaleFactor times bigger.
-    @param WTA_K
-    @param scoreType
-    @param patchSize
-     */
-    explicit ORB_CUDA(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31,
-                      int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31);
-
-    /** @overload */
-    void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
-    /** @overload */
-    void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
-
-    /** @brief Detects keypoints and computes descriptors for them.
-
-    @param image Input 8-bit grayscale image.
-    @param mask Optional input mask that marks the regions where we should detect features.
-    @param keypoints The input/output vector of keypoints. Can be stored both in CPU and GPU memory.
-    For GPU memory:
-    - keypoints.ptr\<float\>(X_ROW)[i] contains x coordinate of the i'th feature.
-    - keypoints.ptr\<float\>(Y_ROW)[i] contains y coordinate of the i'th feature.
-    - keypoints.ptr\<float\>(RESPONSE_ROW)[i] contains the response of the i'th feature.
-    - keypoints.ptr\<float\>(ANGLE_ROW)[i] contains orientation of the i'th feature.
-    - keypoints.ptr\<float\>(OCTAVE_ROW)[i] contains the octave of the i'th feature.
-    - keypoints.ptr\<float\>(SIZE_ROW)[i] contains the size of the i'th feature.
-    @param descriptors Computed descriptors. if blurForDescriptor is true, image will be blurred
-    before descriptors calculation.
-     */
-    void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors);
-    /** @overload */
-    void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors);
-
-    /** @brief Download keypoints from GPU to CPU memory.
-     */
-    static void downloadKeyPoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
-    /** @brief Converts keypoints from CUDA representation to vector of KeyPoint.
-     */
-    static void convertKeyPoints(const Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-    //! returns the descriptor size in bytes
-    inline int descriptorSize() const { return kBytes; }
-
-    inline void setFastParams(int threshold, bool nonmaxSuppression = true)
-    {
-        fastDetector_->setThreshold(threshold);
-        fastDetector_->setNonmaxSuppression(nonmaxSuppression);
-    }
-
-    /** @brief Releases inner buffer memory.
-     */
-    void release();
+    static Ptr<ORB> create(int nfeatures=500,
+                           float scaleFactor=1.2f,
+                           int nlevels=8,
+                           int edgeThreshold=31,
+                           int firstLevel=0,
+                           int WTA_K=2,
+                           int scoreType=ORB::HARRIS_SCORE,
+                           int patchSize=31,
+                           int fastThreshold=20,
+                           bool blurForDescriptor=false);

     //! if true, image will be blurred before descriptors calculation
-    bool blurForDescriptor;
-
-private:
-    enum { kBytes = 32 };
-
-    void buildScalePyramids(const GpuMat& image, const GpuMat& mask);
-
-    void computeKeyPointsPyramid();
-
-    void computeDescriptors(GpuMat& descriptors);
-
-    void mergeKeyPoints(GpuMat& keypoints);
-
-    int nFeatures_;
-    float scaleFactor_;
-    int nLevels_;
-    int edgeThreshold_;
-    int firstLevel_;
-    int WTA_K_;
-    int scoreType_;
-    int patchSize_;
-
-    //! The number of desired features per scale
-    std::vector<size_t> n_features_per_level_;
-
-    //! Points to compute BRIEF descriptors from
-    GpuMat pattern_;
-
-    std::vector<GpuMat> imagePyr_;
-    std::vector<GpuMat> maskPyr_;
-
-    GpuMat buf_;
-
-    std::vector<GpuMat> keyPointsPyr_;
-    std::vector<int> keyPointsCount_;
-
-    Ptr<cuda::FastFeatureDetector> fastDetector_;
-
-    Ptr<Filter> blurFilter;
-
-    GpuMat d_keypoints_;
+    virtual void setBlurForDescriptor(bool blurForDescriptor) = 0;
+    virtual bool getBlurForDescriptor() const = 0;
 };

 //! @}
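Usage sketch for the new interface above (not part of the patch): the concrete ORB_CUDA class becomes an abstract cv::cuda::ORB obtained from a factory, so the old constructor arguments and the public blurForDescriptor field turn into create() parameters and a virtual setter. The image path and parameter values below are placeholders.

    #include <vector>
    #include <opencv2/core.hpp>
    #include <opencv2/imgcodecs.hpp>
    #include <opencv2/cudafeatures2d.hpp>

    int main()
    {
        // Upload an 8-bit grayscale image to the GPU.
        cv::Mat img = cv::imread("scene.png", cv::IMREAD_GRAYSCALE);
        cv::cuda::GpuMat d_img(img);

        // The factory replaces the old ORB_CUDA constructor; parameters that
        // are not passed keep the defaults declared above.
        cv::Ptr<cv::cuda::ORB> orb = cv::cuda::ORB::create(1000);
        orb->setBlurForDescriptor(true);   // was the public blurForDescriptor field

        // Synchronous path inherited from cv::Feature2D.
        std::vector<cv::KeyPoint> keypoints;
        cv::cuda::GpuMat d_descriptors;
        orb->detectAndCompute(d_img, cv::noArray(), keypoints, d_descriptors);

        return 0;
    }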
diff --git a/modules/cudafeatures2d/perf/perf_features2d.cpp b/modules/cudafeatures2d/perf/perf_features2d.cpp
index da3cd77db..0dcb0434f 100644
--- a/modules/cudafeatures2d/perf/perf_features2d.cpp
+++ b/modules/cudafeatures2d/perf/perf_features2d.cpp
@@ -109,15 +109,15 @@ PERF_TEST_P(Image_NFeatures, ORB,

     if (PERF_RUN_CUDA())
     {
-        cv::cuda::ORB_CUDA d_orb(nFeatures);
+        cv::Ptr<cv::cuda::ORB> d_orb = cv::cuda::ORB::create(nFeatures);

         const cv::cuda::GpuMat d_img(img);
         cv::cuda::GpuMat d_keypoints, d_descriptors;

-        TEST_CYCLE() d_orb(d_img, cv::cuda::GpuMat(), d_keypoints, d_descriptors);
+        TEST_CYCLE() d_orb->detectAndComputeAsync(d_img, cv::noArray(), d_keypoints, d_descriptors);

         std::vector<cv::KeyPoint> gpu_keypoints;
-        d_orb.downloadKeyPoints(d_keypoints, gpu_keypoints);
+        d_orb->convert(d_keypoints, gpu_keypoints);

         cv::Mat gpu_descriptors(d_descriptors);
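The benchmark above exercises the asynchronous entry point; the sketch below (illustrative, not part of the patch) shows the same calls in isolation. detectAndComputeAsync() leaves the keypoints on the GPU in the row layout declared in the header (X_ROW, Y_ROW, ...), and convert() takes over from the removed downloadKeyPoints()/convertKeyPoints() helpers. The useProvidedKeypoints and Stream arguments of the Feature2DAsync interface are left at their defaults here.

    #include <vector>
    #include <opencv2/core.hpp>
    #include <opencv2/core/cuda.hpp>
    #include <opencv2/cudafeatures2d.hpp>

    // Detect ORB keypoints on an already uploaded 8-bit grayscale image and
    // bring them back to the host.
    static std::vector<cv::KeyPoint> detectOrbGpu(const cv::cuda::GpuMat& d_img)
    {
        cv::Ptr<cv::cuda::ORB> orb = cv::cuda::ORB::create();

        cv::cuda::GpuMat d_keypoints, d_descriptors;
        orb->detectAndComputeAsync(d_img, cv::noArray(), d_keypoints, d_descriptors);

        // Download and repack the keypoint rows into cv::KeyPoint objects.
        std::vector<cv::KeyPoint> keypoints;
        orb->convert(d_keypoints, keypoints);
        return keypoints;
    }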
diff --git a/modules/cudafeatures2d/src/orb.cpp b/modules/cudafeatures2d/src/orb.cpp
index c04649b1f..6bfdd5ac4 100644
--- a/modules/cudafeatures2d/src/orb.cpp
+++ b/modules/cudafeatures2d/src/orb.cpp
@@ -47,18 +47,7 @@ using namespace cv::cuda;

 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

-cv::cuda::ORB_CUDA::ORB_CUDA(int, float, int, int, int, int, int, int) : fastDetector_(20) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::downloadKeyPoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::convertKeyPoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::release() { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::buildScalePyramids(const GpuMat&, const GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::computeKeyPointsPyramid() { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::computeDescriptors(GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::mergeKeyPoints(GpuMat&) { throw_no_cuda(); }
+Ptr<cv::cuda::ORB> cv::cuda::ORB::create(int, float, int, int, int, int, int, int, int, bool) { throw_no_cuda(); return Ptr<cv::cuda::ORB>(); }

 #else /* !defined (HAVE_CUDA) */

@@ -346,7 +335,100 @@ namespace
         -1,-6, 0,-11/*mean (0.127148), correlation (0.547401)*/
     };

-    void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize)
+    class ORB_Impl : public cv::cuda::ORB
+    {
+    public:
+        ORB_Impl(int nfeatures,
+                 float scaleFactor,
+                 int nlevels,
+                 int edgeThreshold,
+                 int firstLevel,
+                 int WTA_K,
+                 int scoreType,
+                 int patchSize,
+                 int fastThreshold,
+                 bool blurForDescriptor);
+
+        virtual void detectAndCompute(InputArray _image, InputArray _mask, std::vector<KeyPoint>& keypoints, OutputArray _descriptors, bool useProvidedKeypoints);
+        virtual void detectAndComputeAsync(InputArray _image, InputArray _mask, OutputArray _keypoints, OutputArray _descriptors, bool useProvidedKeypoints, Stream& stream);
+
+        virtual void convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints);
+
+        virtual int descriptorSize() const { return kBytes; }
+        virtual int descriptorType() const { return CV_8U; }
+        virtual int defaultNorm() const { return NORM_HAMMING; }
+
+        virtual void setMaxFeatures(int maxFeatures) { nFeatures_ = maxFeatures; }
+        virtual int getMaxFeatures() const { return nFeatures_; }
+
+        virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
+        virtual double getScaleFactor() const { return scaleFactor_; }
+
+        virtual void setNLevels(int nlevels) { nLevels_ = nlevels; }
+        virtual int getNLevels() const { return nLevels_; }
+
+        virtual void setEdgeThreshold(int edgeThreshold) { edgeThreshold_ = edgeThreshold; }
+        virtual int getEdgeThreshold() const { return edgeThreshold_; }
+
+        virtual void setFirstLevel(int firstLevel) { firstLevel_ = firstLevel; }
+        virtual int getFirstLevel() const { return firstLevel_; }
+
+        virtual void setWTA_K(int wta_k) { WTA_K_ = wta_k; }
+        virtual int getWTA_K() const { return WTA_K_; }
+
+        virtual void setScoreType(int scoreType) { scoreType_ = scoreType; }
+        virtual int getScoreType() const { return scoreType_; }
+
+        virtual void setPatchSize(int patchSize) { patchSize_ = patchSize; }
+        virtual int getPatchSize() const { return patchSize_; }
+
+        virtual void setFastThreshold(int fastThreshold) { fastThreshold_ = fastThreshold; }
+        virtual int getFastThreshold() const { return fastThreshold_; }
+
+        virtual void setBlurForDescriptor(bool blurForDescriptor) { blurForDescriptor_ = blurForDescriptor; }
+        virtual bool getBlurForDescriptor() const { return blurForDescriptor_; }
+
+    private:
+        int nFeatures_;
+        float scaleFactor_;
+        int nLevels_;
+        int edgeThreshold_;
+        int firstLevel_;
+        int WTA_K_;
+        int scoreType_;
+        int patchSize_;
+        int fastThreshold_;
+        bool blurForDescriptor_;
+
+    private:
+        void buildScalePyramids(InputArray _image, InputArray _mask);
+        void computeKeyPointsPyramid();
+        void computeDescriptors(OutputArray _descriptors);
+        void mergeKeyPoints(OutputArray _keypoints);
+
+    private:
+        Ptr<cuda::FastFeatureDetector> fastDetector_;
+
+        //! The number of desired features per scale
+        std::vector<size_t> n_features_per_level_;
+
+        //! Points to compute BRIEF descriptors from
+        GpuMat pattern_;
+
+        std::vector<GpuMat> imagePyr_;
+        std::vector<GpuMat> maskPyr_;
+
+        GpuMat buf_;
+
+        std::vector<GpuMat> keyPointsPyr_;
+        std::vector<int> keyPointsCount_;
+
+        Ptr<Filter> blurFilter_;
+
+        GpuMat d_keypoints_;
+    };
+
+    static void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize)
     {
         RNG rng(0x12345678);

@@ -381,7 +463,7 @@
         }
     }

-    void makeRandomPattern(int patchSize, Point* pattern, int npoints)
+    static void makeRandomPattern(int patchSize, Point* pattern, int npoints)
     {
         // we always start with a fixed seed,
         // to make patterns the same on each run
@@ -393,155 +475,189 @@
             pattern[i].y = rng.uniform(-patchSize / 2, patchSize / 2 + 1);
         }
     }
-}

-cv::cuda::ORB_CUDA::ORB_CUDA(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize) :
-    nFeatures_(nFeatures), scaleFactor_(scaleFactor), nLevels_(nLevels), edgeThreshold_(edgeThreshold), firstLevel_(firstLevel), WTA_K_(WTA_K),
-    scoreType_(scoreType), patchSize_(patchSize),
-    fastDetector_(cuda::FastFeatureDetector::create(DEFAULT_FAST_THRESHOLD))
-{
-    CV_Assert(patchSize_ >= 2);
-
-    // fill the extractors and descriptors for the corresponding scales
-    float factor = 1.0f / scaleFactor_;
-    float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_));
-
-    n_features_per_level_.resize(nLevels_);
-    size_t sum_n_features = 0;
-    for (int level = 0; level < nLevels_ - 1; ++level)
+    ORB_Impl::ORB_Impl(int nFeatures,
+                       float scaleFactor,
+                       int nLevels,
+                       int edgeThreshold,
+                       int firstLevel,
+                       int WTA_K,
+                       int scoreType,
+                       int patchSize,
+                       int fastThreshold,
+                       bool blurForDescriptor) :
+        nFeatures_(nFeatures),
+        scaleFactor_(scaleFactor),
+        nLevels_(nLevels),
+        edgeThreshold_(edgeThreshold),
+        firstLevel_(firstLevel),
+        WTA_K_(WTA_K),
+        scoreType_(scoreType),
+        patchSize_(patchSize),
+        fastThreshold_(fastThreshold),
+        blurForDescriptor_(blurForDescriptor)
     {
-        n_features_per_level_[level] = cvRound(n_desired_features_per_scale);
-        sum_n_features += n_features_per_level_[level];
-        n_desired_features_per_scale *= factor;
-    }
-    n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features;
+        CV_Assert( patchSize_ >= 2 );
+        CV_Assert( WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4 );

-    // pre-compute the end of a row in a circular patch
-    int half_patch_size = patchSize_ / 2;
-    std::vector<int> u_max(half_patch_size + 2);
-    for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v)
-        u_max[v] = cvRound(std::sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
+        fastDetector_ = cuda::FastFeatureDetector::create(fastThreshold_);

-    // Make sure we are symmetric
-    for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v)
-    {
-        while (u_max[v_0] == u_max[v_0 + 1])
-            ++v_0;
-        u_max[v] = v_0;
-        ++v_0;
-    }
-    CV_Assert(u_max.size() < 32);
-    cv::cuda::device::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));
+        // fill the extractors and descriptors for the corresponding scales
+        float factor = 1.0f / scaleFactor_;
+        float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_));

-    // Calc pattern
-    const int npoints = 512;
-    Point pattern_buf[npoints];
-    const Point* pattern0 = (const Point*)bit_pattern_31_;
-    if (patchSize_ != 31)
-    {
-        pattern0 = pattern_buf;
-        makeRandomPattern(patchSize_, pattern_buf, npoints);
-    }
-
-    CV_Assert(WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4);
-
-    Mat
h_pattern; - - if (WTA_K_ == 2) - { - h_pattern.create(2, npoints, CV_32SC1); - - int* pattern_x_ptr = h_pattern.ptr(0); - int* pattern_y_ptr = h_pattern.ptr(1); - - for (int i = 0; i < npoints; ++i) + n_features_per_level_.resize(nLevels_); + size_t sum_n_features = 0; + for (int level = 0; level < nLevels_ - 1; ++level) { - pattern_x_ptr[i] = pattern0[i].x; - pattern_y_ptr[i] = pattern0[i].y; + n_features_per_level_[level] = cvRound(n_desired_features_per_scale); + sum_n_features += n_features_per_level_[level]; + n_desired_features_per_scale *= factor; } - } - else - { - int ntuples = descriptorSize() * 4; - initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints); - } + n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features; - pattern_.upload(h_pattern); - - blurFilter = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101); - - blurForDescriptor = false; -} - -namespace -{ - inline float getScale(float scaleFactor, int firstLevel, int level) - { - return pow(scaleFactor, level - firstLevel); - } -} - -void cv::cuda::ORB_CUDA::buildScalePyramids(const GpuMat& image, const GpuMat& mask) -{ - CV_Assert(image.type() == CV_8UC1); - CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size())); - - imagePyr_.resize(nLevels_); - maskPyr_.resize(nLevels_); - - for (int level = 0; level < nLevels_; ++level) - { - float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level); - - Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale)); - - ensureSizeIsEnough(sz, image.type(), imagePyr_[level]); - ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]); - maskPyr_[level].setTo(Scalar::all(255)); - - // Compute the resized image - if (level != firstLevel_) + // pre-compute the end of a row in a circular patch + int half_patch_size = patchSize_ / 2; + std::vector u_max(half_patch_size + 2); + for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v) { - if (level < firstLevel_) - { - cuda::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR); + u_max[v] = cvRound(std::sqrt(static_cast(half_patch_size * half_patch_size - v * v))); + } - if (!mask.empty()) - cuda::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR); - } - else - { - cuda::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR); + // Make sure we are symmetric + for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v) + { + while (u_max[v_0] == u_max[v_0 + 1]) + ++v_0; + u_max[v] = v_0; + ++v_0; + } + CV_Assert( u_max.size() < 32 ); + cv::cuda::device::orb::loadUMax(&u_max[0], static_cast(u_max.size())); - if (!mask.empty()) - { - cuda::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR); - cuda::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO); - } + // Calc pattern + const int npoints = 512; + Point pattern_buf[npoints]; + const Point* pattern0 = (const Point*)bit_pattern_31_; + if (patchSize_ != 31) + { + pattern0 = pattern_buf; + makeRandomPattern(patchSize_, pattern_buf, npoints); + } + + Mat h_pattern; + if (WTA_K_ == 2) + { + h_pattern.create(2, npoints, CV_32SC1); + + int* pattern_x_ptr = h_pattern.ptr(0); + int* pattern_y_ptr = h_pattern.ptr(1); + + for (int i = 0; i < npoints; ++i) + { + pattern_x_ptr[i] = pattern0[i].x; + pattern_y_ptr[i] = pattern0[i].y; } } else { - image.copyTo(imagePyr_[level]); - - if (!mask.empty()) - mask.copyTo(maskPyr_[level]); + int ntuples = descriptorSize() * 4; + initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints); 
} - // Filter keypoints by image border - ensureSizeIsEnough(sz, CV_8UC1, buf_); - buf_.setTo(Scalar::all(0)); - Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_); - buf_(inner).setTo(Scalar::all(255)); + pattern_.upload(h_pattern); - cuda::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]); + blurFilter_ = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101); } -} -namespace -{ - //takes keypoints and culls them by the response - void cull(GpuMat& keypoints, int& count, int n_points) + void ORB_Impl::detectAndCompute(InputArray _image, InputArray _mask, std::vector& keypoints, OutputArray _descriptors, bool useProvidedKeypoints) + { + CV_Assert( useProvidedKeypoints == false ); + + detectAndComputeAsync(_image, _mask, d_keypoints_, _descriptors, false, Stream::Null()); + convert(d_keypoints_, keypoints); + } + + void ORB_Impl::detectAndComputeAsync(InputArray _image, InputArray _mask, OutputArray _keypoints, OutputArray _descriptors, bool useProvidedKeypoints, Stream& stream) + { + CV_Assert( useProvidedKeypoints == false ); + + buildScalePyramids(_image, _mask); + computeKeyPointsPyramid(); + if (_descriptors.needed()) + { + computeDescriptors(_descriptors); + } + mergeKeyPoints(_keypoints); + } + + static float getScale(float scaleFactor, int firstLevel, int level) + { + return pow(scaleFactor, level - firstLevel); + } + + void ORB_Impl::buildScalePyramids(InputArray _image, InputArray _mask) + { + const GpuMat image = _image.getGpuMat(); + const GpuMat mask = _mask.getGpuMat(); + + CV_Assert( image.type() == CV_8UC1 ); + CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) ); + + imagePyr_.resize(nLevels_); + maskPyr_.resize(nLevels_); + + for (int level = 0; level < nLevels_; ++level) + { + float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level); + + Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale)); + + ensureSizeIsEnough(sz, image.type(), imagePyr_[level]); + ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]); + maskPyr_[level].setTo(Scalar::all(255)); + + // Compute the resized image + if (level != firstLevel_) + { + if (level < firstLevel_) + { + cuda::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR); + + if (!mask.empty()) + cuda::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR); + } + else + { + cuda::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR); + + if (!mask.empty()) + { + cuda::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR); + cuda::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO); + } + } + } + else + { + image.copyTo(imagePyr_[level]); + + if (!mask.empty()) + mask.copyTo(maskPyr_[level]); + } + + // Filter keypoints by image border + ensureSizeIsEnough(sz, CV_8UC1, buf_); + buf_.setTo(Scalar::all(0)); + Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_); + buf_(inner).setTo(Scalar::all(255)); + + cuda::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]); + } + } + + // takes keypoints and culls them by the response + static void cull(GpuMat& keypoints, int& count, int n_points) { using namespace cv::cuda::device::orb; @@ -557,217 +673,196 @@ namespace count = cull_gpu(keypoints.ptr(cuda::FastFeatureDetector::LOCATION_ROW), keypoints.ptr(cuda::FastFeatureDetector::RESPONSE_ROW), count, n_points); } } -} -void cv::cuda::ORB_CUDA::computeKeyPointsPyramid() -{ - using namespace cv::cuda::device::orb; - - int 
half_patch_size = patchSize_ / 2; - - keyPointsPyr_.resize(nLevels_); - keyPointsCount_.resize(nLevels_); - - for (int level = 0; level < nLevels_; ++level) + void ORB_Impl::computeKeyPointsPyramid() { - fastDetector_->setMaxNumPoints(0.05 * imagePyr_[level].size().area()); + using namespace cv::cuda::device::orb; - GpuMat fastKpRange; - fastDetector_->detectAsync(imagePyr_[level], fastKpRange, maskPyr_[level], Stream::Null()); + int half_patch_size = patchSize_ / 2; - keyPointsCount_[level] = fastKpRange.cols; + keyPointsPyr_.resize(nLevels_); + keyPointsCount_.resize(nLevels_); - if (keyPointsCount_[level] == 0) - continue; + fastDetector_->setThreshold(fastThreshold_); - ensureSizeIsEnough(3, keyPointsCount_[level], fastKpRange.type(), keyPointsPyr_[level]); - fastKpRange.copyTo(keyPointsPyr_[level].rowRange(0, 2)); - - const int n_features = static_cast(n_features_per_level_[level]); - - if (scoreType_ == ORB::HARRIS_SCORE) + for (int level = 0; level < nLevels_; ++level) { - // Keep more points than necessary as FAST does not give amazing corners - cull(keyPointsPyr_[level], keyPointsCount_[level], 2 * n_features); + fastDetector_->setMaxNumPoints(0.05 * imagePyr_[level].size().area()); - // Compute the Harris cornerness (better scoring than FAST) - HarrisResponses_gpu(imagePyr_[level], keyPointsPyr_[level].ptr(0), keyPointsPyr_[level].ptr(1), keyPointsCount_[level], 7, HARRIS_K, 0); + GpuMat fastKpRange; + fastDetector_->detectAsync(imagePyr_[level], fastKpRange, maskPyr_[level], Stream::Null()); + + keyPointsCount_[level] = fastKpRange.cols; + + if (keyPointsCount_[level] == 0) + continue; + + ensureSizeIsEnough(3, keyPointsCount_[level], fastKpRange.type(), keyPointsPyr_[level]); + fastKpRange.copyTo(keyPointsPyr_[level].rowRange(0, 2)); + + const int n_features = static_cast(n_features_per_level_[level]); + + if (scoreType_ == ORB::HARRIS_SCORE) + { + // Keep more points than necessary as FAST does not give amazing corners + cull(keyPointsPyr_[level], keyPointsCount_[level], 2 * n_features); + + // Compute the Harris cornerness (better scoring than FAST) + HarrisResponses_gpu(imagePyr_[level], keyPointsPyr_[level].ptr(0), keyPointsPyr_[level].ptr(1), keyPointsCount_[level], 7, HARRIS_K, 0); + } + + //cull to the final desired level, using the new Harris scores or the original FAST scores. + cull(keyPointsPyr_[level], keyPointsCount_[level], n_features); + + // Compute orientation + IC_Angle_gpu(imagePyr_[level], keyPointsPyr_[level].ptr(0), keyPointsPyr_[level].ptr(2), keyPointsCount_[level], half_patch_size, 0); + } + } + + void ORB_Impl::computeDescriptors(OutputArray _descriptors) + { + using namespace cv::cuda::device::orb; + + int nAllkeypoints = 0; + + for (int level = 0; level < nLevels_; ++level) + nAllkeypoints += keyPointsCount_[level]; + + if (nAllkeypoints == 0) + { + _descriptors.release(); + return; } - //cull to the final desired level, using the new Harris scores or the original FAST scores. 
- cull(keyPointsPyr_[level], keyPointsCount_[level], n_features); + ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, _descriptors); + GpuMat descriptors = _descriptors.getGpuMat(); - // Compute orientation - IC_Angle_gpu(imagePyr_[level], keyPointsPyr_[level].ptr(0), keyPointsPyr_[level].ptr(2), keyPointsCount_[level], half_patch_size, 0); - } -} + int offset = 0; -void cv::cuda::ORB_CUDA::computeDescriptors(GpuMat& descriptors) -{ - using namespace cv::cuda::device::orb; - - int nAllkeypoints = 0; - - for (int level = 0; level < nLevels_; ++level) - nAllkeypoints += keyPointsCount_[level]; - - if (nAllkeypoints == 0) - { - descriptors.release(); - return; - } - - ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, descriptors); - - int offset = 0; - - for (int level = 0; level < nLevels_; ++level) - { - if (keyPointsCount_[level] == 0) - continue; - - GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]); - - if (blurForDescriptor) + for (int level = 0; level < nLevels_; ++level) { - // preprocess the resized image - ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_); - blurFilter->apply(imagePyr_[level], buf_); + if (keyPointsCount_[level] == 0) + continue; + + GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]); + + if (blurForDescriptor_) + { + // preprocess the resized image + ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_); + blurFilter_->apply(imagePyr_[level], buf_); + } + + computeOrbDescriptor_gpu(blurForDescriptor_ ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr(0), keyPointsPyr_[level].ptr(2), + keyPointsCount_[level], pattern_.ptr(0), pattern_.ptr(1), descRange, descriptorSize(), WTA_K_, 0); + + offset += keyPointsCount_[level]; + } + } + + void ORB_Impl::mergeKeyPoints(OutputArray _keypoints) + { + using namespace cv::cuda::device::orb; + + int nAllkeypoints = 0; + + for (int level = 0; level < nLevels_; ++level) + nAllkeypoints += keyPointsCount_[level]; + + if (nAllkeypoints == 0) + { + _keypoints.release(); + return; } - computeOrbDescriptor_gpu(blurForDescriptor ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr(0), keyPointsPyr_[level].ptr(2), - keyPointsCount_[level], pattern_.ptr(0), pattern_.ptr(1), descRange, descriptorSize(), WTA_K_, 0); + ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, _keypoints); + GpuMat& keypoints = _keypoints.getGpuMatRef(); - offset += keyPointsCount_[level]; + int offset = 0; + + for (int level = 0; level < nLevels_; ++level) + { + if (keyPointsCount_[level] == 0) + continue; + + float sf = getScale(scaleFactor_, firstLevel_, level); + + GpuMat keyPointsRange = keypoints.colRange(offset, offset + keyPointsCount_[level]); + + float locScale = level != firstLevel_ ? 
sf : 1.0f; + + mergeLocation_gpu(keyPointsPyr_[level].ptr(0), keyPointsRange.ptr(0), keyPointsRange.ptr(1), keyPointsCount_[level], locScale, 0); + + GpuMat range = keyPointsRange.rowRange(2, 4); + keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range); + + keyPointsRange.row(4).setTo(Scalar::all(level)); + keyPointsRange.row(5).setTo(Scalar::all(patchSize_ * sf)); + + offset += keyPointsCount_[level]; + } } -} -void cv::cuda::ORB_CUDA::mergeKeyPoints(GpuMat& keypoints) -{ - using namespace cv::cuda::device::orb; - - int nAllkeypoints = 0; - - for (int level = 0; level < nLevels_; ++level) - nAllkeypoints += keyPointsCount_[level]; - - if (nAllkeypoints == 0) + void ORB_Impl::convert(InputArray _gpu_keypoints, std::vector& keypoints) { - keypoints.release(); - return; - } + if (_gpu_keypoints.empty()) + { + keypoints.clear(); + return; + } - ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, keypoints); + Mat h_keypoints; + if (_gpu_keypoints.kind() == _InputArray::CUDA_GPU_MAT) + { + _gpu_keypoints.getGpuMat().download(h_keypoints); + } + else + { + h_keypoints = _gpu_keypoints.getMat(); + } - int offset = 0; + CV_Assert( h_keypoints.rows == ROWS_COUNT ); + CV_Assert( h_keypoints.type() == CV_32FC1 ); - for (int level = 0; level < nLevels_; ++level) - { - if (keyPointsCount_[level] == 0) - continue; + const int npoints = h_keypoints.cols; - float sf = getScale(scaleFactor_, firstLevel_, level); + keypoints.resize(npoints); - GpuMat keyPointsRange = keypoints.colRange(offset, offset + keyPointsCount_[level]); + const float* x_ptr = h_keypoints.ptr(X_ROW); + const float* y_ptr = h_keypoints.ptr(Y_ROW); + const float* response_ptr = h_keypoints.ptr(RESPONSE_ROW); + const float* angle_ptr = h_keypoints.ptr(ANGLE_ROW); + const float* octave_ptr = h_keypoints.ptr(OCTAVE_ROW); + const float* size_ptr = h_keypoints.ptr(SIZE_ROW); - float locScale = level != firstLevel_ ? 
sf : 1.0f; + for (int i = 0; i < npoints; ++i) + { + KeyPoint kp; - mergeLocation_gpu(keyPointsPyr_[level].ptr(0), keyPointsRange.ptr(0), keyPointsRange.ptr(1), keyPointsCount_[level], locScale, 0); + kp.pt.x = x_ptr[i]; + kp.pt.y = y_ptr[i]; + kp.response = response_ptr[i]; + kp.angle = angle_ptr[i]; + kp.octave = static_cast(octave_ptr[i]); + kp.size = size_ptr[i]; - GpuMat range = keyPointsRange.rowRange(2, 4); - keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range); - - keyPointsRange.row(4).setTo(Scalar::all(level)); - keyPointsRange.row(5).setTo(Scalar::all(patchSize_ * sf)); - - offset += keyPointsCount_[level]; + keypoints[i] = kp; + } } } -void cv::cuda::ORB_CUDA::downloadKeyPoints(const GpuMat &d_keypoints, std::vector& keypoints) +Ptr cv::cuda::ORB::create(int nfeatures, + float scaleFactor, + int nlevels, + int edgeThreshold, + int firstLevel, + int WTA_K, + int scoreType, + int patchSize, + int fastThreshold, + bool blurForDescriptor) { - if (d_keypoints.empty()) - { - keypoints.clear(); - return; - } - - Mat h_keypoints(d_keypoints); - - convertKeyPoints(h_keypoints, keypoints); -} - -void cv::cuda::ORB_CUDA::convertKeyPoints(const Mat &d_keypoints, std::vector& keypoints) -{ - if (d_keypoints.empty()) - { - keypoints.clear(); - return; - } - - CV_Assert(d_keypoints.type() == CV_32FC1 && d_keypoints.rows == ROWS_COUNT); - - const float* x_ptr = d_keypoints.ptr(X_ROW); - const float* y_ptr = d_keypoints.ptr(Y_ROW); - const float* response_ptr = d_keypoints.ptr(RESPONSE_ROW); - const float* angle_ptr = d_keypoints.ptr(ANGLE_ROW); - const float* octave_ptr = d_keypoints.ptr(OCTAVE_ROW); - const float* size_ptr = d_keypoints.ptr(SIZE_ROW); - - keypoints.resize(d_keypoints.cols); - - for (int i = 0; i < d_keypoints.cols; ++i) - { - KeyPoint kp; - - kp.pt.x = x_ptr[i]; - kp.pt.y = y_ptr[i]; - kp.response = response_ptr[i]; - kp.angle = angle_ptr[i]; - kp.octave = static_cast(octave_ptr[i]); - kp.size = size_ptr[i]; - - keypoints[i] = kp; - } -} - -void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints) -{ - buildScalePyramids(image, mask); - computeKeyPointsPyramid(); - mergeKeyPoints(keypoints); -} - -void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors) -{ - buildScalePyramids(image, mask); - computeKeyPointsPyramid(); - computeDescriptors(descriptors); - mergeKeyPoints(keypoints); -} - -void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, std::vector& keypoints) -{ - (*this)(image, mask, d_keypoints_); - downloadKeyPoints(d_keypoints_, keypoints); -} - -void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, std::vector& keypoints, GpuMat& descriptors) -{ - (*this)(image, mask, d_keypoints_, descriptors); - downloadKeyPoints(d_keypoints_, keypoints); -} - -void cv::cuda::ORB_CUDA::release() -{ - imagePyr_.clear(); - maskPyr_.clear(); - - buf_.release(); - - keyPointsPyr_.clear(); - - d_keypoints_.release(); + return makePtr(nfeatures, scaleFactor, nlevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize, fastThreshold, blurForDescriptor); } #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/cudafeatures2d/test/test_features2d.cpp b/modules/cudafeatures2d/test/test_features2d.cpp index 9a8d76ce3..25ba48faf 100644 --- a/modules/cudafeatures2d/test/test_features2d.cpp +++ b/modules/cudafeatures2d/test/test_features2d.cpp @@ -122,7 +122,7 @@ namespace IMPLEMENT_PARAM_CLASS(ORB_BlurForDescriptor, 
bool)
 }

-CV_ENUM(ORB_ScoreType, ORB::HARRIS_SCORE, ORB::FAST_SCORE)
+CV_ENUM(ORB_ScoreType, cv::ORB::HARRIS_SCORE, cv::ORB::FAST_SCORE)

 PARAM_TEST_CASE(ORB, cv::cuda::DeviceInfo, ORB_FeaturesCount, ORB_ScaleFactor, ORB_LevelsCount, ORB_EdgeThreshold, ORB_firstLevel, ORB_WTA_K, ORB_ScoreType, ORB_PatchSize, ORB_BlurForDescriptor)
 {
@@ -162,8 +162,9 @@ CUDA_TEST_P(ORB, Accuracy)
     cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
     mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));

-    cv::cuda::ORB_CUDA orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
-    orb.blurForDescriptor = blurForDescriptor;
+    cv::Ptr<cv::cuda::ORB> orb =
+            cv::cuda::ORB::create(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel,
+                                  WTA_K, scoreType, patchSize, 20, blurForDescriptor);

     if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
     {
@@ -171,7 +172,7 @@ CUDA_TEST_P(ORB, Accuracy)
         {
             std::vector<cv::KeyPoint> keypoints;
             cv::cuda::GpuMat descriptors;
-            orb(loadMat(image), loadMat(mask), keypoints, descriptors);
+            orb->detectAndComputeAsync(loadMat(image), loadMat(mask), keypoints, descriptors);
         }
         catch (const cv::Exception& e)
         {
@@ -182,7 +183,7 @@
     {
         std::vector<cv::KeyPoint> keypoints;
         cv::cuda::GpuMat descriptors;
-        orb(loadMat(image), loadMat(mask), keypoints, descriptors);
+        orb->detectAndCompute(loadMat(image), loadMat(mask), keypoints, descriptors);

         cv::Ptr<cv::ORB> orb_gold = cv::ORB::create(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);

diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp
index 8869a1b66..0d083e5bd 100644
--- a/samples/gpu/performance/tests.cpp
+++ b/samples/gpu/performance/tests.cpp
@@ -350,15 +350,15 @@ TEST(ORB)
     orb->detectAndCompute(src, Mat(), keypoints, descriptors);
     CPU_OFF;

-    cuda::ORB_CUDA d_orb;
+    Ptr<cuda::ORB> d_orb = cuda::ORB::create();

     cuda::GpuMat d_src(src);
     cuda::GpuMat d_keypoints;
     cuda::GpuMat d_descriptors;

-    d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
+    d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);

     CUDA_ON;
-    d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
+    d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
     CUDA_OFF;
 }
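Migration sketch (not part of the patch), summarizing the pattern applied in the test and sample changes above; d_src is assumed to be an 8-bit grayscale GpuMat:

    // Before: concrete class, call operator, public field, static download helper.
    //     cv::cuda::ORB_CUDA d_orb(4000);
    //     d_orb.blurForDescriptor = true;
    //     d_orb(d_src, cv::cuda::GpuMat(), d_keypoints, d_descriptors);
    //     cv::cuda::ORB_CUDA::downloadKeyPoints(d_keypoints, keypoints);

    // After: abstract interface obtained from a factory, Feature2DAsync calls.
    cv::Ptr<cv::cuda::ORB> d_orb = cv::cuda::ORB::create(4000);
    d_orb->setBlurForDescriptor(true);

    cv::cuda::GpuMat d_keypoints, d_descriptors;
    d_orb->detectAndComputeAsync(d_src, cv::noArray(), d_keypoints, d_descriptors);

    std::vector<cv::KeyPoint> keypoints;
    d_orb->convert(d_keypoints, keypoints);

The old release() and setFastParams() members have no direct one-line equivalents: buffer lifetime now follows the implementation object, and the FAST threshold is passed to create() (or set through the setFastThreshold() getter/setter pair inherited from cv::ORB).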