refactor CUDA ORB feature detector/extractor algorithm:

use new abstract interface and hidden implementation
Vladislav Vinogradov 2015-01-13 10:40:58 +03:00
parent 554ddd2ec4
commit f960a5707d
5 changed files with 447 additions and 442 deletions
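
The change applies the pattern used across the CUDA modules in this refactoring series: the public header keeps only a pure-virtual interface plus a static create() factory, while the concrete class with all of its state and scratch buffers moves into an anonymous namespace in the translation unit. A minimal sketch of that shape (all names here are hypothetical, for illustration only, not from this commit):

#include <opencv2/core.hpp>

// public header: abstract base class plus a factory, no data members
class Detector
{
public:
    virtual ~Detector() {}
    virtual void setThreshold(int threshold) = 0;
    virtual int getThreshold() const = 0;
    static cv::Ptr<Detector> create(int threshold = 20);
};

// translation unit: the concrete state stays invisible to users
namespace
{
    class DetectorImpl : public Detector
    {
    public:
        explicit DetectorImpl(int threshold) : threshold_(threshold) {}
        virtual void setThreshold(int threshold) { threshold_ = threshold; }
        virtual int getThreshold() const { return threshold_; }
    private:
        int threshold_;
    };
}

cv::Ptr<Detector> Detector::create(int threshold)
{
    return cv::makePtr<DetectorImpl>(threshold);
}

Keeping the implementation hidden lets internal buffers and helpers change without touching the public header or its ABI.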

modules/cudafeatures2d/include/opencv2/cudafeatures2d.hpp

@@ -284,9 +284,11 @@ public:
     virtual int getMaxNumPoints() const = 0;
 };
 
-/** @brief Class for extracting ORB features and descriptors from an image. :
- */
-class CV_EXPORTS ORB_CUDA
+//
+// ORB
+//
+
+class CV_EXPORTS ORB : public cv::ORB, public Feature2DAsync
 {
 public:
     enum
@@ -300,113 +302,20 @@ public:
         ROWS_COUNT
     };
 
-    enum
-    {
-        DEFAULT_FAST_THRESHOLD = 20
-    };
-
-    /** @brief Constructor.
-
-    @param nFeatures The number of desired features.
-    @param scaleFactor Coefficient by which we divide the dimensions from one scale pyramid level to
-    the next.
-    @param nLevels The number of levels in the scale pyramid.
-    @param edgeThreshold How far from the boundary the points should be.
-    @param firstLevel The level at which the image is given. If 1, that means we will also look at the
-    image scaleFactor times bigger.
-    @param WTA_K
-    @param scoreType
-    @param patchSize
-     */
-    explicit ORB_CUDA(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31,
-                      int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31);
-
-    /** @overload */
-    void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
-    /** @overload */
-    void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints);
-
-    /** @brief Detects keypoints and computes descriptors for them.
-
-    @param image Input 8-bit grayscale image.
-    @param mask Optional input mask that marks the regions where we should detect features.
-    @param keypoints The input/output vector of keypoints. Can be stored both in CPU and GPU memory.
-    For GPU memory:
-    -   keypoints.ptr\<float\>(X_ROW)[i] contains x coordinate of the i'th feature.
-    -   keypoints.ptr\<float\>(Y_ROW)[i] contains y coordinate of the i'th feature.
-    -   keypoints.ptr\<float\>(RESPONSE_ROW)[i] contains the response of the i'th feature.
-    -   keypoints.ptr\<float\>(ANGLE_ROW)[i] contains orientation of the i'th feature.
-    -   keypoints.ptr\<float\>(OCTAVE_ROW)[i] contains the octave of the i'th feature.
-    -   keypoints.ptr\<float\>(SIZE_ROW)[i] contains the size of the i'th feature.
-    @param descriptors Computed descriptors. if blurForDescriptor is true, image will be blurred
-    before descriptors calculation.
-     */
-    void operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors);
-    /** @overload */
-    void operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors);
-
-    /** @brief Download keypoints from GPU to CPU memory.
-     */
-    static void downloadKeyPoints(const GpuMat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-    /** @brief Converts keypoints from CUDA representation to vector of KeyPoint.
-     */
-    static void convertKeyPoints(const Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
-
-    //! returns the descriptor size in bytes
-    inline int descriptorSize() const { return kBytes; }
-
-    inline void setFastParams(int threshold, bool nonmaxSuppression = true)
-    {
-        fastDetector_->setThreshold(threshold);
-        fastDetector_->setNonmaxSuppression(nonmaxSuppression);
-    }
-
-    /** @brief Releases inner buffer memory.
-     */
-    void release();
+    static Ptr<ORB> create(int nfeatures=500,
+                           float scaleFactor=1.2f,
+                           int nlevels=8,
+                           int edgeThreshold=31,
+                           int firstLevel=0,
+                           int WTA_K=2,
+                           int scoreType=ORB::HARRIS_SCORE,
+                           int patchSize=31,
+                           int fastThreshold=20,
+                           bool blurForDescriptor=false);
 
     //! if true, image will be blurred before descriptors calculation
-    bool blurForDescriptor;
-
-private:
-    enum { kBytes = 32 };
-
-    void buildScalePyramids(const GpuMat& image, const GpuMat& mask);
-    void computeKeyPointsPyramid();
-    void computeDescriptors(GpuMat& descriptors);
-    void mergeKeyPoints(GpuMat& keypoints);
-
-    int nFeatures_;
-    float scaleFactor_;
-    int nLevels_;
-    int edgeThreshold_;
-    int firstLevel_;
-    int WTA_K_;
-    int scoreType_;
-    int patchSize_;
-
-    //! The number of desired features per scale
-    std::vector<size_t> n_features_per_level_;
-
-    //! Points to compute BRIEF descriptors from
-    GpuMat pattern_;
-
-    std::vector<GpuMat> imagePyr_;
-    std::vector<GpuMat> maskPyr_;
-
-    GpuMat buf_;
-
-    std::vector<GpuMat> keyPointsPyr_;
-    std::vector<int> keyPointsCount_;
-
-    Ptr<cv::cuda::FastFeatureDetector> fastDetector_;
-
-    Ptr<cuda::Filter> blurFilter;
-
-    GpuMat d_keypoints_;
+    virtual void setBlurForDescriptor(bool blurForDescriptor) = 0;
+    virtual bool getBlurForDescriptor() const = 0;
 };
 
 //! @}
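
For callers, the stack-constructed ORB_CUDA object and its operator() overloads are replaced by a factory-created cv::Ptr<cv::cuda::ORB> and the Feature2DAsync methods, as the new header above declares. A minimal migration sketch (the image file name and feature count are arbitrary):

#include <opencv2/core.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/cudafeatures2d.hpp>

int main()
{
    cv::Mat img = cv::imread("scene.png", cv::IMREAD_GRAYSCALE);
    cv::cuda::GpuMat d_img(img);

    // was: cv::cuda::ORB_CUDA orb(500);
    cv::Ptr<cv::cuda::ORB> orb = cv::cuda::ORB::create(500);

    // was: orb(d_img, cv::cuda::GpuMat(), d_keypoints, d_descriptors);
    cv::cuda::GpuMat d_keypoints, d_descriptors;
    orb->detectAndComputeAsync(d_img, cv::noArray(), d_keypoints, d_descriptors);

    // was: cv::cuda::ORB_CUDA::downloadKeyPoints(d_keypoints, keypoints);
    std::vector<cv::KeyPoint> keypoints;
    orb->convert(d_keypoints, keypoints);

    return 0;
}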

modules/cudafeatures2d/perf/perf_features2d.cpp

@@ -109,15 +109,15 @@ PERF_TEST_P(Image_NFeatures, ORB,
     if (PERF_RUN_CUDA())
     {
-        cv::cuda::ORB_CUDA d_orb(nFeatures);
+        cv::Ptr<cv::cuda::ORB> d_orb = cv::cuda::ORB::create(nFeatures);
 
         const cv::cuda::GpuMat d_img(img);
 
         cv::cuda::GpuMat d_keypoints, d_descriptors;
 
-        TEST_CYCLE() d_orb(d_img, cv::cuda::GpuMat(), d_keypoints, d_descriptors);
+        TEST_CYCLE() d_orb->detectAndComputeAsync(d_img, cv::noArray(), d_keypoints, d_descriptors);
 
         std::vector<cv::KeyPoint> gpu_keypoints;
-        d_orb.downloadKeyPoints(d_keypoints, gpu_keypoints);
+        d_orb->convert(d_keypoints, gpu_keypoints);
 
         cv::Mat gpu_descriptors(d_descriptors);

modules/cudafeatures2d/src/orb.cpp

@@ -47,18 +47,7 @@ using namespace cv::cuda;
 #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
 
-cv::cuda::ORB_CUDA::ORB_CUDA(int, float, int, int, int, int, int, int) : fastDetector_(20) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, std::vector<KeyPoint>&, GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::operator()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::downloadKeyPoints(const GpuMat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::convertKeyPoints(const Mat&, std::vector<KeyPoint>&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::release() { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::buildScalePyramids(const GpuMat&, const GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::computeKeyPointsPyramid() { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::computeDescriptors(GpuMat&) { throw_no_cuda(); }
-void cv::cuda::ORB_CUDA::mergeKeyPoints(GpuMat&) { throw_no_cuda(); }
+Ptr<cv::cuda::ORB> cv::cuda::ORB::create(int, float, int, int, int, int, int, int, int, bool) { throw_no_cuda(); return Ptr<cv::cuda::ORB>(); }
 
 #else /* !defined (HAVE_CUDA) */
@@ -346,7 +335,100 @@ namespace
         -1,-6, 0,-11/*mean (0.127148), correlation (0.547401)*/
     };
 
-    void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize)
+    class ORB_Impl : public cv::cuda::ORB
+    {
+    public:
+        ORB_Impl(int nfeatures,
+                 float scaleFactor,
+                 int nlevels,
+                 int edgeThreshold,
+                 int firstLevel,
+                 int WTA_K,
+                 int scoreType,
+                 int patchSize,
+                 int fastThreshold,
+                 bool blurForDescriptor);
+
+        virtual void detectAndCompute(InputArray _image, InputArray _mask, std::vector<KeyPoint>& keypoints, OutputArray _descriptors, bool useProvidedKeypoints);
+        virtual void detectAndComputeAsync(InputArray _image, InputArray _mask, OutputArray _keypoints, OutputArray _descriptors, bool useProvidedKeypoints, Stream& stream);
+
+        virtual void convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints);
+
+        virtual int descriptorSize() const { return kBytes; }
+        virtual int descriptorType() const { return CV_8U; }
+        virtual int defaultNorm() const { return NORM_HAMMING; }
+
+        virtual void setMaxFeatures(int maxFeatures) { nFeatures_ = maxFeatures; }
+        virtual int getMaxFeatures() const { return nFeatures_; }
+
+        virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
+        virtual double getScaleFactor() const { return scaleFactor_; }
+
+        virtual void setNLevels(int nlevels) { nLevels_ = nlevels; }
+        virtual int getNLevels() const { return nLevels_; }
+
+        virtual void setEdgeThreshold(int edgeThreshold) { edgeThreshold_ = edgeThreshold; }
+        virtual int getEdgeThreshold() const { return edgeThreshold_; }
+
+        virtual void setFirstLevel(int firstLevel) { firstLevel_ = firstLevel; }
+        virtual int getFirstLevel() const { return firstLevel_; }
+
+        virtual void setWTA_K(int wta_k) { WTA_K_ = wta_k; }
+        virtual int getWTA_K() const { return WTA_K_; }
+
+        virtual void setScoreType(int scoreType) { scoreType_ = scoreType; }
+        virtual int getScoreType() const { return scoreType_; }
+
+        virtual void setPatchSize(int patchSize) { patchSize_ = patchSize; }
+        virtual int getPatchSize() const { return patchSize_; }
+
+        virtual void setFastThreshold(int fastThreshold) { fastThreshold_ = fastThreshold; }
+        virtual int getFastThreshold() const { return fastThreshold_; }
+
+        virtual void setBlurForDescriptor(bool blurForDescriptor) { blurForDescriptor_ = blurForDescriptor; }
+        virtual bool getBlurForDescriptor() const { return blurForDescriptor_; }
+
+    private:
+        int nFeatures_;
+        float scaleFactor_;
+        int nLevels_;
+        int edgeThreshold_;
+        int firstLevel_;
+        int WTA_K_;
+        int scoreType_;
+        int patchSize_;
+        int fastThreshold_;
+        bool blurForDescriptor_;
+
+    private:
+        void buildScalePyramids(InputArray _image, InputArray _mask);
+        void computeKeyPointsPyramid();
+        void computeDescriptors(OutputArray _descriptors);
+        void mergeKeyPoints(OutputArray _keypoints);
+
+    private:
+        Ptr<cv::cuda::FastFeatureDetector> fastDetector_;
+
+        //! The number of desired features per scale
+        std::vector<size_t> n_features_per_level_;
+
+        //! Points to compute BRIEF descriptors from
+        GpuMat pattern_;
+
+        std::vector<GpuMat> imagePyr_;
+        std::vector<GpuMat> maskPyr_;
+
+        GpuMat buf_;
+
+        std::vector<GpuMat> keyPointsPyr_;
+        std::vector<int> keyPointsCount_;
+
+        Ptr<cuda::Filter> blurFilter_;
+
+        GpuMat d_keypoints_;
+    };
+
+    static void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize)
     {
         RNG rng(0x12345678);
@@ -381,7 +463,7 @@ namespace
         }
     }
 
-    void makeRandomPattern(int patchSize, Point* pattern, int npoints)
+    static void makeRandomPattern(int patchSize, Point* pattern, int npoints)
     {
         // we always start with a fixed seed,
         // to make patterns the same on each run
@@ -393,155 +475,189 @@
             pattern[i].y = rng.uniform(-patchSize / 2, patchSize / 2 + 1);
         }
     }
-}
 
-cv::cuda::ORB_CUDA::ORB_CUDA(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize) :
-    nFeatures_(nFeatures), scaleFactor_(scaleFactor), nLevels_(nLevels), edgeThreshold_(edgeThreshold), firstLevel_(firstLevel), WTA_K_(WTA_K),
-    scoreType_(scoreType), patchSize_(patchSize),
-    fastDetector_(cuda::FastFeatureDetector::create(DEFAULT_FAST_THRESHOLD))
-{
-    CV_Assert(patchSize_ >= 2);
-
-    // fill the extractors and descriptors for the corresponding scales
-    float factor = 1.0f / scaleFactor_;
-    float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_));
-
-    n_features_per_level_.resize(nLevels_);
-    size_t sum_n_features = 0;
-    for (int level = 0; level < nLevels_ - 1; ++level)
-    {
-        n_features_per_level_[level] = cvRound(n_desired_features_per_scale);
-        sum_n_features += n_features_per_level_[level];
-        n_desired_features_per_scale *= factor;
-    }
-    n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features;
-
-    // pre-compute the end of a row in a circular patch
-    int half_patch_size = patchSize_ / 2;
-    std::vector<int> u_max(half_patch_size + 2);
-    for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v)
-        u_max[v] = cvRound(std::sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
-
-    // Make sure we are symmetric
-    for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v)
-    {
-        while (u_max[v_0] == u_max[v_0 + 1])
-            ++v_0;
-        u_max[v] = v_0;
-        ++v_0;
-    }
-    CV_Assert(u_max.size() < 32);
-    cv::cuda::device::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));
-
-    // Calc pattern
-    const int npoints = 512;
-    Point pattern_buf[npoints];
-    const Point* pattern0 = (const Point*)bit_pattern_31_;
-    if (patchSize_ != 31)
-    {
-        pattern0 = pattern_buf;
-        makeRandomPattern(patchSize_, pattern_buf, npoints);
-    }
-
-    CV_Assert(WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4);
-
-    Mat h_pattern;
-
-    if (WTA_K_ == 2)
-    {
-        h_pattern.create(2, npoints, CV_32SC1);
-
-        int* pattern_x_ptr = h_pattern.ptr<int>(0);
-        int* pattern_y_ptr = h_pattern.ptr<int>(1);
-
-        for (int i = 0; i < npoints; ++i)
-        {
-            pattern_x_ptr[i] = pattern0[i].x;
-            pattern_y_ptr[i] = pattern0[i].y;
-        }
-    }
-    else
-    {
-        int ntuples = descriptorSize() * 4;
-        initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints);
-    }
-
-    pattern_.upload(h_pattern);
-
-    blurFilter = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101);
-
-    blurForDescriptor = false;
-}
-
-namespace
-{
-    inline float getScale(float scaleFactor, int firstLevel, int level)
-    {
-        return pow(scaleFactor, level - firstLevel);
-    }
-}
-
-void cv::cuda::ORB_CUDA::buildScalePyramids(const GpuMat& image, const GpuMat& mask)
-{
-    CV_Assert(image.type() == CV_8UC1);
-    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
-
-    imagePyr_.resize(nLevels_);
-    maskPyr_.resize(nLevels_);
-
-    for (int level = 0; level < nLevels_; ++level)
-    {
-        float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level);
-
-        Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale));
-
-        ensureSizeIsEnough(sz, image.type(), imagePyr_[level]);
-        ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]);
-        maskPyr_[level].setTo(Scalar::all(255));
-
-        // Compute the resized image
-        if (level != firstLevel_)
-        {
-            if (level < firstLevel_)
-            {
-                cuda::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR);
-
-                if (!mask.empty())
-                    cuda::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR);
-            }
-            else
-            {
-                cuda::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR);
-
-                if (!mask.empty())
-                {
-                    cuda::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR);
-                    cuda::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO);
-                }
-            }
-        }
-        else
-        {
-            image.copyTo(imagePyr_[level]);
-
-            if (!mask.empty())
-                mask.copyTo(maskPyr_[level]);
-        }
-
-        // Filter keypoints by image border
-        ensureSizeIsEnough(sz, CV_8UC1, buf_);
-        buf_.setTo(Scalar::all(0));
-        Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_);
-        buf_(inner).setTo(Scalar::all(255));
-
-        cuda::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]);
-    }
-}
-
-namespace
-{
-    //takes keypoints and culls them by the response
-    void cull(GpuMat& keypoints, int& count, int n_points)
+    ORB_Impl::ORB_Impl(int nFeatures,
+                       float scaleFactor,
+                       int nLevels,
+                       int edgeThreshold,
+                       int firstLevel,
+                       int WTA_K,
+                       int scoreType,
+                       int patchSize,
+                       int fastThreshold,
+                       bool blurForDescriptor) :
+        nFeatures_(nFeatures),
+        scaleFactor_(scaleFactor),
+        nLevels_(nLevels),
+        edgeThreshold_(edgeThreshold),
+        firstLevel_(firstLevel),
+        WTA_K_(WTA_K),
+        scoreType_(scoreType),
+        patchSize_(patchSize),
+        fastThreshold_(fastThreshold),
+        blurForDescriptor_(blurForDescriptor)
+    {
+        CV_Assert( patchSize_ >= 2 );
+        CV_Assert( WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4 );
+
+        fastDetector_ = cuda::FastFeatureDetector::create(fastThreshold_);
+
+        // fill the extractors and descriptors for the corresponding scales
+        float factor = 1.0f / scaleFactor_;
+        float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_));
+
+        n_features_per_level_.resize(nLevels_);
+        size_t sum_n_features = 0;
+        for (int level = 0; level < nLevels_ - 1; ++level)
+        {
+            n_features_per_level_[level] = cvRound(n_desired_features_per_scale);
+            sum_n_features += n_features_per_level_[level];
+            n_desired_features_per_scale *= factor;
+        }
+        n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features;
+
+        // pre-compute the end of a row in a circular patch
+        int half_patch_size = patchSize_ / 2;
+        std::vector<int> u_max(half_patch_size + 2);
+        for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v)
+        {
+            u_max[v] = cvRound(std::sqrt(static_cast<float>(half_patch_size * half_patch_size - v * v)));
+        }
+
+        // Make sure we are symmetric
+        for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v)
+        {
+            while (u_max[v_0] == u_max[v_0 + 1])
+                ++v_0;
+            u_max[v] = v_0;
+            ++v_0;
+        }
+        CV_Assert( u_max.size() < 32 );
+        cv::cuda::device::orb::loadUMax(&u_max[0], static_cast<int>(u_max.size()));
+
+        // Calc pattern
+        const int npoints = 512;
+        Point pattern_buf[npoints];
+        const Point* pattern0 = (const Point*)bit_pattern_31_;
+        if (patchSize_ != 31)
+        {
+            pattern0 = pattern_buf;
+            makeRandomPattern(patchSize_, pattern_buf, npoints);
+        }
+
+        Mat h_pattern;
+        if (WTA_K_ == 2)
+        {
+            h_pattern.create(2, npoints, CV_32SC1);
+
+            int* pattern_x_ptr = h_pattern.ptr<int>(0);
+            int* pattern_y_ptr = h_pattern.ptr<int>(1);
+
+            for (int i = 0; i < npoints; ++i)
+            {
+                pattern_x_ptr[i] = pattern0[i].x;
+                pattern_y_ptr[i] = pattern0[i].y;
+            }
+        }
+        else
+        {
+            int ntuples = descriptorSize() * 4;
+            initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints);
+        }
+
+        pattern_.upload(h_pattern);
+
+        blurFilter_ = cuda::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101);
+    }
+
+    void ORB_Impl::detectAndCompute(InputArray _image, InputArray _mask, std::vector<KeyPoint>& keypoints, OutputArray _descriptors, bool useProvidedKeypoints)
+    {
+        CV_Assert( useProvidedKeypoints == false );
+
+        detectAndComputeAsync(_image, _mask, d_keypoints_, _descriptors, false, Stream::Null());
+        convert(d_keypoints_, keypoints);
+    }
+
+    void ORB_Impl::detectAndComputeAsync(InputArray _image, InputArray _mask, OutputArray _keypoints, OutputArray _descriptors, bool useProvidedKeypoints, Stream& stream)
+    {
+        CV_Assert( useProvidedKeypoints == false );
+
+        buildScalePyramids(_image, _mask);
+        computeKeyPointsPyramid();
+        if (_descriptors.needed())
+        {
+            computeDescriptors(_descriptors);
+        }
+        mergeKeyPoints(_keypoints);
+    }
+
+    static float getScale(float scaleFactor, int firstLevel, int level)
+    {
+        return pow(scaleFactor, level - firstLevel);
+    }
+
+    void ORB_Impl::buildScalePyramids(InputArray _image, InputArray _mask)
+    {
+        const GpuMat image = _image.getGpuMat();
+        const GpuMat mask = _mask.getGpuMat();
+
+        CV_Assert( image.type() == CV_8UC1 );
+        CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) );
+
+        imagePyr_.resize(nLevels_);
+        maskPyr_.resize(nLevels_);
+
+        for (int level = 0; level < nLevels_; ++level)
+        {
+            float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level);
+
+            Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale));
+
+            ensureSizeIsEnough(sz, image.type(), imagePyr_[level]);
+            ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]);
+            maskPyr_[level].setTo(Scalar::all(255));
+
+            // Compute the resized image
+            if (level != firstLevel_)
+            {
+                if (level < firstLevel_)
+                {
+                    cuda::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR);
+
+                    if (!mask.empty())
+                        cuda::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR);
+                }
+                else
+                {
+                    cuda::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR);
+
+                    if (!mask.empty())
+                    {
+                        cuda::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR);
+                        cuda::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO);
+                    }
+                }
+            }
+            else
+            {
+                image.copyTo(imagePyr_[level]);
+
+                if (!mask.empty())
+                    mask.copyTo(maskPyr_[level]);
+            }
+
+            // Filter keypoints by image border
+            ensureSizeIsEnough(sz, CV_8UC1, buf_);
+            buf_.setTo(Scalar::all(0));
+            Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_);
+            buf_(inner).setTo(Scalar::all(255));
+
+            cuda::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]);
+        }
+    }
+
+    // takes keypoints and culls them by the response
+    static void cull(GpuMat& keypoints, int& count, int n_points)
     {
         using namespace cv::cuda::device::orb;
@@ -557,217 +673,196 @@ namespace
             count = cull_gpu(keypoints.ptr<int>(cuda::FastFeatureDetector::LOCATION_ROW), keypoints.ptr<float>(cuda::FastFeatureDetector::RESPONSE_ROW), count, n_points);
         }
     }
-}
 
-void cv::cuda::ORB_CUDA::computeKeyPointsPyramid()
-{
-    using namespace cv::cuda::device::orb;
-
-    int half_patch_size = patchSize_ / 2;
-
-    keyPointsPyr_.resize(nLevels_);
-    keyPointsCount_.resize(nLevels_);
-
-    for (int level = 0; level < nLevels_; ++level)
-    {
-        fastDetector_->setMaxNumPoints(0.05 * imagePyr_[level].size().area());
-
-        GpuMat fastKpRange;
-        fastDetector_->detectAsync(imagePyr_[level], fastKpRange, maskPyr_[level], Stream::Null());
-
-        keyPointsCount_[level] = fastKpRange.cols;
-
-        if (keyPointsCount_[level] == 0)
-            continue;
-
-        ensureSizeIsEnough(3, keyPointsCount_[level], fastKpRange.type(), keyPointsPyr_[level]);
-        fastKpRange.copyTo(keyPointsPyr_[level].rowRange(0, 2));
-
-        const int n_features = static_cast<int>(n_features_per_level_[level]);
-
-        if (scoreType_ == ORB::HARRIS_SCORE)
-        {
-            // Keep more points than necessary as FAST does not give amazing corners
-            cull(keyPointsPyr_[level], keyPointsCount_[level], 2 * n_features);
-
-            // Compute the Harris cornerness (better scoring than FAST)
-            HarrisResponses_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(1), keyPointsCount_[level], 7, HARRIS_K, 0);
-        }
-
-        //cull to the final desired level, using the new Harris scores or the original FAST scores.
-        cull(keyPointsPyr_[level], keyPointsCount_[level], n_features);
-
-        // Compute orientation
-        IC_Angle_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2), keyPointsCount_[level], half_patch_size, 0);
-    }
-}
-
-void cv::cuda::ORB_CUDA::computeDescriptors(GpuMat& descriptors)
-{
-    using namespace cv::cuda::device::orb;
-
-    int nAllkeypoints = 0;
-
-    for (int level = 0; level < nLevels_; ++level)
-        nAllkeypoints += keyPointsCount_[level];
-
-    if (nAllkeypoints == 0)
-    {
-        descriptors.release();
-        return;
-    }
-
-    ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, descriptors);
-
-    int offset = 0;
-
-    for (int level = 0; level < nLevels_; ++level)
-    {
-        if (keyPointsCount_[level] == 0)
-            continue;
-
-        GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]);
-
-        if (blurForDescriptor)
-        {
-            // preprocess the resized image
-            ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_);
-            blurFilter->apply(imagePyr_[level], buf_);
-        }
-
-        computeOrbDescriptor_gpu(blurForDescriptor ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2),
-            keyPointsCount_[level], pattern_.ptr<int>(0), pattern_.ptr<int>(1), descRange, descriptorSize(), WTA_K_, 0);
-
-        offset += keyPointsCount_[level];
-    }
-}
-
-void cv::cuda::ORB_CUDA::mergeKeyPoints(GpuMat& keypoints)
-{
-    using namespace cv::cuda::device::orb;
-
-    int nAllkeypoints = 0;
-
-    for (int level = 0; level < nLevels_; ++level)
-        nAllkeypoints += keyPointsCount_[level];
-
-    if (nAllkeypoints == 0)
-    {
-        keypoints.release();
-        return;
-    }
-
-    ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, keypoints);
-
-    int offset = 0;
-
-    for (int level = 0; level < nLevels_; ++level)
-    {
-        if (keyPointsCount_[level] == 0)
-            continue;
-
-        float sf = getScale(scaleFactor_, firstLevel_, level);
-
-        GpuMat keyPointsRange = keypoints.colRange(offset, offset + keyPointsCount_[level]);
-
-        float locScale = level != firstLevel_ ? sf : 1.0f;
-
-        mergeLocation_gpu(keyPointsPyr_[level].ptr<short2>(0), keyPointsRange.ptr<float>(0), keyPointsRange.ptr<float>(1), keyPointsCount_[level], locScale, 0);
-
-        GpuMat range = keyPointsRange.rowRange(2, 4);
-        keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range);
-
-        keyPointsRange.row(4).setTo(Scalar::all(level));
-        keyPointsRange.row(5).setTo(Scalar::all(patchSize_ * sf));
-
-        offset += keyPointsCount_[level];
-    }
-}
-
-void cv::cuda::ORB_CUDA::downloadKeyPoints(const GpuMat &d_keypoints, std::vector<KeyPoint>& keypoints)
-{
-    if (d_keypoints.empty())
-    {
-        keypoints.clear();
-        return;
-    }
-
-    Mat h_keypoints(d_keypoints);
-
-    convertKeyPoints(h_keypoints, keypoints);
-}
-
-void cv::cuda::ORB_CUDA::convertKeyPoints(const Mat &d_keypoints, std::vector<KeyPoint>& keypoints)
-{
-    if (d_keypoints.empty())
-    {
-        keypoints.clear();
-        return;
-    }
-
-    CV_Assert(d_keypoints.type() == CV_32FC1 && d_keypoints.rows == ROWS_COUNT);
-
-    const float* x_ptr = d_keypoints.ptr<float>(X_ROW);
-    const float* y_ptr = d_keypoints.ptr<float>(Y_ROW);
-    const float* response_ptr = d_keypoints.ptr<float>(RESPONSE_ROW);
-    const float* angle_ptr = d_keypoints.ptr<float>(ANGLE_ROW);
-    const float* octave_ptr = d_keypoints.ptr<float>(OCTAVE_ROW);
-    const float* size_ptr = d_keypoints.ptr<float>(SIZE_ROW);
-
-    keypoints.resize(d_keypoints.cols);
-
-    for (int i = 0; i < d_keypoints.cols; ++i)
-    {
-        KeyPoint kp;
-        kp.pt.x = x_ptr[i];
-        kp.pt.y = y_ptr[i];
-        kp.response = response_ptr[i];
-        kp.angle = angle_ptr[i];
-        kp.octave = static_cast<int>(octave_ptr[i]);
-        kp.size = size_ptr[i];
-        keypoints[i] = kp;
-    }
-}
-
-void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints)
-{
-    buildScalePyramids(image, mask);
-    computeKeyPointsPyramid();
-    mergeKeyPoints(keypoints);
-}
-
-void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors)
-{
-    buildScalePyramids(image, mask);
-    computeKeyPointsPyramid();
-    computeDescriptors(descriptors);
-    mergeKeyPoints(keypoints);
-}
-
-void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
-{
-    (*this)(image, mask, d_keypoints_);
-    downloadKeyPoints(d_keypoints_, keypoints);
-}
-
-void cv::cuda::ORB_CUDA::operator()(const GpuMat& image, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors)
-{
-    (*this)(image, mask, d_keypoints_, descriptors);
-    downloadKeyPoints(d_keypoints_, keypoints);
-}
-
-void cv::cuda::ORB_CUDA::release()
-{
-    imagePyr_.clear();
-    maskPyr_.clear();
-
-    buf_.release();
-
-    keyPointsPyr_.clear();
-
-    d_keypoints_.release();
-}
+    void ORB_Impl::computeKeyPointsPyramid()
+    {
+        using namespace cv::cuda::device::orb;
+
+        int half_patch_size = patchSize_ / 2;
+
+        keyPointsPyr_.resize(nLevels_);
+        keyPointsCount_.resize(nLevels_);
+
+        fastDetector_->setThreshold(fastThreshold_);
+
+        for (int level = 0; level < nLevels_; ++level)
+        {
+            fastDetector_->setMaxNumPoints(0.05 * imagePyr_[level].size().area());
+
+            GpuMat fastKpRange;
+            fastDetector_->detectAsync(imagePyr_[level], fastKpRange, maskPyr_[level], Stream::Null());
+
+            keyPointsCount_[level] = fastKpRange.cols;
+
+            if (keyPointsCount_[level] == 0)
+                continue;
+
+            ensureSizeIsEnough(3, keyPointsCount_[level], fastKpRange.type(), keyPointsPyr_[level]);
+            fastKpRange.copyTo(keyPointsPyr_[level].rowRange(0, 2));
+
+            const int n_features = static_cast<int>(n_features_per_level_[level]);
+
+            if (scoreType_ == ORB::HARRIS_SCORE)
+            {
+                // Keep more points than necessary as FAST does not give amazing corners
+                cull(keyPointsPyr_[level], keyPointsCount_[level], 2 * n_features);
+
+                // Compute the Harris cornerness (better scoring than FAST)
+                HarrisResponses_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(1), keyPointsCount_[level], 7, HARRIS_K, 0);
+            }
+
+            //cull to the final desired level, using the new Harris scores or the original FAST scores.
+            cull(keyPointsPyr_[level], keyPointsCount_[level], n_features);
+
+            // Compute orientation
+            IC_Angle_gpu(imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2), keyPointsCount_[level], half_patch_size, 0);
+        }
+    }
+
+    void ORB_Impl::computeDescriptors(OutputArray _descriptors)
+    {
+        using namespace cv::cuda::device::orb;
+
+        int nAllkeypoints = 0;
+
+        for (int level = 0; level < nLevels_; ++level)
+            nAllkeypoints += keyPointsCount_[level];
+
+        if (nAllkeypoints == 0)
+        {
+            _descriptors.release();
+            return;
+        }
+
+        ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, _descriptors);
+        GpuMat descriptors = _descriptors.getGpuMat();
+
+        int offset = 0;
+
+        for (int level = 0; level < nLevels_; ++level)
+        {
+            if (keyPointsCount_[level] == 0)
+                continue;
+
+            GpuMat descRange = descriptors.rowRange(offset, offset + keyPointsCount_[level]);
+
+            if (blurForDescriptor_)
+            {
+                // preprocess the resized image
+                ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_);
+                blurFilter_->apply(imagePyr_[level], buf_);
+            }
+
+            computeOrbDescriptor_gpu(blurForDescriptor_ ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr<short2>(0), keyPointsPyr_[level].ptr<float>(2),
+                keyPointsCount_[level], pattern_.ptr<int>(0), pattern_.ptr<int>(1), descRange, descriptorSize(), WTA_K_, 0);
+
+            offset += keyPointsCount_[level];
+        }
+    }
+
+    void ORB_Impl::mergeKeyPoints(OutputArray _keypoints)
+    {
+        using namespace cv::cuda::device::orb;
+
+        int nAllkeypoints = 0;
+
+        for (int level = 0; level < nLevels_; ++level)
+            nAllkeypoints += keyPointsCount_[level];
+
+        if (nAllkeypoints == 0)
+        {
+            _keypoints.release();
+            return;
+        }
+
+        ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, _keypoints);
+        GpuMat& keypoints = _keypoints.getGpuMatRef();
+
+        int offset = 0;
+
+        for (int level = 0; level < nLevels_; ++level)
+        {
+            if (keyPointsCount_[level] == 0)
+                continue;
+
+            float sf = getScale(scaleFactor_, firstLevel_, level);
+
+            GpuMat keyPointsRange = keypoints.colRange(offset, offset + keyPointsCount_[level]);
+
+            float locScale = level != firstLevel_ ? sf : 1.0f;
+
+            mergeLocation_gpu(keyPointsPyr_[level].ptr<short2>(0), keyPointsRange.ptr<float>(0), keyPointsRange.ptr<float>(1), keyPointsCount_[level], locScale, 0);
+
+            GpuMat range = keyPointsRange.rowRange(2, 4);
+            keyPointsPyr_[level](Range(1, 3), Range(0, keyPointsCount_[level])).copyTo(range);
+
+            keyPointsRange.row(4).setTo(Scalar::all(level));
+            keyPointsRange.row(5).setTo(Scalar::all(patchSize_ * sf));
+
+            offset += keyPointsCount_[level];
+        }
+    }
+
+    void ORB_Impl::convert(InputArray _gpu_keypoints, std::vector<KeyPoint>& keypoints)
+    {
+        if (_gpu_keypoints.empty())
+        {
+            keypoints.clear();
+            return;
+        }
+
+        Mat h_keypoints;
+        if (_gpu_keypoints.kind() == _InputArray::CUDA_GPU_MAT)
+        {
+            _gpu_keypoints.getGpuMat().download(h_keypoints);
+        }
+        else
+        {
+            h_keypoints = _gpu_keypoints.getMat();
+        }
+
+        CV_Assert( h_keypoints.rows == ROWS_COUNT );
+        CV_Assert( h_keypoints.type() == CV_32FC1 );
+
+        const int npoints = h_keypoints.cols;
+
+        keypoints.resize(npoints);
+
+        const float* x_ptr = h_keypoints.ptr<float>(X_ROW);
+        const float* y_ptr = h_keypoints.ptr<float>(Y_ROW);
+        const float* response_ptr = h_keypoints.ptr<float>(RESPONSE_ROW);
+        const float* angle_ptr = h_keypoints.ptr<float>(ANGLE_ROW);
+        const float* octave_ptr = h_keypoints.ptr<float>(OCTAVE_ROW);
+        const float* size_ptr = h_keypoints.ptr<float>(SIZE_ROW);
+
+        for (int i = 0; i < npoints; ++i)
+        {
+            KeyPoint kp;
+
+            kp.pt.x = x_ptr[i];
+            kp.pt.y = y_ptr[i];
+            kp.response = response_ptr[i];
+            kp.angle = angle_ptr[i];
+            kp.octave = static_cast<int>(octave_ptr[i]);
+            kp.size = size_ptr[i];
+
+            keypoints[i] = kp;
+        }
+    }
+}
+
+Ptr<cv::cuda::ORB> cv::cuda::ORB::create(int nfeatures,
+                                         float scaleFactor,
+                                         int nlevels,
+                                         int edgeThreshold,
+                                         int firstLevel,
+                                         int WTA_K,
+                                         int scoreType,
+                                         int patchSize,
+                                         int fastThreshold,
+                                         bool blurForDescriptor)
+{
+    return makePtr<ORB_Impl>(nfeatures, scaleFactor, nlevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize, fastThreshold, blurForDescriptor);
+}
 
 #endif /* !defined (HAVE_CUDA) */
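
With this layout, the FAST threshold and descriptor blurring, previously set through setFastParams() and the public blurForDescriptor field, become ordinary runtime properties on the abstract class (setFastParams()'s nonmaxSuppression flag has no direct equivalent in the new interface). A short usage sketch, with arbitrary values:

cv::Ptr<cv::cuda::ORB> orb = cv::cuda::ORB::create();
orb->setFastThreshold(30);        // was: orb.setFastParams(30)
orb->setBlurForDescriptor(true);  // was: orb.blurForDescriptor = true
orb->setMaxFeatures(1000);        // setters inherited from cv::ORB also work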

modules/cudafeatures2d/test/test_features2d.cpp

@@ -122,7 +122,7 @@ namespace
     IMPLEMENT_PARAM_CLASS(ORB_BlurForDescriptor, bool)
 }
 
-CV_ENUM(ORB_ScoreType, ORB::HARRIS_SCORE, ORB::FAST_SCORE)
+CV_ENUM(ORB_ScoreType, cv::ORB::HARRIS_SCORE, cv::ORB::FAST_SCORE)
 
 PARAM_TEST_CASE(ORB, cv::cuda::DeviceInfo, ORB_FeaturesCount, ORB_ScaleFactor, ORB_LevelsCount, ORB_EdgeThreshold, ORB_firstLevel, ORB_WTA_K, ORB_ScoreType, ORB_PatchSize, ORB_BlurForDescriptor)
 {
@@ -162,8 +162,9 @@ CUDA_TEST_P(ORB, Accuracy)
     cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
     mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
 
-    cv::cuda::ORB_CUDA orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
-    orb.blurForDescriptor = blurForDescriptor;
+    cv::Ptr<cv::cuda::ORB> orb =
+            cv::cuda::ORB::create(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel,
+                                  WTA_K, scoreType, patchSize, 20, blurForDescriptor);
 
     if (!supportFeature(devInfo, cv::cuda::GLOBAL_ATOMICS))
     {
@@ -171,7 +172,7 @@ CUDA_TEST_P(ORB, Accuracy)
         {
             std::vector<cv::KeyPoint> keypoints;
             cv::cuda::GpuMat descriptors;
-            orb(loadMat(image), loadMat(mask), keypoints, descriptors);
+            orb->detectAndComputeAsync(loadMat(image), loadMat(mask), keypoints, descriptors);
         }
         catch (const cv::Exception& e)
         {
@@ -182,7 +183,7 @@ CUDA_TEST_P(ORB, Accuracy)
     {
         std::vector<cv::KeyPoint> keypoints;
        cv::cuda::GpuMat descriptors;
-        orb(loadMat(image), loadMat(mask), keypoints, descriptors);
+        orb->detectAndCompute(loadMat(image), loadMat(mask), keypoints, descriptors);
 
         cv::Ptr<cv::ORB> orb_gold = cv::ORB::create(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);

samples/gpu/performance/tests.cpp

@@ -350,15 +350,15 @@ TEST(ORB)
    orb->detectAndCompute(src, Mat(), keypoints, descriptors);
    CPU_OFF;
 
-    cuda::ORB_CUDA d_orb;
+    Ptr<cuda::ORB> d_orb = cuda::ORB::create();
     cuda::GpuMat d_src(src);
     cuda::GpuMat d_keypoints;
     cuda::GpuMat d_descriptors;
 
-    d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
+    d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
     CUDA_ON;
-    d_orb(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
+    d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
     CUDA_OFF;
 }