made GPU version of SURF more consistent with CPU one

This commit is contained in:
Vladislav Vinogradov 2011-03-10 13:53:58 +00:00
parent c067c633f0
commit 58f6919795
7 changed files with 860 additions and 1135 deletions

@ -1537,83 +1537,55 @@ namespace cv
////////////////////////////////// SURF //////////////////////////////////////////
SURFParams_GPU() : threshold(0.1f), nOctaves(4), nIntervals(4), initialScale(2.f),
l1(3.f/1.5f), l2(5.f/1.5f), l3(3.f/1.5f), l4(1.f/1.5f),
edgeScale(0.81f), initialStep(1), extended(true), featuresRatio(0.01f) {}
//! The interest operator threshold
float threshold;
//! The number of octaves to process
int nOctaves;
//! The number of intervals in each octave
int nIntervals;
//! The scale associated with the first interval of the first octave
float initialScale;
//! mask parameter l_1
float l1;
//! mask parameter l_2
float l2;
//! mask parameter l_3
float l3;
//! mask parameter l_4
float l4;
//! The amount to scale the edge rejection mask
float edgeScale;
//! The initial sampling step in pixels.
int initialStep;
//! True, if generate 128-len descriptors, false - 64-len descriptors
bool extended;
//! max features = featuresRatio * img.size().area()
float featuresRatio;
class CV_EXPORTS SURF_GPU : public SURFParams_GPU
class CV_EXPORTS SURF_GPU : public CvSURFParams
//! the default constructor
//! the full constructor taking all the necessary parameters
explicit SURF_GPU(double _hessianThreshold, int _nOctaves=4,
int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f);
//! returns the descriptor size in float's (64 or 128)
int descriptorSize() const;
//! upload host keypoints to device memory
static void uploadKeypoints(const vector<KeyPoint>& keypoints, GpuMat& keypointsGPU);
void uploadKeypoints(const vector<KeyPoint>& keypoints, GpuMat& keypointsGPU);
//! download keypoints from device to host memory
static void downloadKeypoints(const GpuMat& keypointsGPU, vector<KeyPoint>& keypoints);
void downloadKeypoints(const GpuMat& keypointsGPU, vector<KeyPoint>& keypoints);
//! download descriptors from device to host memory
static void downloadDescriptors(const GpuMat& descriptorsGPU, vector<float>& descriptors);
void downloadDescriptors(const GpuMat& descriptorsGPU, vector<float>& descriptors);
//! finds the keypoints using fast hessian detector used in SURF
//! supports CV_8UC1 images
//! keypoints will have 1 row and type CV_32FC(6)
//! keypoints.at<float[6]>(1, i) contains i'th keypoint
//! format: (x, y, size, response, angle, octave)
//! format: (x, y, laplacian, size, dir, hessian)
void operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints);
//! finds the keypoints and computes their descriptors.
//! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
void operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints = false, bool calcOrientation = true);
bool useProvidedKeypoints = false);
void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints);
void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints = false, bool calcOrientation = true);
bool useProvidedKeypoints = false);
void operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, std::vector<float>& descriptors,
bool useProvidedKeypoints = false, bool calcOrientation = true);
bool useProvidedKeypoints = false);
GpuMat sum;
GpuMat sumf;
//! max keypoints = keypointsRatio * img.size().area()
float keypointsRatio;
GpuMat mask1;
GpuMat maskSum;
GpuMat sum, mask1, maskSum, intBuffer;
GpuMat det, trace;
GpuMat hessianBuffer;
GpuMat maxPosBuffer;
GpuMat featuresBuffer;
GpuMat keypointsBuffer;

@ -111,20 +111,20 @@ namespace cv
float x;
float y;
float laplacian;
float size;
float response;
float angle;
float octave;
float dir;
float hessian;
enum KeypointLayout

File diff suppressed because it is too large Load Diff

@ -48,123 +48,93 @@ using namespace std;
#if !defined (HAVE_CUDA)
// Stub definitions compiled when HAVE_CUDA is not defined: every SURF_GPU member
// below does nothing except call throw_nogpu(). Parameters are left unnamed because
// they are never used; descriptorSize() additionally returns 0 after the call.
// NOTE(review): this listing shows both the 6-argument (trailing bool, bool) and the
// 5-argument (trailing bool) operator() overloads side by side - confirm which set
// matches the class declaration actually in use.
cv::gpu::SURF_GPU::SURF_GPU() { throw_nogpu(); }
cv::gpu::SURF_GPU::SURF_GPU(double, int, int, bool, float) { throw_nogpu(); }
int cv::gpu::SURF_GPU::descriptorSize() const { throw_nogpu(); return 0;}
void cv::gpu::SURF_GPU::uploadKeypoints(const vector<KeyPoint>&, GpuMat&) { throw_nogpu(); }
void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat&, vector<KeyPoint>&) { throw_nogpu(); }
void cv::gpu::SURF_GPU::downloadDescriptors(const GpuMat&, vector<float>&) { throw_nogpu(); }
void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, bool) { throw_nogpu(); }
void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool) { throw_nogpu(); }
void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, vector<KeyPoint>&) { throw_nogpu(); }
void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, vector<KeyPoint>&, GpuMat&, bool, bool) { throw_nogpu(); }
void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, vector<KeyPoint>&, vector<float>&, bool, bool) { throw_nogpu(); }
void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, vector<KeyPoint>&, GpuMat&, bool) { throw_nogpu(); }
void cv::gpu::SURF_GPU::operator()(const GpuMat&, const GpuMat&, vector<KeyPoint>&, vector<float>&, bool) { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu { namespace surf
dim3 calcBlockSize(int nIntervals);
void fasthessian_gpu(PtrStepf hessianBuffer, int x_size, int y_size, const dim3& threads);
void fasthessian_gpu_old(PtrStepf hessianBuffer, int x_size, int y_size, const dim3& threadsOld);
void nonmaxonly_gpu(PtrStepf hessianBuffer, int4* maxPosBuffer, unsigned int& maxCounter,
int x_size, int y_size, bool use_mask, const dim3& threads);
void fh_interp_extremum_gpu(PtrStepf hessianBuffer, const int4* maxPosBuffer, unsigned int maxCounter,
KeyPoint_GPU* featuresBuffer, unsigned int& featureCounter);
void find_orientation_gpu(KeyPoint_GPU* features, int nFeatures);
void icvCalcLayerDetAndTrace_gpu(const PtrStepf& det, const PtrStepf& trace, int img_rows, int img_cols, int octave, int nOctaveLayers);
void icvFindMaximaInLayer_gpu(const PtrStepf& det, const PtrStepf& trace, int4* maxPosBuffer, unsigned int* maxCounter,
int img_rows, int img_cols, int octave, bool use_mask, int nLayers);
void icvInterpolateKeypoint_gpu(const PtrStepf& det, const int4* maxPosBuffer, unsigned int maxCounter, KeyPoint_GPU* featuresBuffer, unsigned int* featureCounter);
void icvCalcOrientation_gpu(const KeyPoint_GPU* featureBuffer, int nFeatures, KeyPoint_GPU* keypoints, unsigned int* keypointCounter);
void compute_descriptors_gpu(const DevMem2Df& descriptors, const KeyPoint_GPU* features, int nFeatures);
void compute_descriptors_gpu_old(const DevMem2Df& descriptors, const KeyPoint_GPU* features, int nFeatures);
using namespace cv::gpu::surf;
class SURF_GPU_Invoker : private SURFParams_GPU
class SURF_GPU_Invoker : private CvSURFParams
SURF_GPU_Invoker(SURF_GPU& surf, const GpuMat& img, const GpuMat& mask) :
SURF_GPU_Invoker(SURF_GPU& surf, const GpuMat& img, const GpuMat& mask) :
sum(surf.sum), sumf(surf.sumf),
sum(surf.sum), mask1(surf.mask1), maskSum(surf.maskSum), intBuffer(surf.intBuffer), det(surf.det), trace(surf.trace),
mask1(surf.mask1), maskSum(surf.maskSum),
maxPosBuffer(surf.maxPosBuffer), featuresBuffer(surf.featuresBuffer), keypointsBuffer(surf.keypointsBuffer),
img_cols(img.cols), img_rows(img.rows),
mask_width(0), mask_height(0),
featureCounter(0), maxCounter(0)
CV_Assert(!img.empty() && img.type() == CV_8UC1);
CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
CV_Assert(nOctaves > 0 && nIntervals > 2 && nIntervals < 22);
CV_Assert(nOctaves > 0 && nOctaveLayers > 0);
CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));
max_features = static_cast<int>(img.size().area() * featuresRatio);
max_candidates = static_cast<int>(1.5 * max_features);
maxKeypoints = static_cast<int>(img.size().area() * surf.keypointsRatio);
maxFeatures = static_cast<int>(1.5 * maxKeypoints);
maxCandidates = static_cast<int>(1.5 * maxFeatures);
CV_Assert(max_features > 0);
featuresBuffer.create(1, max_features, CV_32FC(6));
maxPosBuffer.create(1, max_candidates, CV_32SC4);
mask_width = l2 * 0.5f;
mask_height = 1.0f + l1;
// Dxy gap half-width
float dxy_center_offset = 0.5f * (l4 + l3);
// Dxy squares half-width
float dxy_half_width = 0.5f * l3;
// rescale edge_scale to fit with the filter dimensions
float dxy_scale = edgeScale * std::pow((2.f + 2.f * l1) * l2 / (4.f * l3 * l3), 2.f);
CV_Assert(maxKeypoints > 0);
// Compute border required such that the filters don't overstep the image boundaries
float smax0 = 2.0f * initialScale + 0.5f;
int border0 = static_cast<int>(std::ceil(smax0 * std::max(std::max(mask_width, mask_height), l3 + l4 * 0.5f)));
cudaSafeCall( cudaMalloc((void**)&d_counters, (nOctaves + 2) * sizeof(unsigned int)) );
cudaSafeCall( cudaMemset(d_counters, 0, (nOctaves + 2) * sizeof(unsigned int)) );
int width0 = (img_cols - 2 * border0) / initialStep;
int height0 = (img_rows - 2 * border0) / initialStep;
uploadConstant("cv::gpu::surf::c_max_candidates", maxCandidates);
uploadConstant("cv::gpu::surf::c_max_features", maxFeatures);
uploadConstant("cv::gpu::surf::c_max_keypoints", maxKeypoints);
uploadConstant("cv::gpu::surf::c_img_rows", img_rows);
uploadConstant("cv::gpu::surf::c_img_cols", img_cols);
uploadConstant("cv::gpu::surf::c_nOctaveLayers", nOctaveLayers);
uploadConstant("cv::gpu::surf::c_hessianThreshold", static_cast<float>(hessianThreshold));
uploadConstant("cv::gpu::surf::c_max_candidates", max_candidates);
uploadConstant("cv::gpu::surf::c_max_features", max_features);
uploadConstant("cv::gpu::surf::c_nIntervals", nIntervals);
uploadConstant("cv::gpu::surf::c_mask_width", mask_width);
uploadConstant("cv::gpu::surf::c_mask_height", mask_height);
uploadConstant("cv::gpu::surf::c_dxy_center_offset", dxy_center_offset);
uploadConstant("cv::gpu::surf::c_dxy_half_width", dxy_half_width);
uploadConstant("cv::gpu::surf::c_dxy_scale", dxy_scale);
uploadConstant("cv::gpu::surf::c_initialScale", initialScale);
uploadConstant("cv::gpu::surf::c_threshold", threshold);
hessianBuffer.create(height0 * nIntervals, width0, CV_32F);
bindTexture("cv::gpu::surf::imgTex", (DevMem2D)img);
integral(img, sum);
sum.convertTo(sumf, CV_32F, 1.0 / 255.0);
bindTexture("cv::gpu::surf::sumTex", (DevMem2Df)sumf);
integralBuffered(img, sum, intBuffer);
bindTexture("cv::gpu::surf::sumTex", (DevMem2D_<unsigned int>)sum);
if (!mask.empty())
if (use_mask)
min(mask, 1.0, mask1);
integral(mask1, maskSum);
bindTexture("cv::gpu::surf::maskSumTex", (DevMem2Di)maskSum);
integralBuffered(mask1, maskSum, intBuffer);
bindTexture("cv::gpu::surf::maskSumTex", (DevMem2D_<unsigned int>)maskSum);
cudaSafeCall( cudaFree(d_counters) );
if (use_mask)
@ -172,102 +142,115 @@ namespace
void detectKeypoints(GpuMat& keypoints)
typedef void (*fasthessian_t)(PtrStepf hessianBuffer, int x_size, int y_size, const dim3& threads);
const fasthessian_t fasthessian =
DeviceInfo().supports(FEATURE_SET_COMPUTE_13) ? fasthessian_gpu : fasthessian_gpu_old;
ensureSizeIsEnough(img_rows * (nOctaveLayers + 2), img_cols, CV_32FC1, det);
ensureSizeIsEnough(img_rows * (nOctaveLayers + 2), img_cols, CV_32FC1, trace);
ensureSizeIsEnough(1, maxCandidates, CV_32SC4, maxPosBuffer);
ensureSizeIsEnough(1, maxFeatures, CV_32FC(6), featuresBuffer);
dim3 threads = calcBlockSize(nIntervals);
for(int octave = 0; octave < nOctaves; ++octave)
for (int octave = 0; octave < nOctaves; ++octave)
int step = initialStep * (1 << octave);
const int layer_rows = img_rows >> octave;
const int layer_cols = img_cols >> octave;
// Compute border required such that the filters don't overstep the image boundaries
float d = (initialScale * (1 << octave)) / (nIntervals - 2);
float smax = initialScale * (1 << octave) + d * (nIntervals - 2.0f) + 0.5f;
int border = static_cast<int>(std::ceil(smax * std::max(std::max(mask_width, mask_height), l3 + l4 * 0.5f)));
int x_size = (img_cols - 2 * border) / step;
int y_size = (img_rows - 2 * border) / step;
if (x_size <= 0 || y_size <= 0)
uploadConstant("cv::gpu::surf::c_octave", octave);
uploadConstant("cv::gpu::surf::c_layer_rows", layer_rows);
uploadConstant("cv::gpu::surf::c_layer_cols", layer_cols);
uploadConstant("cv::gpu::surf::c_octave", octave);
uploadConstant("cv::gpu::surf::c_x_size", x_size);
uploadConstant("cv::gpu::surf::c_y_size", y_size);
uploadConstant("cv::gpu::surf::c_border", border);
uploadConstant("cv::gpu::surf::c_step", step);
icvCalcLayerDetAndTrace_gpu(det, trace, img_rows, img_cols, octave, nOctaveLayers);
fasthessian(hessianBuffer, x_size, y_size, threads);
icvFindMaximaInLayer_gpu(det, trace, maxPosBuffer.ptr<int4>(), d_counters + 2 + octave,
img_rows, img_cols, octave, use_mask, nOctaveLayers);
// Reset the candidate count.
maxCounter = 0;
nonmaxonly_gpu(hessianBuffer, maxPosBuffer.ptr<int4>(), maxCounter, x_size, y_size, use_mask, threads);
maxCounter = std::min(maxCounter, static_cast<unsigned int>(max_candidates));
unsigned int maxCounter;
cudaSafeCall( cudaMemcpy(&maxCounter, d_counters + 2 + octave, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));
if (maxCounter > 0)
fh_interp_extremum_gpu(hessianBuffer, maxPosBuffer.ptr<int4>(), maxCounter,
featuresBuffer.ptr<KeyPoint_GPU>(), featureCounter);
featureCounter = std::min(featureCounter, static_cast<unsigned int>(max_features));
icvInterpolateKeypoint_gpu(det, maxPosBuffer.ptr<int4>(), maxCounter,
featuresBuffer.ptr<KeyPoint_GPU>(), d_counters);
unsigned int featureCounter;
cudaSafeCall( cudaMemcpy(&featureCounter, d_counters, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
featureCounter = std::min(featureCounter, static_cast<unsigned int>(maxFeatures));
if (featureCounter > 0)
featuresBuffer.colRange(0, featureCounter).copyTo(keypoints);
findOrientation(featuresBuffer.colRange(0, featureCounter), keypoints);
void findOrientation(GpuMat& keypoints)
void findOrientation(const GpuMat& features, GpuMat& keypoints)
if (keypoints.cols > 0)
find_orientation_gpu(keypoints.ptr<KeyPoint_GPU>(), keypoints.cols);
if (features.cols > 0)
ensureSizeIsEnough(1, maxKeypoints, CV_32FC(6), keypointsBuffer);
icvCalcOrientation_gpu(features.ptr<KeyPoint_GPU>(), features.cols, keypointsBuffer.ptr<KeyPoint_GPU>(),
d_counters + 1);
unsigned int keypointsCounter;
cudaSafeCall( cudaMemcpy(&keypointsCounter, d_counters + 1, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
keypointsCounter = std::min(keypointsCounter, static_cast<unsigned int>(maxKeypoints));
if (keypointsCounter > 0)
keypointsBuffer.colRange(0, keypointsCounter).copyTo(keypoints);
void computeDescriptors(const GpuMat& keypoints, GpuMat& descriptors, int descriptorSize)
typedef void (*compute_descriptors_t)(const DevMem2Df& descriptors,
const KeyPoint_GPU* features, int nFeatures);
const compute_descriptors_t compute_descriptors = compute_descriptors_gpu_old;
//DeviceInfo().supports(FEATURE_SET_COMPUTE_13) ? compute_descriptors_gpu : compute_descriptors_gpu_old;
if (keypoints.cols > 0)
descriptors.create(keypoints.cols, descriptorSize, CV_32F);
compute_descriptors(descriptors, keypoints.ptr<KeyPoint_GPU>(), keypoints.cols);
compute_descriptors_gpu(descriptors, keypoints.ptr<KeyPoint_GPU>(), keypoints.cols);
GpuMat& sum;
GpuMat& sumf;
GpuMat& mask1;
GpuMat& maskSum;
GpuMat& intBuffer;
GpuMat& det;
GpuMat& trace;
GpuMat& hessianBuffer;
GpuMat& maxPosBuffer;
GpuMat& featuresBuffer;
GpuMat& keypointsBuffer;
int img_cols, img_rows;
bool use_mask;
float mask_width, mask_height;
unsigned int featureCounter;
unsigned int maxCounter;
int maxCandidates;
int maxFeatures;
int maxKeypoints;
int max_candidates;
int max_features;
unsigned int* d_counters;
hessianThreshold = 100;
extended = 1;
nOctaves = 4;
nOctaveLayers = 2;
keypointsRatio = 0.01f;
// Full constructor: copies each argument into the parameter field of the same
// meaning (hessianThreshold, extended, nOctaves, nOctaveLayers, keypointsRatio).
// No validation happens here.
// NOTE(review): `extended` is assigned from a bool here but from the integer 1 in the
// default constructor - presumably an int field inherited from CvSURFParams; confirm.
cv::gpu::SURF_GPU::SURF_GPU(double _threshold, int _nOctaves, int _nOctaveLayers, bool _extended, float _keypointsRatio)
hessianThreshold = _threshold;
extended = _extended;
nOctaves = _nOctaves;
nOctaveLayers = _nOctaveLayers;
keypointsRatio = _keypointsRatio;
// Returns the descriptor length in floats: 128 when `extended` is set, 64 otherwise.
int cv::gpu::SURF_GPU::descriptorSize() const
return extended ? 128 : 64;
@ -281,27 +264,64 @@ void cv::gpu::SURF_GPU::uploadKeypoints(const vector<KeyPoint>& keypoints, GpuMa
Mat keypointsCPU(1, keypoints.size(), CV_32FC(6));
const KeyPoint* keypoints_ptr = &keypoints[0];
KeyPoint_GPU* keypointsCPU_ptr = keypointsCPU.ptr<KeyPoint_GPU>();
for (size_t i = 0; i < keypoints.size(); ++i, ++keypoints_ptr, ++keypointsCPU_ptr)
for (size_t i = 0; i < keypoints.size(); ++i)
const KeyPoint& kp = *keypoints_ptr;
KeyPoint_GPU& gkp = *keypointsCPU_ptr;
const KeyPoint& kp = keypoints[i];
KeyPoint_GPU& gkp = keypointsCPU.ptr<KeyPoint_GPU>()[i];
gkp.x = kp.pt.x;
gkp.y = kp.pt.y;
gkp.laplacian = 1.0f;
gkp.size = kp.size;
gkp.octave = static_cast<float>(kp.octave);
gkp.angle = kp.angle;
gkp.response = kp.response;
gkp.dir = kp.angle;
gkp.hessian = kp.response;
// Returns the wavelet size for `layer` within `octave`:
// (HAAR_SIZE0 + HAAR_SIZE_INC * layer) shifted left by `octave`, i.e. the layer's
// base size doubled once per octave. For example octave 0 / layer 0 gives 9 and
// octave 1 / layer 1 gives (9 + 6) << 1 = 30.
// No range checking is done on either argument.
int calcSize(int octave, int layer)
/* Wavelet size at first layer of first octave. */
const int HAAR_SIZE0 = 9;
/* Wavelet size increment between layers. This should be an even number,
such that the wavelet sizes in an octave are either all even or all odd.
This ensures that when looking for the neighbours of a sample, the layers
above and below are aligned correctly. */
const int HAAR_SIZE_INC = 6;
return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
// Estimates which octave a keypoint belongs to by comparing kpt.size against the
// wavelet size calcSize(octave, layer) for the scanned (octave, layer) pairs and
// keeping the octave with the smallest absolute difference.
// Returns the best octave found, or 0 if no pair is examined (e.g. params.nOctaves <= 1
// or params.nOctaveLayers <= 0). Returns early on an exact size match.
// NOTE(review): the octave loop starts at 1, so octave 0 is never a candidate and can
// only be returned as the untouched initial value - confirm this is intentional.
int getPointOctave(const KeyPoint_GPU& kpt, const CvSURFParams& params)
int best_octave = 0;
float min_diff = numeric_limits<float>::max();
for (int octave = 1; octave < params.nOctaves; ++octave)
for (int layer = 0; layer < params.nOctaveLayers; ++layer)
float diff = std::abs(kpt.size - (float)calcSize(octave, layer));
if (min_diff > diff)
min_diff = diff;
best_octave = octave;
// An exact match cannot be improved on, so stop searching.
if (min_diff == 0)
return best_octave;
return best_octave;
void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, vector<KeyPoint>& keypoints)
if (keypointsGPU.empty())
@ -313,21 +333,23 @@ void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, vector<Key
Mat keypointsCPU = keypointsGPU;
KeyPoint* keypoints_ptr = &keypoints[0];
const KeyPoint_GPU* keypointsCPU_ptr = keypointsCPU.ptr<KeyPoint_GPU>();
for (int i = 0; i < keypointsGPU.cols; ++i, ++keypoints_ptr, ++keypointsCPU_ptr)
for (int i = 0; i < keypointsGPU.cols; ++i)
KeyPoint& kp = *keypoints_ptr;
const KeyPoint_GPU& gkp = *keypointsCPU_ptr;
KeyPoint& kp = keypoints[i];
const KeyPoint_GPU& gkp = keypointsCPU.ptr<KeyPoint_GPU>()[i];
kp.pt.x = gkp.x;
kp.pt.y = gkp.y;
kp.size = gkp.size;
kp.octave = static_cast<int>(gkp.octave);
kp.angle = gkp.angle;
kp.response = gkp.response;
kp.angle = gkp.dir;
kp.response = gkp.hessian;
kp.octave = getPointOctave(gkp, *this);
kp.class_id = static_cast<int>(gkp.laplacian);
@ -353,23 +375,24 @@ void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, GpuMat
SURF_GPU_Invoker surf(*this, img, mask);
void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints, bool calcOrientation)
bool useProvidedKeypoints)
if (!img.empty())
SURF_GPU_Invoker surf(*this, img, mask);
if (!useProvidedKeypoints)
if (calcOrientation)
GpuMat keypointsBuf;
surf.findOrientation(keypoints, keypointsBuf);
surf.computeDescriptors(keypoints, descriptors, descriptorSize());
@ -385,24 +408,24 @@ void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, vector
void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, vector<KeyPoint>& keypoints,
GpuMat& descriptors, bool useProvidedKeypoints, bool calcOrientation)
GpuMat& descriptors, bool useProvidedKeypoints)
GpuMat keypointsGPU;
if (useProvidedKeypoints)
uploadKeypoints(keypoints, keypointsGPU);
(*this)(img, mask, keypointsGPU, descriptors, useProvidedKeypoints, calcOrientation);
(*this)(img, mask, keypointsGPU, descriptors, useProvidedKeypoints);
downloadKeypoints(keypointsGPU, keypoints);
void cv::gpu::SURF_GPU::operator()(const GpuMat& img, const GpuMat& mask, vector<KeyPoint>& keypoints,
vector<float>& descriptors, bool useProvidedKeypoints, bool calcOrientation)
vector<float>& descriptors, bool useProvidedKeypoints)
GpuMat descriptorsGPU;
(*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints, calcOrientation);
(*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints);
downloadDescriptors(descriptorsGPU, descriptors);

@ -48,7 +48,6 @@ using namespace std;
const string FEATURES2D_DIR = "features2d";
const string IMAGE_FILENAME = "aloe.png";
const string VALID_FILE_NAME = "surf.xml.gz";
class CV_GPU_SURFTest : public cvtest::BaseTest
@ -59,17 +58,20 @@ public:
bool isSimilarKeypoints(const KeyPoint& p1, const KeyPoint& p2);
int getValidCount(const vector<KeyPoint>& keypoints1, const vector<KeyPoint>& keypoints2, const vector<DMatch>& matches);
void compareKeypointSets(const vector<KeyPoint>& validKeypoints, const vector<KeyPoint>& calcKeypoints,
const Mat& validDescriptors, const Mat& calcDescriptors);
void emptyDataTest(SURF_GPU& fdetector);
void regressionTest(SURF_GPU& fdetector);
void emptyDataTest();
void accuracyTest();
virtual void run(int);
void CV_GPU_SURFTest::emptyDataTest(SURF_GPU& fdetector)
void CV_GPU_SURFTest::emptyDataTest()
SURF_GPU fdetector;
GpuMat image;
vector<KeyPoint> keypoints;
vector<float> descriptors;
@ -114,116 +116,80 @@ bool CV_GPU_SURFTest::isSimilarKeypoints(const KeyPoint& p1, const KeyPoint& p2)
p1.class_id == p2.class_id );
int CV_GPU_SURFTest::getValidCount(const vector<KeyPoint>& keypoints1, const vector<KeyPoint>& keypoints2,
const vector<DMatch>& matches)
int count = 0;
for (size_t i = 0; i < matches.size(); ++i)
const DMatch& m = matches[i];
const KeyPoint& kp1 = keypoints1[m.queryIdx];
const KeyPoint& kp2 = keypoints2[m.trainIdx];
if (isSimilarKeypoints(kp1, kp2))
return count;
void CV_GPU_SURFTest::compareKeypointSets(const vector<KeyPoint>& validKeypoints, const vector<KeyPoint>& calcKeypoints,
const Mat& validDescriptors, const Mat& calcDescriptors)
if (validKeypoints.size() != calcKeypoints.size())
BruteForceMatcher< L2<float> > matcher;
vector<DMatch> matches;
matcher.match(validDescriptors, calcDescriptors, matches);
int validCount = getValidCount(validKeypoints, calcKeypoints, matches);
float validRatio = (float)validCount / matches.size();
if (validRatio < 0.5f)
ts->printf(cvtest::TS::LOG, "Keypoints sizes doesn't equal (validCount = %d, calcCount = %d).\n",
validKeypoints.size(), calcKeypoints.size());
ts->printf(cvtest::TS::LOG, "Bad accuracy - %f.\n", validRatio);
ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
if (validDescriptors.size() != calcDescriptors.size())
ts->printf(cvtest::TS::LOG, "Descriptors sizes doesn't equal.\n");
for (size_t v = 0; v < validKeypoints.size(); v++)
int nearestIdx = -1;
float minDist = std::numeric_limits<float>::max();
for (size_t c = 0; c < calcKeypoints.size(); c++)
float curDist = (float)norm(calcKeypoints[c].pt - validKeypoints[v].pt);
if (curDist < minDist)
minDist = curDist;
nearestIdx = c;
assert(minDist >= 0);
if (!isSimilarKeypoints(validKeypoints[v], calcKeypoints[nearestIdx]))
ts->printf(cvtest::TS::LOG, "Bad keypoints accuracy.\n");
ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
if (norm(validDescriptors.row(v), calcDescriptors.row(nearestIdx), NORM_L2) > 1.5f)
ts->printf(cvtest::TS::LOG, "Bad descriptors accuracy.\n");
ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY );
void CV_GPU_SURFTest::regressionTest(SURF_GPU& fdetector)
void CV_GPU_SURFTest::accuracyTest()
string imgFilename = string(ts->get_data_path()) + FEATURES2D_DIR + "/" + IMAGE_FILENAME;
string resFilename = string(ts->get_data_path()) + FEATURES2D_DIR + "/" + VALID_FILE_NAME;
// Read the test image.
GpuMat image(imread(imgFilename, 0));
Mat image = imread(imgFilename, 0);
if (image.empty())
ts->printf( cvtest::TS::LOG, "Image %s can not be read.\n", imgFilename.c_str() );
ts->set_failed_test_info( cvtest::TS::FAIL_INVALID_TEST_DATA );
FileStorage fs(resFilename, FileStorage::READ);
Mat mask(image.size(), CV_8UC1, Scalar::all(1));
mask(Range(0, image.rows / 2), Range(0, image.cols / 2)).setTo(Scalar::all(0));
// Compute keypoints.
GpuMat mask(image.size(), CV_8UC1, Scalar::all(1));
mask(Range(0, image.rows / 2), Range(0, image.cols / 2)).setTo(Scalar::all(0));
vector<KeyPoint> calcKeypoints;
GpuMat calcDespcriptors;
fdetector(image, mask, calcKeypoints, calcDespcriptors);
GpuMat calcDescriptors;
SURF_GPU fdetector; fdetector.extended = false;
fdetector(GpuMat(image), GpuMat(mask), calcKeypoints, calcDescriptors);
if (fs.isOpened()) // Compare computed and valid keypoints.
// Read validation keypoints set.
vector<KeyPoint> validKeypoints;
Mat validDespcriptors;
read(fs["keypoints"], validKeypoints);
read(fs["descriptors"], validDespcriptors);
if (validKeypoints.empty() || validDespcriptors.empty())
ts->printf(cvtest::TS::LOG, "Validation file can not be read.\n");
// Calc validation keypoints set.
vector<KeyPoint> validKeypoints;
vector<float> validDescriptors;
SURF fdetector_gold; fdetector_gold.extended = false;
fdetector_gold(image, mask, validKeypoints, validDescriptors);
compareKeypointSets(validKeypoints, calcKeypoints, validDespcriptors, calcDespcriptors);
else // Write detector parameters and computed keypoints as validation data.
{
fs.open(resFilename, FileStorage::WRITE);
if (!fs.isOpened())
ts->printf(cvtest::TS::LOG, "File %s can not be opened to write.\n", resFilename.c_str());
write(fs, "keypoints", calcKeypoints);
write(fs, "descriptors", (Mat)calcDespcriptors);
compareKeypointSets(validKeypoints, calcKeypoints,
Mat(validKeypoints.size(), fdetector_gold.descriptorSize(), CV_32F, &validDescriptors[0]), calcDescriptors);
void CV_GPU_SURFTest::run( int /*start_from*/ )
SURF_GPU fdetector;
TEST(SURF, empty_data_and_regression) { CV_GPU_SURFTest test; test.safe_run(); }
TEST(SURF, empty_data_and_accuracy) { CV_GPU_SURFTest test; test.safe_run(); }

@ -264,10 +264,11 @@ TEST(SURF)
SURF surf;
vector<KeyPoint> keypoints1, keypoints2;
vector<float> descriptors1, descriptors2;
surf(src1, Mat(), keypoints1);
surf(src2, Mat(), keypoints2);
surf(src1, Mat(), keypoints1, descriptors1);
surf(src2, Mat(), keypoints2, descriptors2);
gpu::SURF_GPU d_surf;
@ -275,8 +276,8 @@ TEST(SURF)
gpu::GpuMat d_descriptors1, d_descriptors2;
d_surf(d_src1, gpu::GpuMat(), d_keypoints1);
d_surf(d_src2, gpu::GpuMat(), d_keypoints2);
d_surf(d_src1, gpu::GpuMat(), d_keypoints1, d_descriptors1);
d_surf(d_src2, gpu::GpuMat(), d_keypoints2, d_descriptors2);

@ -51,10 +51,10 @@ int main(int argc, char* argv[])
vector<KeyPoint> keypoints1, keypoints2;
vector<float> descriptors1, descriptors2;
vector<DMatch> matches;
SURF_GPU::downloadKeypoints(keypoints1GPU, keypoints1);
SURF_GPU::downloadKeypoints(keypoints2GPU, keypoints2);
SURF_GPU::downloadDescriptors(descriptors1GPU, descriptors1);
SURF_GPU::downloadDescriptors(descriptors2GPU, descriptors2);
surf.downloadKeypoints(keypoints1GPU, keypoints1);
surf.downloadKeypoints(keypoints2GPU, keypoints2);
surf.downloadDescriptors(descriptors1GPU, descriptors1);
surf.downloadDescriptors(descriptors2GPU, descriptors2);
BruteForceMatcher_GPU< L2<float> >::matchDownload(trainIdx, distance, matches);
// drawing the results