added assertion on device features (global atomics) into gpu tests

This commit is contained in:
Vladislav Vinogradov 2012-03-27 07:33:39 +00:00
parent 4a996111ea
commit bd13e9479b
6 changed files with 498 additions and 358 deletions

View File

@ -761,7 +761,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
if (query.empty() || train.empty())
return;
using namespace ::cv::gpu::device::bf_radius_match;
using namespace cv::gpu::device::bf_radius_match;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
@ -789,7 +789,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
DeviceInfo info;
int cc = info.majorVersion() * 10 + info.minorVersion();
CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && info.supports(GLOBAL_ATOMICS));
if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
const int nQuery = query.rows;
const int nTrain = train.rows;
@ -892,7 +893,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
if (query.empty() || empty())
return;
using namespace ::cv::gpu::device::bf_radius_match;
using namespace cv::gpu::device::bf_radius_match;
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
@ -920,7 +921,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
DeviceInfo info;
int cc = info.majorVersion() * 10 + info.minorVersion();
CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && info.supports(GLOBAL_ATOMICS));
if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
const int nQuery = query.rows;

View File

@ -509,7 +509,8 @@ namespace cv { namespace gpu { namespace device
__shared__ float s_Y[128];
__shared__ float s_angle[128];
__shared__ float s_sum[32 * 4];
__shared__ float s_sumx[32 * 4];
__shared__ float s_sumy[32 * 4];
/* The sampling intervals and wavelet sizes for selecting an orientation
and building the keypoint descriptor are defined relative to 's' */
@ -522,126 +523,109 @@ namespace cv { namespace gpu { namespace device
const int grad_wav_size = 2 * __float2int_rn(2.0f * s);
// check when grad_wav_size is too big
if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size)
if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size)
return;
// Calc X, Y, angle and store it to shared memory
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
float X = 0.0f, Y = 0.0f, angle = 0.0f;
if (tid < ORI_SAMPLES)
{
// Calc X, Y, angle and store it to shared memory
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
const float margin = (float)(grad_wav_size - 1) / 2.0f;
const int x = __float2int_rn(featureX[blockIdx.x] + c_aptX[tid] * s - margin);
const int y = __float2int_rn(featureY[blockIdx.x] + c_aptY[tid] * s - margin);
float X = 0.0f, Y = 0.0f, angle = 0.0f;
if (tid < ORI_SAMPLES)
if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size &&
x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
{
const float margin = (float)(grad_wav_size - 1) / 2.0f;
const int x = __float2int_rn(featureX[blockIdx.x] + c_aptX[tid] * s - margin);
const int y = __float2int_rn(featureY[blockIdx.x] + c_aptY[tid] * s - margin);
X = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NX, 4, grad_wav_size, y, x);
Y = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NY, 4, grad_wav_size, y, x);
if ((unsigned)y < (unsigned)((c_img_rows + 1) - grad_wav_size) && (unsigned)x < (unsigned)((c_img_cols + 1) - grad_wav_size))
{
X = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NX, 4, grad_wav_size, y, x);
Y = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NY, 4, grad_wav_size, y, x);
angle = atan2f(Y, X);
if (angle < 0)
angle += 2.0f * CV_PI_F;
angle *= 180.0f / CV_PI_F;
}
angle = atan2f(Y, X);
if (angle < 0)
angle += 2.0f * CV_PI_F;
angle *= 180.0f / CV_PI_F;
}
s_X[tid] = X;
s_Y[tid] = Y;
s_angle[tid] = angle;
}
s_X[tid] = X;
s_Y[tid] = Y;
s_angle[tid] = angle;
__syncthreads();
float bestx = 0, besty = 0, best_mod = 0;
#pragma unroll
for (int i = 0; i < 18; ++i)
{
const int dir = (i * 4 + threadIdx.y) * ORI_SEARCH_INC;
float sumx = 0.0f, sumy = 0.0f;
int d = ::abs(__float2int_rn(s_angle[threadIdx.x]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx = s_X[threadIdx.x];
sumy = s_Y[threadIdx.x];
}
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 32]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[threadIdx.x + 32];
sumy += s_Y[threadIdx.x + 32];
}
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 64]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[threadIdx.x + 64];
sumy += s_Y[threadIdx.x + 64];
}
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 96]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[threadIdx.x + 96];
sumy += s_Y[threadIdx.x + 96];
}
device::reduce<32>(s_sumx + threadIdx.y * 32, sumx, threadIdx.x, plus<volatile float>());
device::reduce<32>(s_sumy + threadIdx.y * 32, sumy, threadIdx.x, plus<volatile float>());
const float temp_mod = sumx * sumx + sumy * sumy;
if (temp_mod > best_mod)
{
best_mod = temp_mod;
bestx = sumx;
besty = sumy;
}
__syncthreads();
}
float bestx = 0, besty = 0, best_mod = 0;
if (threadIdx.x == 0)
{
s_X[threadIdx.y] = bestx;
s_Y[threadIdx.y] = besty;
s_angle[threadIdx.y] = best_mod;
}
__syncthreads();
#pragma unroll
for (int i = 0; i < 18; ++i)
{
const int dir = (i * 4 + threadIdx.y) * ORI_SEARCH_INC;
if (threadIdx.x == 0 && threadIdx.y == 0)
{
int bestIdx = 0;
float sumx = 0.0f, sumy = 0.0f;
int d = ::abs(__float2int_rn(s_angle[threadIdx.x]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx = s_X[threadIdx.x];
sumy = s_Y[threadIdx.x];
}
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 32]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[threadIdx.x + 32];
sumy += s_Y[threadIdx.x + 32];
}
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 64]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[threadIdx.x + 64];
sumy += s_Y[threadIdx.x + 64];
}
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 96]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[threadIdx.x + 96];
sumy += s_Y[threadIdx.x + 96];
}
if (s_angle[1] > s_angle[bestIdx])
bestIdx = 1;
if (s_angle[2] > s_angle[bestIdx])
bestIdx = 2;
if (s_angle[3] > s_angle[bestIdx])
bestIdx = 3;
float* s_sum_row = s_sum + threadIdx.y * 32;
float kp_dir = atan2f(s_Y[bestIdx], s_X[bestIdx]);
if (kp_dir < 0)
kp_dir += 2.0f * CV_PI_F;
kp_dir *= 180.0f / CV_PI_F;
device::reduce<32>(s_sum_row, sumx, threadIdx.x, plus<volatile float>());
device::reduce<32>(s_sum_row, sumy, threadIdx.x, plus<volatile float>());
const float temp_mod = sumx * sumx + sumy * sumy;
if (temp_mod > best_mod)
{
best_mod = temp_mod;
bestx = sumx;
besty = sumy;
}
__syncthreads();
}
if (threadIdx.x == 0)
{
s_X[threadIdx.y] = bestx;
s_Y[threadIdx.y] = besty;
s_angle[threadIdx.y] = best_mod;
}
__syncthreads();
if (threadIdx.x < 2 && threadIdx.y == 0)
{
volatile float* v_x = s_X;
volatile float* v_y = s_Y;
volatile float* v_mod = s_angle;
bestx = v_x[threadIdx.x];
besty = v_y[threadIdx.x];
best_mod = v_mod[threadIdx.x];
float temp_mod = v_mod[threadIdx.x + 2];
if (temp_mod > best_mod)
{
v_x[threadIdx.x] = bestx = v_x[threadIdx.x + 2];
v_y[threadIdx.x] = besty = v_y[threadIdx.x + 2];
v_mod[threadIdx.x] = best_mod = temp_mod;
}
temp_mod = v_mod[threadIdx.x + 1];
if (temp_mod > best_mod)
{
v_x[threadIdx.x] = bestx = v_x[threadIdx.x + 1];
v_y[threadIdx.x] = besty = v_y[threadIdx.x + 1];
}
}
if (threadIdx.x == 0 && threadIdx.y == 0 && best_mod != 0)
{
float kp_dir = atan2f(besty, bestx);
if (kp_dir < 0)
kp_dir += 2.0f * CV_PI_F;
kp_dir *= 180.0f / CV_PI_F;
featureDir[blockIdx.x] = kp_dir;
}
featureDir[blockIdx.x] = kp_dir;
}
}

View File

@ -124,7 +124,9 @@ int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& ma
CV_Assert(img.type() == CV_8UC1);
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));
CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));
if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area());
@ -146,7 +148,8 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
{
using namespace cv::gpu::device::fast;
CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));
if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
if (count_ == 0)
return 0;

View File

@ -120,7 +120,9 @@ namespace
CV_Assert(!img.empty() && img.type() == CV_8UC1);
CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);
CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));
if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");
const int min_size = calcSize(surf_.nOctaves - 1, 0);
CV_Assert(img_rows - min_size >= 0);

View File

@ -108,6 +108,25 @@ testing::AssertionResult assertKeyPointsEquals(const char* gold_expr, const char
#define ASSERT_KEYPOINTS_EQ(gold, actual) EXPECT_PRED_FORMAT2(assertKeyPointsEquals, gold, actual);
// Counts the positions at which two key point lists agree after sorting.
//
// Both vectors are sorted in place with KeyPointLess (so the caller's vectors
// are reordered), then compared index by index with keyPointsEquals.
//
// gold   - reference key points; reordered by this call.
// actual - key points under test; reordered by this call.
//
// Returns the number of indices i for which keyPointsEquals(gold[i], actual[i])
// holds. Only the first min(gold.size(), actual.size()) indices are compared,
// so a shorter `actual` is never read past its end; entries beyond that range
// contribute no matches.
int getMatchedPointsCount(std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual)
{
    std::sort(actual.begin(), actual.end(), KeyPointLess());
    std::sort(gold.begin(), gold.end(), KeyPointLess());

    // Bound the comparison by the shorter list: indexing actual[i] with
    // i >= actual.size() would be undefined behaviour.
    const size_t comparable = std::min(gold.size(), actual.size());

    int validCount = 0;
    for (size_t i = 0; i < comparable; ++i)
    {
        if (keyPointsEquals(gold[i], actual[i]))
            ++validCount;
    }

    return validCount;
}
int getMatchedPointsCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches)
{
int validCount = 0;
@ -170,20 +189,39 @@ TEST_P(SURF, Detector)
surf.upright = upright;
surf.keypointsRatio = 0.05f;
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
std::vector<cv::KeyPoint> keypoints_gold;
surf_gold(image, cv::noArray(), keypoints_gold);
std::vector<cv::KeyPoint> keypoints_gold;
surf_gold(image, cv::noArray(), keypoints_gold);
ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
ASSERT_EQ(keypoints_gold.size(), keypoints.size());
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
EXPECT_GT(matchedRatio, 0.95);
}
}
TEST_P(SURF, Detector_Masked)
@ -202,20 +240,39 @@ TEST_P(SURF, Detector_Masked)
surf.upright = upright;
surf.keypointsRatio = 0.05f;
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), loadMat(mask), keypoints);
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), loadMat(mask), keypoints);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
surf(loadMat(image), loadMat(mask), keypoints);
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
cv::SURF surf_gold;
surf_gold.hessianThreshold = hessianThreshold;
surf_gold.nOctaves = nOctaves;
surf_gold.nOctaveLayers = nOctaveLayers;
surf_gold.extended = extended;
surf_gold.upright = upright;
std::vector<cv::KeyPoint> keypoints_gold;
surf_gold(image, mask, keypoints_gold);
std::vector<cv::KeyPoint> keypoints_gold;
surf_gold(image, mask, keypoints_gold);
ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
ASSERT_EQ(keypoints_gold.size(), keypoints.size());
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
EXPECT_GT(matchedRatio, 0.95);
}
}
TEST_P(SURF, Descriptor)
@ -238,23 +295,39 @@ TEST_P(SURF, Descriptor)
surf_gold.extended = extended;
surf_gold.upright = upright;
std::vector<cv::KeyPoint> keypoints;
surf_gold(image, cv::noArray(), keypoints);
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
cv::gpu::GpuMat descriptors;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
surf_gold(image, cv::noArray(), keypoints);
cv::gpu::GpuMat descriptors;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true);
cv::gpu::GpuMat descriptors;
surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true);
cv::Mat descriptors_gold;
surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);
cv::Mat descriptors_gold;
surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true);
cv::BFMatcher matcher(cv::NORM_L2);
std::vector<cv::DMatch> matches;
matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
cv::BFMatcher matcher(cv::NORM_L2);
std::vector<cv::DMatch> matches;
matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches);
double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches);
double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
EXPECT_GT(matchedRatio, 0.35);
EXPECT_GT(matchedRatio, 0.35);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine(
@ -295,13 +368,28 @@ TEST_P(FAST, Accuracy)
cv::gpu::FAST_GPU fast(threshold);
fast.nonmaxSupression = nonmaxSupression;
std::vector<cv::KeyPoint> keypoints;
fast(loadMat(image), cv::gpu::GpuMat(), keypoints);
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
fast(loadMat(image), cv::gpu::GpuMat(), keypoints);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
fast(loadMat(image), cv::gpu::GpuMat(), keypoints);
std::vector<cv::KeyPoint> keypoints_gold;
cv::FAST(image, keypoints_gold, threshold, nonmaxSupression);
std::vector<cv::KeyPoint> keypoints_gold;
cv::FAST(image, keypoints_gold, threshold, nonmaxSupression);
ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Features2D, FAST, testing::Combine(
@ -364,24 +452,40 @@ TEST_P(ORB, Accuracy)
cv::gpu::ORB_GPU orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
orb.blurForDescriptor = blurForDescriptor;
std::vector<cv::KeyPoint> keypoints;
cv::gpu::GpuMat descriptors;
orb(loadMat(image), loadMat(mask), keypoints, descriptors);
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
try
{
std::vector<cv::KeyPoint> keypoints;
cv::gpu::GpuMat descriptors;
orb(loadMat(image), loadMat(mask), keypoints, descriptors);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector<cv::KeyPoint> keypoints;
cv::gpu::GpuMat descriptors;
orb(loadMat(image), loadMat(mask), keypoints, descriptors);
cv::ORB orb_gold(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
cv::ORB orb_gold(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
std::vector<cv::KeyPoint> keypoints_gold;
cv::Mat descriptors_gold;
orb_gold(image, mask, keypoints_gold, descriptors_gold);
std::vector<cv::KeyPoint> keypoints_gold;
cv::Mat descriptors_gold;
orb_gold(image, mask, keypoints_gold, descriptors_gold);
cv::BFMatcher matcher(cv::NORM_HAMMING);
std::vector<cv::DMatch> matches;
matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
cv::BFMatcher matcher(cv::NORM_HAMMING);
std::vector<cv::DMatch> matches;
matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints, matches);
double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints, matches);
double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
EXPECT_GT(matchedRatio, 0.35);
EXPECT_GT(matchedRatio, 0.35);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Features2D, ORB, testing::Combine(
@ -713,25 +817,40 @@ TEST_P(BruteForceMatcher, RadiusMatch)
cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
std::vector< std::vector<cv::DMatch> > matches;
matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
int badCount = 0;
for (size_t i = 0; i < matches.size(); i++)
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
if ((int)matches[i].size() != 1)
badCount++;
else
try
{
cv::DMatch match = matches[i][0];
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0))
badCount++;
std::vector< std::vector<cv::DMatch> > matches;
matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector< std::vector<cv::DMatch> > matches;
matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
ASSERT_EQ(0, badCount);
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
int badCount = 0;
for (size_t i = 0; i < matches.size(); i++)
{
if ((int)matches[i].size() != 1)
badCount++;
else
{
cv::DMatch match = matches[i][0];
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i*countFactor) || (match.imgIdx != 0))
badCount++;
}
}
ASSERT_EQ(0, badCount);
}
}
TEST_P(BruteForceMatcher, RadiusMatchAdd)
@ -756,42 +875,57 @@ TEST_P(BruteForceMatcher, RadiusMatchAdd)
masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
}
std::vector< std::vector<cv::DMatch> > matches;
matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks);
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
int badCount = 0;
int shift = matcher.isMaskSupported() ? 1 : 0;
int needMatchCount = matcher.isMaskSupported() ? n-1 : n;
for (size_t i = 0; i < matches.size(); i++)
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
{
if ((int)matches[i].size() != needMatchCount)
badCount++;
else
try
{
int localBadCount = 0;
for (int k = 0; k < needMatchCount; k++)
{
cv::DMatch match = matches[i][k];
{
if ((int)i < queryDescCount / 2)
{
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) )
localBadCount++;
}
else
{
if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) )
localBadCount++;
}
}
}
badCount += localBadCount > 0 ? 1 : 0;
std::vector< std::vector<cv::DMatch> > matches;
matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
std::vector< std::vector<cv::DMatch> > matches;
matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks);
ASSERT_EQ(0, badCount);
ASSERT_EQ(static_cast<size_t>(queryDescCount), matches.size());
int badCount = 0;
int shift = matcher.isMaskSupported() ? 1 : 0;
int needMatchCount = matcher.isMaskSupported() ? n-1 : n;
for (size_t i = 0; i < matches.size(); i++)
{
if ((int)matches[i].size() != needMatchCount)
badCount++;
else
{
int localBadCount = 0;
for (int k = 0; k < needMatchCount; k++)
{
cv::DMatch match = matches[i][k];
{
if ((int)i < queryDescCount / 2)
{
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) )
localBadCount++;
}
else
{
if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) )
localBadCount++;
}
}
}
badCount += localBadCount > 0 ? 1 : 0;
}
}
ASSERT_EQ(0, badCount);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine(

View File

@ -258,13 +258,28 @@ TEST_P(GaussianBlur, Accuracy)
double sigma1 = randomDouble(0.1, 1.0);
double sigma2 = randomDouble(0.1, 1.0);
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
cv::gpu::GaussianBlur(loadMat(src, useRoi), dst, ksize, sigma1, sigma2, borderType);
if (ksize.height > 16 && !supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::GaussianBlur(loadMat(src), dst, ksize, sigma1, sigma2, borderType);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
cv::gpu::GaussianBlur(loadMat(src, useRoi), dst, ksize, sigma1, sigma2, borderType);
cv::Mat dst_gold;
cv::GaussianBlur(src, dst_gold, ksize, sigma1, sigma2, borderType);
cv::Mat dst_gold;
cv::GaussianBlur(src, dst_gold, ksize, sigma1, sigma2, borderType);
EXPECT_MAT_NEAR(dst_gold, dst, 4.0);
EXPECT_MAT_NEAR(dst_gold, dst, 4.0);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine(