Added assertions on device features (global atomics) to the GPU tests

2012-03-27 07:33:39 +00:00
parent 4a996111ea
commit bd13e9479b
6 changed files with 498 additions and 358 deletions
--- a/modules/gpu/src/brute_force_matcher.cpp
+++ b/modules/gpu/src/brute_force_matcher.cpp
@@ -761,7 +761,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
    if (query.empty() || train.empty())
        return;

-    using namespace ::cv::gpu::device::bf_radius_match;
+    using namespace cv::gpu::device::bf_radius_match;

    typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
                             const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
@@ -789,7 +789,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
    DeviceInfo info;
    int cc = info.majorVersion() * 10 + info.minorVersion();

-    CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && info.supports(GLOBAL_ATOMICS));
+    if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
+        CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");

    const int nQuery = query.rows;
    const int nTrain = train.rows;
@@ -892,7 +893,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
    if (query.empty() || empty())
        return;

-    using namespace ::cv::gpu::device::bf_radius_match;
+    using namespace cv::gpu::device::bf_radius_match;

    typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
                             const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
@@ -920,7 +921,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
    DeviceInfo info;
    int cc = info.majorVersion() * 10 + info.minorVersion();

-    CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && info.supports(GLOBAL_ATOMICS));
+    if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
+        CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");

    const int nQuery = query.rows;

--- a/modules/gpu/src/cuda/surf.cu
+++ b/modules/gpu/src/cuda/surf.cu
@@ -509,7 +509,8 @@ namespace cv { namespace gpu { namespace device
            __shared__ float s_Y[128];
            __shared__ float s_angle[128];

-            __shared__ float s_sum[32 * 4];
+            __shared__ float s_sumx[32 * 4];
+            __shared__ float s_sumy[32 * 4];

            /* The sampling intervals and wavelet sized for selecting an orientation
             and building the keypoint descriptor are defined relative to 's' */
@@ -522,8 +523,9 @@ namespace cv { namespace gpu { namespace device
            const int grad_wav_size = 2 * __float2int_rn(2.0f * s);

            // check when grad_wav_size is too big
-            if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size)
-            {
+            if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size)
+                return;
+
            // Calc X, Y, angle and store it to shared memory
            const int tid = threadIdx.y * blockDim.x + threadIdx.x;

@@ -535,7 +537,8 @@ namespace cv { namespace gpu { namespace device
                const int x = __float2int_rn(featureX[blockIdx.x] + c_aptX[tid] * s - margin);
                const int y = __float2int_rn(featureY[blockIdx.x] + c_aptY[tid] * s - margin);

-                    if ((unsigned)y < (unsigned)((c_img_rows + 1) - grad_wav_size) && (unsigned)x < (unsigned)((c_img_cols + 1) - grad_wav_size))
+                if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size &&
+                    x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
                {
                    X = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NX, 4, grad_wav_size, y, x);
                    Y = c_aptW[tid] * icvCalcHaarPatternSum<2>(c_NY, 4, grad_wav_size, y, x);
@@ -584,10 +587,8 @@ namespace cv { namespace gpu { namespace device
                    sumy += s_Y[threadIdx.x + 96];
                }

-                    float* s_sum_row = s_sum + threadIdx.y * 32;
-
-                    device::reduce<32>(s_sum_row, sumx, threadIdx.x, plus<volatile float>());
-                    device::reduce<32>(s_sum_row, sumy, threadIdx.x, plus<volatile float>());
+                device::reduce<32>(s_sumx + threadIdx.y * 32, sumx, threadIdx.x, plus<volatile float>());
+                device::reduce<32>(s_sumy + threadIdx.y * 32, sumy, threadIdx.x, plus<volatile float>());

                const float temp_mod = sumx * sumx + sumy * sumy;
                if (temp_mod > best_mod)
@@ -608,34 +609,18 @@ namespace cv { namespace gpu { namespace device
            }
            __syncthreads();

-                if (threadIdx.x < 2 && threadIdx.y == 0)
+            if (threadIdx.x == 0 && threadIdx.y == 0)
            {
-                    volatile float* v_x = s_X;
-                    volatile float* v_y = s_Y;
-                    volatile float* v_mod = s_angle;
+                int bestIdx = 0;

-                    bestx = v_x[threadIdx.x];
-                    besty = v_y[threadIdx.x];
-                    best_mod = v_mod[threadIdx.x];
+                if (s_angle[1] > s_angle[bestIdx])
+                    bestIdx = 1;
+                if (s_angle[2] > s_angle[bestIdx])
+                    bestIdx = 2;
+                if (s_angle[3] > s_angle[bestIdx])
+                    bestIdx = 3;

-                    float temp_mod = v_mod[threadIdx.x + 2];
-                    if (temp_mod > best_mod)
-                    {
-                        v_x[threadIdx.x] = bestx = v_x[threadIdx.x + 2];
-                        v_y[threadIdx.x] = besty = v_y[threadIdx.x + 2];
-                        v_mod[threadIdx.x] = best_mod = temp_mod;
-                    }
-                    temp_mod = v_mod[threadIdx.x + 1];
-                    if (temp_mod > best_mod)
-                    {
-                        v_x[threadIdx.x] = bestx = v_x[threadIdx.x + 1];
-                        v_y[threadIdx.x] = besty = v_y[threadIdx.x + 1];
-                    }
-                }
-
-                if (threadIdx.x == 0 && threadIdx.y == 0 && best_mod != 0)
-                {
-                    float kp_dir = atan2f(besty, bestx);
+                float kp_dir = atan2f(s_Y[bestIdx], s_X[bestIdx]);
                if (kp_dir < 0)
                    kp_dir += 2.0f * CV_PI_F;
                kp_dir *= 180.0f / CV_PI_F;
@@ -643,7 +628,6 @@ namespace cv { namespace gpu { namespace device
                featureDir[blockIdx.x] = kp_dir;
            }
        }
-        }

        #undef ORI_SEARCH_INC
        #undef ORI_WIN
--- a/modules/gpu/src/fast.cpp
+++ b/modules/gpu/src/fast.cpp
@@ -124,7 +124,9 @@ int cv::gpu::FAST_GPU::calcKeyPointsLocation(const GpuMat& img, const GpuMat& ma

    CV_Assert(img.type() == CV_8UC1);
    CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));
-    CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));
+
+    if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
+        CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");

    int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area());

@@ -146,7 +148,8 @@ int cv::gpu::FAST_GPU::getKeyPoints(GpuMat& keypoints)
 {
    using namespace cv::gpu::device::fast;

-    CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));
+    if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
+        CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");

    if (count_ == 0)
        return 0;
--- a/modules/gpu/src/surf.cpp
+++ b/modules/gpu/src/surf.cpp
@@ -120,7 +120,9 @@ namespace
            CV_Assert(!img.empty() && img.type() == CV_8UC1);
            CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
            CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);
-            CV_Assert(TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS));
+
+            if (!TargetArchs::builtWith(GLOBAL_ATOMICS) || !DeviceInfo().supports(GLOBAL_ATOMICS))
+                CV_Error(CV_StsNotImplemented, "The device doesn't support global atomics");

            const int min_size = calcSize(surf_.nOctaves - 1, 0);
            CV_Assert(img_rows - min_size >= 0);
--- a/modules/gpu/test/test_features2d.cpp
+++ b/modules/gpu/test/test_features2d.cpp
@@ -108,6 +108,25 @@ testing::AssertionResult assertKeyPointsEquals(const char* gold_expr, const char

 #define ASSERT_KEYPOINTS_EQ(gold, actual) EXPECT_PRED_FORMAT2(assertKeyPointsEquals, gold, actual);

+// Sorts both keypoint vectors in place (KeyPointLess) and returns how many index
+// positions i satisfy keyPointsEquals(gold[i], actual[i]). The callers' vectors are reordered.
+int getMatchedPointsCount(std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual)
+{
+    std::sort(actual.begin(), actual.end(), KeyPointLess());
+    std::sort(gold.begin(), gold.end(), KeyPointLess());
+
+    // Compare only the common prefix: iterating up to gold.size() would index past
+    // the end of `actual` whenever it holds fewer keypoints than `gold`.
+    const size_t comparable = std::min(gold.size(), actual.size());
+
+    int validCount = 0;
+    for (size_t i = 0; i < comparable; ++i)
+    {
+        if (keyPointsEquals(gold[i], actual[i]))
+            ++validCount;
+    }
+    return validCount;
+}
+
 int getMatchedPointsCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches)
 {
    int validCount = 0;
@@ -170,6 +189,20 @@ TEST_P(SURF, Detector)
    surf.upright = upright;
    surf.keypointsRatio = 0.05f;

+    if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
+    {
+        try
+        {
+            std::vector<cv::KeyPoint> keypoints;
+            surf(loadMat(image), cv::gpu::GpuMat(), keypoints);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(CV_StsNotImplemented, e.code);
+        }
+    }
+    else
+    {
        std::vector<cv::KeyPoint> keypoints;
        surf(loadMat(image), cv::gpu::GpuMat(), keypoints);

@@ -183,7 +216,12 @@ TEST_P(SURF, Detector)
        std::vector<cv::KeyPoint> keypoints_gold;
        surf_gold(image, cv::noArray(), keypoints_gold);

-    ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
+        ASSERT_EQ(keypoints_gold.size(), keypoints.size());
+        int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
+        double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
+
+        EXPECT_GT(matchedRatio, 0.95);
+    }
 }

 TEST_P(SURF, Detector_Masked)
@@ -202,6 +240,20 @@ TEST_P(SURF, Detector_Masked)
    surf.upright = upright;
    surf.keypointsRatio = 0.05f;

+    if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
+    {
+        try
+        {
+            std::vector<cv::KeyPoint> keypoints;
+            surf(loadMat(image), loadMat(mask), keypoints);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(CV_StsNotImplemented, e.code);
+        }
+    }
+    else
+    {
        std::vector<cv::KeyPoint> keypoints;
        surf(loadMat(image), loadMat(mask), keypoints);

@@ -215,7 +267,12 @@ TEST_P(SURF, Detector_Masked)
        std::vector<cv::KeyPoint> keypoints_gold;
        surf_gold(image, mask, keypoints_gold);

-    ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
+        ASSERT_EQ(keypoints_gold.size(), keypoints.size());
+        int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints);
+        double matchedRatio = static_cast<double>(matchedCount) / keypoints_gold.size();
+
+        EXPECT_GT(matchedRatio, 0.95);
+    }
 }

 TEST_P(SURF, Descriptor)
@@ -238,6 +295,21 @@ TEST_P(SURF, Descriptor)
    surf_gold.extended = extended;
    surf_gold.upright = upright;

+    if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
+    {
+        try
+        {
+            std::vector<cv::KeyPoint> keypoints;
+            cv::gpu::GpuMat descriptors;
+            surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(CV_StsNotImplemented, e.code);
+        }
+    }
+    else
+    {
        std::vector<cv::KeyPoint> keypoints;
        surf_gold(image, cv::noArray(), keypoints);

@@ -256,6 +328,7 @@ TEST_P(SURF, Descriptor)

        EXPECT_GT(matchedRatio, 0.35);
    }
+}

 INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine(
    ALL_DEVICES,
@@ -295,6 +368,20 @@ TEST_P(FAST, Accuracy)
    cv::gpu::FAST_GPU fast(threshold);
    fast.nonmaxSupression = nonmaxSupression;

+    if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
+    {
+        try
+        {
+            std::vector<cv::KeyPoint> keypoints;
+            fast(loadMat(image), cv::gpu::GpuMat(), keypoints);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(CV_StsNotImplemented, e.code);
+        }
+    }
+    else
+    {
        std::vector<cv::KeyPoint> keypoints;
        fast(loadMat(image), cv::gpu::GpuMat(), keypoints);

@@ -303,6 +390,7 @@ TEST_P(FAST, Accuracy)

        ASSERT_KEYPOINTS_EQ(keypoints_gold, keypoints);
    }
+}

 INSTANTIATE_TEST_CASE_P(GPU_Features2D, FAST, testing::Combine(
    ALL_DEVICES,
@@ -364,6 +452,21 @@ TEST_P(ORB, Accuracy)
    cv::gpu::ORB_GPU orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
    orb.blurForDescriptor = blurForDescriptor;

+    if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
+    {
+        try
+        {
+            std::vector<cv::KeyPoint> keypoints;
+            cv::gpu::GpuMat descriptors;
+            orb(loadMat(image), loadMat(mask), keypoints, descriptors);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(CV_StsNotImplemented, e.code);
+        }
+    }
+    else
+    {
        std::vector<cv::KeyPoint> keypoints;
        cv::gpu::GpuMat descriptors;
        orb(loadMat(image), loadMat(mask), keypoints, descriptors);
@@ -383,6 +486,7 @@ TEST_P(ORB, Accuracy)

        EXPECT_GT(matchedRatio, 0.35);
    }
+}

 INSTANTIATE_TEST_CASE_P(GPU_Features2D, ORB,  testing::Combine(
    ALL_DEVICES,
@@ -713,6 +817,20 @@ TEST_P(BruteForceMatcher, RadiusMatch)

    cv::gpu::BruteForceMatcher_GPU_base matcher(distType);

+    if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
+    {
+        try
+        {
+            std::vector< std::vector<cv::DMatch> > matches;
+            matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(CV_StsNotImplemented, e.code);
+        }
+    }
+    else
+    {
        std::vector< std::vector<cv::DMatch> > matches;
        matcher.radiusMatch(loadMat(query), loadMat(train), matches, radius);

@@ -733,6 +851,7 @@ TEST_P(BruteForceMatcher, RadiusMatch)

        ASSERT_EQ(0, badCount);
    }
+}

 TEST_P(BruteForceMatcher, RadiusMatchAdd)
 {
@@ -756,6 +875,20 @@ TEST_P(BruteForceMatcher, RadiusMatchAdd)
            masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
    }

+    if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
+    {
+        try
+        {
+            std::vector< std::vector<cv::DMatch> > matches;
+            matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(CV_StsNotImplemented, e.code);
+        }
+    }
+    else
+    {
        std::vector< std::vector<cv::DMatch> > matches;
        matcher.radiusMatch(cv::gpu::GpuMat(query), matches, radius, masks);

@@ -793,6 +926,7 @@ TEST_P(BruteForceMatcher, RadiusMatchAdd)

        ASSERT_EQ(0, badCount);
    }
+}

 INSTANTIATE_TEST_CASE_P(GPU_Features2D, BruteForceMatcher, testing::Combine(
    ALL_DEVICES,
--- a/modules/gpu/test/test_filters.cpp
+++ b/modules/gpu/test/test_filters.cpp
@@ -258,6 +258,20 @@ TEST_P(GaussianBlur, Accuracy)
    double sigma1 = randomDouble(0.1, 1.0);
    double sigma2 = randomDouble(0.1, 1.0);

+    if (ksize.height > 16 && !supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
+    {
+        try
+        {
+            cv::gpu::GpuMat dst;
+            cv::gpu::GaussianBlur(loadMat(src), dst, ksize, sigma1, sigma2, borderType);
+        }
+        catch (const cv::Exception& e)
+        {
+            ASSERT_EQ(CV_StsNotImplemented, e.code);
+        }
+    }
+    else
+    {
        cv::gpu::GpuMat dst = createMat(size, type, useRoi);
        cv::gpu::GaussianBlur(loadMat(src, useRoi), dst, ksize, sigma1, sigma2, borderType);

@@ -266,6 +280,7 @@ TEST_P(GaussianBlur, Accuracy)

        EXPECT_MAT_NEAR(dst_gold, dst, 4.0);
    }
+}

 INSTANTIATE_TEST_CASE_P(GPU_Filter, GaussianBlur, testing::Combine(
    ALL_DEVICES,