From cae59a7caf5ffff2cb94cf65b78bd1cce5ddb50e Mon Sep 17 00:00:00 2001
From: Alexey Spizhevoy <no@email>
Date: Mon, 28 Feb 2011 12:44:19 +0000
Subject: [PATCH] added gpu::solvePnpRansac

---
 modules/gpu/include/opencv2/gpu/gpu.hpp       |  19 +++
 .../src/{project_points.cpp => calib3d.cpp}   | 139 ++++++++++++++++++
 .../cuda/{project_points.cu => calib3d.cu}    |  74 ++++++++++
 ...st_project_points.cpp => test_calib3d.cpp} |  28 ++++
 4 files changed, 260 insertions(+)
 rename modules/gpu/src/{project_points.cpp => calib3d.cpp} (51%)
 rename modules/gpu/src/cuda/{project_points.cu => calib3d.cu} (62%)
 rename modules/gpu/test/{test_project_points.cpp => test_calib3d.cpp} (82%)
diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index 83f0b6e83..b2ac95283 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -868,6 +868,25 @@ namespace cv
                                       const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst,
                                       const Stream& stream);
 
+        struct CV_EXPORTS SolvePnpRansacParams
+        {
+            SolvePnpRansacParams(): subset_size(4),
+                                    use_extrinsic_guess(false),
+                                    num_iters(100),
+                                    max_dist(2.f),
+                                    min_num_inliers(-1),
+                                    inliers(NULL) {}
+            int subset_size;
+            bool use_extrinsic_guess;
+            int num_iters;
+            float max_dist;
+            int min_num_inliers;
+            vector<int>* inliers;
+        };
+
+        CV_EXPORTS void solvePnpRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
+                                       const Mat& dist_coef, Mat& rvec, Mat& tvec, SolvePnpRansacParams params);
+
         //////////////////////////////// Filter Engine ////////////////////////////////
 
         /*!
diff --git a/modules/gpu/src/project_points.cpp b/modules/gpu/src/calib3d.cpp
similarity index 51%
rename from modules/gpu/src/project_points.cpp
rename to modules/gpu/src/calib3d.cpp
index 5556dcd87..3eedf63a3 100644
--- a/modules/gpu/src/project_points.cpp
+++ b/modules/gpu/src/calib3d.cpp
@@ -56,6 +56,9 @@ void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&,
 void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&,
                             const Mat&, const Mat&, GpuMat&, const Stream&) { throw_nogpu(); }
 
+void cv::gpu::solvePnpRansac(const Mat&, const Mat&, const Mat&, const Mat&,
+                             Mat&, Mat&, SolvePnpRansacParams) { throw_nogpu(); }
+
 #else
 
 using namespace cv;
@@ -103,6 +106,7 @@ namespace cv { namespace gpu { namespace project_points
               const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
 }}}
 
+
 namespace
 {
     void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec,
@@ -138,4 +142,139 @@ void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
     ::projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream));
 }
 
+
+namespace cv { namespace gpu { namespace solve_pnp_ransac
+{
+    void computeHypothesisScores(
+            const int num_hypotheses, const int num_points, const float* rot_matrices,
+            const float3* transl_vectors, const float3* object, const float2* image,
+            const float3* camera_mat, const float dist_threshold, int* hypothesis_scores);
+}}}
+
+namespace
+{
+    // Selects subset_size random different points from [0, num_points - 1] range
+    void selectRandom(int subset_size, int num_points, vector<int>& subset)
+    {
+        subset.resize(subset_size);
+        for (int i = 0; i < subset_size; ++i)
+        {
+            bool was;
+            do
+            {
+                subset[i] = rand() % num_points;
+                was = false;
+                for (int j = 0; j < i; ++j)
+                    if (subset[j] == subset[i])
+                    {
+                        was = true;
+                        break;
+                    }
+            } while (was);
+        }
+    }
+}
+
+void cv::gpu::solvePnpRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
+                             const Mat& dist_coef, Mat& rvec, Mat& tvec, SolvePnpRansacParams params)
+{
+    CV_Assert(object.rows == 1 && object.cols > 0 && object.type() == CV_32FC3);
+    CV_Assert(image.rows == 1 && image.cols > 1 && image.type() == CV_32FC2);
+    CV_Assert(object.cols == image.cols);
+    CV_Assert(camera_mat.size() == Size(3, 3) && camera_mat.type() == CV_32F);
+    CV_Assert(dist_coef.empty()); // We don't support undistortion for now
+    CV_Assert(!params.use_extrinsic_guess); // We don't support initial guess for now
+
+    const int num_points = object.cols;
+
+    // Current hypothesis input
+    vector<int> subset_indices(params.subset_size);
+    Mat_<Point3f> object_subset(1, params.subset_size);
+    Mat_<Point2f> image_subset(1, params.subset_size);
+
+    // Current hypothesis result
+    Mat rot_vec(1, 3, CV_64F);
+    Mat rot_mat(3, 3, CV_64F);
+    Mat transl_vec(1, 3, CV_64F);
+
+    // All hypotheses results
+    Mat rot_matrices(1, params.num_iters * 9, CV_32F);
+    Mat transl_vectors(1, params.num_iters * 3, CV_32F);
+
+    // Generate set of (rotation, translation) hypotheses using small subsets
+    // of the input data
+    for (int iter = 0; iter < params.num_iters; ++iter) // TODO TBB?
+    {
+        selectRandom(params.subset_size, num_points, subset_indices);
+        for (int i = 0; i < params.subset_size; ++i)
+        {
+            object_subset(0, i) = object.at<Point3f>(subset_indices[i]);
+            image_subset(0, i) = image.at<Point2f>(subset_indices[i]);
+        }
+
+        solvePnP(object_subset, image_subset, camera_mat, dist_coef, rot_vec, transl_vec);
+
+        // Remember translation vector
+        Mat transl_vec_ = transl_vectors.colRange(iter * 3, (iter + 1) * 3);
+        transl_vec = transl_vec.reshape(0, 1);
+        transl_vec.convertTo(transl_vec_, CV_32F);
+
+        // Remember rotation matrix
+        Rodrigues(rot_vec, rot_mat);
+        Mat rot_mat_ = rot_matrices.colRange(iter * 9, (iter + 1) * 9).reshape(0, 3);
+        rot_mat.convertTo(rot_mat_, CV_32F);
+    }
+
+    // Compute scores (i.e. number of inliers) for each hypothesis
+    GpuMat d_object(object);
+    GpuMat d_image(image);
+    GpuMat d_hypothesis_scores(1, params.num_iters, CV_32S);
+    solve_pnp_ransac::computeHypothesisScores(
+            params.num_iters, num_points, rot_matrices.ptr<float>(), transl_vectors.ptr<float3>(),
+            d_object.ptr<float3>(), d_image.ptr<float2>(), camera_mat.ptr<float3>(),
+            params.max_dist * params.max_dist, d_hypothesis_scores.ptr<int>());
+
+    // Find the best hypothesis index
+    Point best_idx;
+    double best_score;
+    minMaxLoc(d_hypothesis_scores, NULL, &best_score, NULL, &best_idx);
+    int num_inliers = static_cast<int>(best_score);
+
+    // Extract the best hypothesis data
+    rot_mat = rot_matrices.colRange(best_idx.x * 9, (best_idx.x + 1) * 9).reshape(0, 3);
+    Rodrigues(rot_mat, rvec);
+    rvec = rvec.reshape(0, 1);
+    tvec = transl_vectors.colRange(best_idx.x * 3, (best_idx.x + 1) * 3).clone();
+    tvec = tvec.reshape(0, 1);
+
+    // Build vector of inlier indices
+    if (params.inliers != NULL)
+    {
+        params.inliers->resize(num_inliers);
+
+        Point3f p;
+        Point3f p_transf;
+        Point2f p_proj;
+        const float* rot = rot_mat.ptr<float>();
+        const float* transl = tvec.ptr<float>();
+        int inlier_id = 0;
+
+        for (int i = 0; i < num_points; ++i)
+        {
+            p = object.at<Point3f>(0, i);
+            p_transf.x = rot[0] * p.x + rot[1] * p.y + rot[2] * p.z + transl[0];
+            p_transf.y = rot[3] * p.x + rot[4] * p.y + rot[5] * p.z + transl[1];
+            p_transf.z = rot[6] * p.x + rot[7] * p.y + rot[8] * p.z + transl[2];
+            if (p_transf.z > 0.f)
+            {
+                p_proj.x = camera_mat.at<float>(0, 0) * p_transf.x / p_transf.z + camera_mat.at<float>(0, 2);
+                p_proj.y = camera_mat.at<float>(1, 1) * p_transf.x / p_transf.z + camera_mat.at<float>(1, 2);
+                if (norm(p_proj - image.at<Point2f>(0, i)) < params.max_dist)
+                    (*params.inliers)[inlier_id++] = i;
+            }
+        }
+    }
+}
+
 #endif
+
diff --git a/modules/gpu/src/cuda/project_points.cu b/modules/gpu/src/cuda/calib3d.cu
similarity index 62%
rename from modules/gpu/src/cuda/project_points.cu
rename to modules/gpu/src/cuda/calib3d.cu
index 08108acf8..e46e4eae1 100644
--- a/modules/gpu/src/cuda/project_points.cu
+++ b/modules/gpu/src/cuda/calib3d.cu
@@ -43,6 +43,8 @@
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/transform.hpp"
 
+#define SOLVE_PNP_RANSAC_NUM_ITERS 200
+
 namespace cv { namespace gpu
 {
     namespace transform_points
@@ -75,6 +77,7 @@ namespace cv { namespace gpu
         }
     } // namespace transform_points
 
+
     namespace project_points
     {
         __constant__ float3 crot0;
@@ -114,4 +117,75 @@ namespace cv { namespace gpu
         }
     } // namespace project_points
 
+
+    namespace solve_pnp_ransac
+    {
+        __constant__ float3 crot_matrices[SOLVE_PNP_RANSAC_NUM_ITERS * 3];
+        __constant__ float3 ctransl_vectors[SOLVE_PNP_RANSAC_NUM_ITERS];
+        __constant__ float3 ccamera_mat[2];
+
+        __device__ float sqr(float x)
+        {
+            return x * x;
+        }
+
+        __global__ void computeHypothesisScoresKernel(
+                const int num_points, const float3* object, const float2* image,
+                const float dist_threshold, int* g_num_inliers)
+        {
+            const float3* const &rot_mat = crot_matrices + blockIdx.x * 3;
+            const float3 &transl_vec = ctransl_vectors[blockIdx.x];
+            int num_inliers = 0;
+
+            for (int i = threadIdx.x; i < num_points; i += blockDim.x)
+            {
+                float3 p = object[i];
+                p = make_float3(
+                        rot_mat[0].x * p.x + rot_mat[0].y * p.y + rot_mat[0].z * p.z + transl_vec.x,
+                        rot_mat[1].x * p.x + rot_mat[1].y * p.y + rot_mat[1].z * p.z + transl_vec.y,
+                        rot_mat[2].x * p.x + rot_mat[2].y * p.y + rot_mat[2].z * p.z + transl_vec.z);
+                if (p.z > 0)
+                {
+                    p.x = ccamera_mat[0].x * p.x / p.z + ccamera_mat[0].z;
+                    p.y = ccamera_mat[1].y * p.y / p.z + ccamera_mat[1].z;
+                    float2 image_p = image[i];
+                    if (sqr(p.x - image_p.x) + sqr(p.y - image_p.y) < dist_threshold)
+                        ++num_inliers;
+                }
+            }
+
+            extern __shared__ float s_num_inliers[];
+            s_num_inliers[threadIdx.x] = num_inliers;
+            __syncthreads();
+
+            for (int step = blockDim.x / 2; step > 0; step >>= 1)
+            {
+                if (threadIdx.x < step)
+                    s_num_inliers[threadIdx.x] += s_num_inliers[threadIdx.x + step];
+                __syncthreads();
+            }
+
+            if (threadIdx.x == 0)
+                g_num_inliers[blockIdx.x] = s_num_inliers[0];
+        }
+
+        void computeHypothesisScores(
+                const int num_hypotheses, const int num_points, const float* rot_matrices,
+                const float3* transl_vectors, const float3* object, const float2* image,
+                const float3* camera_mat, const float dist_threshold, int* hypothesis_scores)
+        {
+            cudaSafeCall(cudaMemcpyToSymbol(crot_matrices, rot_matrices, num_hypotheses * 3 * sizeof(float3)));
+            cudaSafeCall(cudaMemcpyToSymbol(ctransl_vectors, transl_vectors, num_hypotheses * sizeof(float3)));
+            cudaSafeCall(cudaMemcpyToSymbol(ccamera_mat, camera_mat, 2 * sizeof(float3)));
+
+            dim3 threads(256);
+            dim3 grid(num_hypotheses);
+            int smem_size = threads.x * sizeof(float);
+
+            computeHypothesisScoresKernel<<<grid, threads, smem_size>>>(
+                    num_points, object, image, dist_threshold, hypothesis_scores);
+            cudaSafeCall(cudaThreadSynchronize());
+        }
+    } // namespace solvepnp_ransac
+
 }} // namespace cv { namespace gpu
diff --git a/modules/gpu/test/test_project_points.cpp b/modules/gpu/test/test_calib3d.cpp
similarity index 82%
rename from modules/gpu/test/test_project_points.cpp
rename to modules/gpu/test/test_calib3d.cpp
index 95e16f74c..19edcddc5 100644
--- a/modules/gpu/test/test_project_points.cpp
+++ b/modules/gpu/test/test_calib3d.cpp
@@ -105,3 +105,31 @@ TEST(transformPoints, accuracy)
         ASSERT_LT(err.dot(err) / res_gold.dot(res_gold), 1e-3f);
     }
 }
+
+
+TEST(solvePnpRansac, accuracy)
+{
+    RNG& rng = TS::ptr()->get_rng();
+
+    const int num_points = 5000;
+    Mat object = randomMat(rng, Size(num_points, 1), CV_32FC3, 0, 100, false);
+    Mat camera_mat = randomMat(rng, Size(3, 3), CV_32F, 1, 1, false);
+    camera_mat.at<float>(0, 1) = 0.f;
+    camera_mat.at<float>(1, 0) = 0.f;
+    camera_mat.at<float>(2, 0) = 0.f;
+    camera_mat.at<float>(2, 1) = 0.f;
+
+    Mat rvec_gold = randomMat(rng, Size(3, 1), CV_32F, 0, 1, false);
+    Mat tvec_gold = randomMat(rng, Size(3, 1), CV_32F, 0, 1, false);
+
+    vector<Point2f> image_vec;
+    projectPoints(object, rvec_gold, tvec_gold, camera_mat, Mat(), image_vec);
+    Mat image(1, image_vec.size(), CV_32FC2, &image_vec[0]);
+
+    Mat rvec;
+    Mat tvec;
+    solvePnpRansac(object, image, camera_mat, Mat(), rvec, tvec, SolvePnpRansacParams());
+
+    ASSERT_LE(norm(rvec - rvec_gold), 1e-3f);
+    ASSERT_LE(norm(tvec - tvec_gold), 1e-3f);
+}