From cae59a7caf5ffff2cb94cf65b78bd1cce5ddb50e Mon Sep 17 00:00:00 2001 From: Alexey Spizhevoy Date: Mon, 28 Feb 2011 12:44:19 +0000 Subject: [PATCH] added gpu::solvePnpRansac --- modules/gpu/include/opencv2/gpu/gpu.hpp | 19 +++ .../src/{project_points.cpp => calib3d.cpp} | 139 ++++++++++++++++++ .../cuda/{project_points.cu => calib3d.cu} | 74 ++++++++++ ...st_project_points.cpp => test_calib3d.cpp} | 28 ++++ 4 files changed, 260 insertions(+) rename modules/gpu/src/{project_points.cpp => calib3d.cpp} (51%) rename modules/gpu/src/cuda/{project_points.cu => calib3d.cu} (62%) rename modules/gpu/test/{test_project_points.cpp => test_calib3d.cpp} (82%) diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 83f0b6e83..b2ac95283 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -868,6 +868,25 @@ namespace cv const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, const Stream& stream); + struct CV_EXPORTS SolvePnpRansacParams + { + SolvePnpRansacParams(): subset_size(4), + use_extrinsic_guess(false), + num_iters(100), + max_dist(2.f), + min_num_inliers(-1), + inliers(NULL) {} + int subset_size; + bool use_extrinsic_guess; + int num_iters; + float max_dist; + int min_num_inliers; + vector* inliers; + }; + + CV_EXPORTS void solvePnpRansac(const Mat& object, const Mat& image, const Mat& camera_mat, + const Mat& dist_coef, Mat& rvec, Mat& tvec, SolvePnpRansacParams params); + //////////////////////////////// Filter Engine //////////////////////////////// /*! diff --git a/modules/gpu/src/project_points.cpp b/modules/gpu/src/calib3d.cpp similarity index 51% rename from modules/gpu/src/project_points.cpp rename to modules/gpu/src/calib3d.cpp index 5556dcd87..3eedf63a3 100644 --- a/modules/gpu/src/project_points.cpp +++ b/modules/gpu/src/calib3d.cpp @@ -56,6 +56,9 @@ void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&, void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&, const Mat&, const Mat&, GpuMat&, const Stream&) { throw_nogpu(); } +void cv::gpu::solvePnpRansac(const Mat&, const Mat&, const Mat&, const Mat&, + Mat&, Mat&, SolvePnpRansacParams) { throw_nogpu(); } + #else using namespace cv; @@ -103,6 +106,7 @@ namespace cv { namespace gpu { namespace project_points const float* proj, DevMem2D_ dst, cudaStream_t stream); }}} + namespace { void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, @@ -138,4 +142,139 @@ void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, ::projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream)); } + +namespace cv { namespace gpu { namespace solve_pnp_ransac +{ + void computeHypothesisScores( + const int num_hypotheses, const int num_points, const float* rot_matrices, + const float3* transl_vectors, const float3* object, const float2* image, + const float3* camera_mat, const float dist_threshold, int* hypothesis_scores); +}}} + +namespace +{ + // Selects subset_size random different points from [0, num_points - 1] range + void selectRandom(int subset_size, int num_points, vector& subset) + { + subset.resize(subset_size); + for (int i = 0; i < subset_size; ++i) + { + bool was; + do + { + subset[i] = rand() % num_points; + was = false; + for (int j = 0; j < i; ++j) + if (subset[j] == subset[i]) + { + was = true; + break; + } + } while (was); + } + } +} + +void cv::gpu::solvePnpRansac(const Mat& object, const Mat& image, const Mat& camera_mat, + const Mat& dist_coef, Mat& rvec, Mat& tvec, SolvePnpRansacParams params) +{ + CV_Assert(object.rows == 1 && object.cols > 0 && object.type() == CV_32FC3); + CV_Assert(image.rows == 1 && image.cols > 1 && image.type() == CV_32FC2); + CV_Assert(object.cols == image.cols); + CV_Assert(camera_mat.size() == Size(3, 3) && camera_mat.type() == CV_32F); + CV_Assert(dist_coef.empty()); // We don't support undistortion for now + CV_Assert(!params.use_extrinsic_guess); // We don't support initial guess for now + + const int num_points = object.cols; + + // Current hypothesis input + vector subset_indices(params.subset_size); + Mat_ object_subset(1, params.subset_size); + Mat_ image_subset(1, params.subset_size); + + // Current hypothesis result + Mat rot_vec(1, 3, CV_64F); + Mat rot_mat(3, 3, CV_64F); + Mat transl_vec(1, 3, CV_64F); + + // All hypotheses results + Mat rot_matrices(1, params.num_iters * 9, CV_32F); + Mat transl_vectors(1, params.num_iters * 3, CV_32F); + + // Generate set of (rotation, translation) hypotheses using small subsets + // of the input data + for (int iter = 0; iter < params.num_iters; ++iter) // TODO TBB? + { + selectRandom(params.subset_size, num_points, subset_indices); + for (int i = 0; i < params.subset_size; ++i) + { + object_subset(0, i) = object.at(subset_indices[i]); + image_subset(0, i) = image.at(subset_indices[i]); + } + + solvePnP(object_subset, image_subset, camera_mat, dist_coef, rot_vec, transl_vec); + + // Remember translation vector + Mat transl_vec_ = transl_vectors.colRange(iter * 3, (iter + 1) * 3); + transl_vec = transl_vec.reshape(0, 1); + transl_vec.convertTo(transl_vec_, CV_32F); + + // Remember rotation matrix + Rodrigues(rot_vec, rot_mat); + Mat rot_mat_ = rot_matrices.colRange(iter * 9, (iter + 1) * 9).reshape(0, 3); + rot_mat.convertTo(rot_mat_, CV_32F); + } + + // Compute scores (i.e. number of inliers) for each hypothesis + GpuMat d_object(object); + GpuMat d_image(image); + GpuMat d_hypothesis_scores(1, params.num_iters, CV_32S); + solve_pnp_ransac::computeHypothesisScores( + params.num_iters, num_points, rot_matrices.ptr(), transl_vectors.ptr(), + d_object.ptr(), d_image.ptr(), camera_mat.ptr(), + params.max_dist * params.max_dist, d_hypothesis_scores.ptr()); + + // Find the best hypothesis index + Point best_idx; + double best_score; + minMaxLoc(d_hypothesis_scores, NULL, &best_score, NULL, &best_idx); + int num_inliers = static_cast(best_score); + + // Extract the best hypothesis data + rot_mat = rot_matrices.colRange(best_idx.x * 9, (best_idx.x + 1) * 9).reshape(0, 3); + Rodrigues(rot_mat, rvec); + rvec = rvec.reshape(0, 1); + tvec = transl_vectors.colRange(best_idx.x * 3, (best_idx.x + 1) * 3).clone(); + tvec = tvec.reshape(0, 1); + + // Build vector of inlier indices + if (params.inliers != NULL) + { + params.inliers->resize(num_inliers); + + Point3f p; + Point3f p_transf; + Point2f p_proj; + const float* rot = rot_mat.ptr(); + const float* transl = tvec.ptr(); + int inlier_id = 0; + + for (int i = 0; i < num_points; ++i) + { + p = object.at(0, i); + p_transf.x = rot[0] * p.x + rot[1] * p.y + rot[2] * p.z + transl[0]; + p_transf.y = rot[3] * p.x + rot[4] * p.y + rot[5] * p.z + transl[1]; + p_transf.z = rot[6] * p.x + rot[7] * p.y + rot[8] * p.z + transl[2]; + if (p_transf.z > 0.f) + { + p_proj.x = camera_mat.at(0, 0) * p_transf.x / p_transf.z + camera_mat.at(0, 2); + p_proj.y = camera_mat.at(1, 1) * p_transf.x / p_transf.z + camera_mat.at(1, 2); + if (norm(p_proj - image.at(0, i)) < params.max_dist) + (*params.inliers)[inlier_id++] = i; + } + } + } +} + #endif + diff --git a/modules/gpu/src/cuda/project_points.cu b/modules/gpu/src/cuda/calib3d.cu similarity index 62% rename from modules/gpu/src/cuda/project_points.cu rename to modules/gpu/src/cuda/calib3d.cu index 08108acf8..e46e4eae1 100644 --- a/modules/gpu/src/cuda/project_points.cu +++ b/modules/gpu/src/cuda/calib3d.cu @@ -43,6 +43,8 @@ #include "internal_shared.hpp" #include "opencv2/gpu/device/transform.hpp" +#define SOLVE_PNP_RANSAC_NUM_ITERS 200 + namespace cv { namespace gpu { namespace transform_points @@ -75,6 +77,7 @@ namespace cv { namespace gpu } } // namespace transform_points + namespace project_points { __constant__ float3 crot0; @@ -114,4 +117,75 @@ namespace cv { namespace gpu } } // namespace project_points + + namespace solve_pnp_ransac + { + __constant__ float3 crot_matrices[SOLVE_PNP_RANSAC_NUM_ITERS * 3]; + __constant__ float3 ctransl_vectors[SOLVE_PNP_RANSAC_NUM_ITERS]; + __constant__ float3 ccamera_mat[2]; + + __device__ float sqr(float x) + { + return x * x; + } + + __global__ void computeHypothesisScoresKernel( + const int num_points, const float3* object, const float2* image, + const float dist_threshold, int* g_num_inliers) + { + const float3* const &rot_mat = crot_matrices + blockIdx.x * 3; + const float3 &transl_vec = ctransl_vectors[blockIdx.x]; + int num_inliers = 0; + + for (int i = threadIdx.x; i < num_points; i += blockDim.x) + { + float3 p = object[i]; + p = make_float3( + rot_mat[0].x * p.x + rot_mat[0].y * p.y + rot_mat[0].z * p.z + transl_vec.x, + rot_mat[1].x * p.x + rot_mat[1].y * p.y + rot_mat[1].z * p.z + transl_vec.y, + rot_mat[2].x * p.x + rot_mat[2].y * p.y + rot_mat[2].z * p.z + transl_vec.z); + if (p.z > 0) + { + p.x = ccamera_mat[0].x * p.x / p.z + ccamera_mat[0].z; + p.y = ccamera_mat[1].y * p.y / p.z + ccamera_mat[1].z; + float2 image_p = image[i]; + if (sqr(p.x - image_p.x) + sqr(p.y - image_p.y) < dist_threshold) + ++num_inliers; + } + } + + extern __shared__ float s_num_inliers[]; + s_num_inliers[threadIdx.x] = num_inliers; + __syncthreads(); + + for (int step = blockDim.x / 2; step > 0; step >>= 1) + { + if (threadIdx.x < step) + s_num_inliers[threadIdx.x] += s_num_inliers[threadIdx.x + step]; + __syncthreads(); + } + + if (threadIdx.x == 0) + g_num_inliers[blockIdx.x] = s_num_inliers[0]; + } + + void computeHypothesisScores( + const int num_hypotheses, const int num_points, const float* rot_matrices, + const float3* transl_vectors, const float3* object, const float2* image, + const float3* camera_mat, const float dist_threshold, int* hypothesis_scores) + { + cudaSafeCall(cudaMemcpyToSymbol(crot_matrices, rot_matrices, num_hypotheses * 3 * sizeof(float3))); + cudaSafeCall(cudaMemcpyToSymbol(ctransl_vectors, transl_vectors, num_hypotheses * sizeof(float3))); + cudaSafeCall(cudaMemcpyToSymbol(ccamera_mat, camera_mat, 2 * sizeof(float3))); + + dim3 threads(256); + dim3 grid(num_hypotheses); + int smem_size = threads.x * sizeof(float); + + computeHypothesisScoresKernel<<>>( + num_points, object, image, dist_threshold, hypothesis_scores); + cudaSafeCall(cudaThreadSynchronize()); + } + } // namespace solvepnp_ransac + }} // namespace cv { namespace gpu diff --git a/modules/gpu/test/test_project_points.cpp b/modules/gpu/test/test_calib3d.cpp similarity index 82% rename from modules/gpu/test/test_project_points.cpp rename to modules/gpu/test/test_calib3d.cpp index 95e16f74c..19edcddc5 100644 --- a/modules/gpu/test/test_project_points.cpp +++ b/modules/gpu/test/test_calib3d.cpp @@ -105,3 +105,31 @@ TEST(transformPoints, accuracy) ASSERT_LT(err.dot(err) / res_gold.dot(res_gold), 1e-3f); } } + + +TEST(solvePnpRansac, accuracy) +{ + RNG& rng = TS::ptr()->get_rng(); + + const int num_points = 5000; + Mat object = randomMat(rng, Size(num_points, 1), CV_32FC3, 0, 100, false); + Mat camera_mat = randomMat(rng, Size(3, 3), CV_32F, 1, 1, false); + camera_mat.at(0, 1) = 0.f; + camera_mat.at(1, 0) = 0.f; + camera_mat.at(2, 0) = 0.f; + camera_mat.at(2, 1) = 0.f; + + Mat rvec_gold = randomMat(rng, Size(3, 1), CV_32F, 0, 1, false); + Mat tvec_gold = randomMat(rng, Size(3, 1), CV_32F, 0, 1, false); + + vector image_vec; + projectPoints(object, rvec_gold, tvec_gold, camera_mat, Mat(), image_vec); + Mat image(1, image_vec.size(), CV_32FC2, &image_vec[0]); + + Mat rvec; + Mat tvec; + solvePnpRansac(object, image, camera_mat, Mat(), rvec, tvec, SolvePnpRansacParams()); + + ASSERT_LE(norm(rvec - rvec_gold), 1e-3f); + ASSERT_LE(norm(tvec - tvec_gold), 1e-3f); +}