gpucalib3d module for camera calibration and stereo correspondence

This commit is contained in:
Vladislav Vinogradov
2013-04-17 17:51:19 +04:00
parent fc1fa28556
commit b08b9ab83b
32 changed files with 932 additions and 460 deletions

View File

@@ -5,7 +5,7 @@ endif()
set(the_description "GPU-accelerated Computer Vision")
ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy
opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo
opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo opencv_gpucalib3d
OPTIONAL opencv_gpunvidia)
ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")

View File

@@ -1,499 +0,0 @@
Camera Calibration and 3D Reconstruction
========================================
.. highlight:: cpp
gpu::StereoBM_GPU
-----------------
.. ocv:class:: gpu::StereoBM_GPU
Class computing stereo correspondence (disparity map) using the block matching algorithm. ::
class StereoBM_GPU
{
public:
enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
StereoBM_GPU();
StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP,
int winSize = DEFAULT_WINSZ);
void operator() (const GpuMat& left, const GpuMat& right,
GpuMat& disparity, Stream& stream = Stream::Null());
static bool checkIfGpuCallReasonable();
int preset;
int ndisp;
int winSize;
float avergeTexThreshold;
...
};
The class also performs pre- and post-filtering steps: Sobel pre-filtering (if ``PREFILTER_XSOBEL`` flag is set) and low textureness filtering (if ``averageTexThreshols > 0`` ). If ``avergeTexThreshold = 0`` , low textureness filtering is disabled. Otherwise, the disparity is set to 0 in each point ``(x, y)`` , where for the left image
.. math::
\sum HorizontalGradiensInWindow(x, y, winSize) < (winSize \cdot winSize) \cdot avergeTexThreshold
This means that the input left image is low textured.
gpu::StereoBM_GPU::StereoBM_GPU
-----------------------------------
Enables :ocv:class:`gpu::StereoBM_GPU` constructors.
.. ocv:function:: gpu::StereoBM_GPU::StereoBM_GPU()
.. ocv:function:: gpu::StereoBM_GPU::StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ)
:param preset: Parameter presetting:
* **BASIC_PRESET** Basic mode without pre-processing.
* **PREFILTER_XSOBEL** Sobel pre-filtering mode.
:param ndisparities: Number of disparities. It must be a multiple of 8 and less or equal to 256.
:param winSize: Block size.
gpu::StereoBM_GPU::operator ()
----------------------------------
Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair.
.. ocv:function:: void gpu::StereoBM_GPU::operator ()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null())
:param left: Left image. Only ``CV_8UC1`` type is supported.
:param right: Right image with the same size and the same type as the left one.
:param disparity: Output disparity map. It is a ``CV_8UC1`` image with the same size as the input images.
:param stream: Stream for the asynchronous version.
gpu::StereoBM_GPU::checkIfGpuCallReasonable
-----------------------------------------------
Uses a heuristic method to estimate whether the current GPU is faster than the CPU in this algorithm. It queries the currently active device.
.. ocv:function:: bool gpu::StereoBM_GPU::checkIfGpuCallReasonable()
gpu::StereoBeliefPropagation
----------------------------
.. ocv:class:: gpu::StereoBeliefPropagation
Class computing stereo correspondence using the belief propagation algorithm. ::
class StereoBeliefPropagation
{
public:
enum { DEFAULT_NDISP = 64 };
enum { DEFAULT_ITERS = 5 };
enum { DEFAULT_LEVELS = 5 };
static void estimateRecommendedParams(int width, int height,
int& ndisp, int& iters, int& levels);
explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP,
int iters = DEFAULT_ITERS,
int levels = DEFAULT_LEVELS,
int msg_type = CV_32F);
StereoBeliefPropagation(int ndisp, int iters, int levels,
float max_data_term, float data_weight,
float max_disc_term, float disc_single_jump,
int msg_type = CV_32F);
void operator()(const GpuMat& left, const GpuMat& right,
GpuMat& disparity, Stream& stream = Stream::Null());
void operator()(const GpuMat& data, GpuMat& disparity, Stream& stream = Stream::Null());
int ndisp;
int iters;
int levels;
float max_data_term;
float data_weight;
float max_disc_term;
float disc_single_jump;
int msg_type;
...
};
The class implements algorithm described in [Felzenszwalb2006]_ . It can compute own data cost (using a truncated linear model) or use a user-provided data cost.
.. note::
``StereoBeliefPropagation`` requires a lot of memory for message storage:
.. math::
width \_ step \cdot height \cdot ndisp \cdot 4 \cdot (1 + 0.25)
and for data cost storage:
.. math::
width\_step \cdot height \cdot ndisp \cdot (1 + 0.25 + 0.0625 + \dotsm + \frac{1}{4^{levels}})
``width_step`` is the number of bytes in a line including padding.
gpu::StereoBeliefPropagation::StereoBeliefPropagation
---------------------------------------------------------
Enables the :ocv:class:`gpu::StereoBeliefPropagation` constructors.
.. ocv:function:: gpu::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int msg_type = CV_32F)
.. ocv:function:: gpu::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp, int iters, int levels, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int msg_type = CV_32F)
:param ndisp: Number of disparities.
:param iters: Number of BP iterations on each level.
:param levels: Number of levels.
:param max_data_term: Threshold for data cost truncation.
:param data_weight: Data weight.
:param max_disc_term: Threshold for discontinuity truncation.
:param disc_single_jump: Discontinuity single jump.
:param msg_type: Type for messages. ``CV_16SC1`` and ``CV_32FC1`` types are supported.
``StereoBeliefPropagation`` uses a truncated linear model for the data cost and discontinuity terms:
.. math::
DataCost = data \_ weight \cdot \min ( \lvert Img_Left(x,y)-Img_Right(x-d,y) \rvert , max \_ data \_ term)
.. math::
DiscTerm = \min (disc \_ single \_ jump \cdot \lvert f_1-f_2 \rvert , max \_ disc \_ term)
For more details, see [Felzenszwalb2006]_.
By default, :ocv:class:`gpu::StereoBeliefPropagation` uses floating-point arithmetics and the ``CV_32FC1`` type for messages. But it can also use fixed-point arithmetics and the ``CV_16SC1`` message type for better performance. To avoid an overflow in this case, the parameters must satisfy the following requirement:
.. math::
10 \cdot 2^{levels-1} \cdot max \_ data \_ term < SHRT \_ MAX
gpu::StereoBeliefPropagation::estimateRecommendedParams
-----------------------------------------------------------
Uses a heuristic method to compute the recommended parameters ( ``ndisp``, ``iters`` and ``levels`` ) for the specified image size ( ``width`` and ``height`` ).
.. ocv:function:: void gpu::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels)
gpu::StereoBeliefPropagation::operator ()
---------------------------------------------
Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair or data cost.
.. ocv:function:: void gpu::StereoBeliefPropagation::operator ()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null())
.. ocv:function:: void gpu::StereoBeliefPropagation::operator ()(const GpuMat& data, GpuMat& disparity, Stream& stream = Stream::Null())
:param left: Left image. ``CV_8UC1`` , ``CV_8UC3`` and ``CV_8UC4`` types are supported.
:param right: Right image with the same size and the same type as the left one.
:param data: User-specified data cost, a matrix of ``msg_type`` type and ``Size(<image columns>*ndisp, <image rows>)`` size.
:param disparity: Output disparity map. If ``disparity`` is empty, the output type is ``CV_16SC1`` . Otherwise, the type is retained.
:param stream: Stream for the asynchronous version.
gpu::StereoConstantSpaceBP
--------------------------
.. ocv:class:: gpu::StereoConstantSpaceBP
Class computing stereo correspondence using the constant space belief propagation algorithm. ::
class StereoConstantSpaceBP
{
public:
enum { DEFAULT_NDISP = 128 };
enum { DEFAULT_ITERS = 8 };
enum { DEFAULT_LEVELS = 4 };
enum { DEFAULT_NR_PLANE = 4 };
static void estimateRecommendedParams(int width, int height,
int& ndisp, int& iters, int& levels, int& nr_plane);
explicit StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP,
int iters = DEFAULT_ITERS,
int levels = DEFAULT_LEVELS,
int nr_plane = DEFAULT_NR_PLANE,
int msg_type = CV_32F);
StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
float max_data_term, float data_weight,
float max_disc_term, float disc_single_jump,
int min_disp_th = 0,
int msg_type = CV_32F);
void operator()(const GpuMat& left, const GpuMat& right,
GpuMat& disparity, Stream& stream = Stream::Null());
int ndisp;
int iters;
int levels;
int nr_plane;
float max_data_term;
float data_weight;
float max_disc_term;
float disc_single_jump;
int min_disp_th;
int msg_type;
bool use_local_init_data_cost;
...
};
The class implements algorithm described in [Yang2010]_. ``StereoConstantSpaceBP`` supports both local minimum and global minimum data cost initialization algorithms. For more details, see the paper mentioned above. By default, a local algorithm is used. To enable a global algorithm, set ``use_local_init_data_cost`` to ``false`` .
gpu::StereoConstantSpaceBP::StereoConstantSpaceBP
-----------------------------------------------------
Enables the :ocv:class:`gpu::StereoConstantSpaceBP` constructors.
.. ocv:function:: gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int nr_plane = DEFAULT_NR_PLANE, int msg_type = CV_32F)
.. ocv:function:: gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th = 0, int msg_type = CV_32F)
:param ndisp: Number of disparities.
:param iters: Number of BP iterations on each level.
:param levels: Number of levels.
:param nr_plane: Number of disparity levels on the first level.
:param max_data_term: Truncation of data cost.
:param data_weight: Data weight.
:param max_disc_term: Truncation of discontinuity.
:param disc_single_jump: Discontinuity single jump.
:param min_disp_th: Minimal disparity threshold.
:param msg_type: Type for messages. ``CV_16SC1`` and ``CV_32FC1`` types are supported.
``StereoConstantSpaceBP`` uses a truncated linear model for the data cost and discontinuity terms:
.. math::
DataCost = data \_ weight \cdot \min ( \lvert I_2-I_1 \rvert , max \_ data \_ term)
.. math::
DiscTerm = \min (disc \_ single \_ jump \cdot \lvert f_1-f_2 \rvert , max \_ disc \_ term)
For more details, see [Yang2010]_.
By default, ``StereoConstantSpaceBP`` uses floating-point arithmetics and the ``CV_32FC1`` type for messages. But it can also use fixed-point arithmetics and the ``CV_16SC1`` message type for better performance. To avoid an overflow in this case, the parameters must satisfy the following requirement:
.. math::
10 \cdot 2^{levels-1} \cdot max \_ data \_ term < SHRT \_ MAX
gpu::StereoConstantSpaceBP::estimateRecommendedParams
---------------------------------------------------------
Uses a heuristic method to compute parameters (ndisp, iters, levelsand nrplane) for the specified image size (widthand height).
.. ocv:function:: void gpu::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane)
gpu::StereoConstantSpaceBP::operator ()
-------------------------------------------
Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair.
.. ocv:function:: void gpu::StereoConstantSpaceBP::operator ()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null())
:param left: Left image. ``CV_8UC1`` , ``CV_8UC3`` and ``CV_8UC4`` types are supported.
:param right: Right image with the same size and the same type as the left one.
:param disparity: Output disparity map. If ``disparity`` is empty, the output type is ``CV_16SC1`` . Otherwise, the output type is ``disparity.type()`` .
:param stream: Stream for the asynchronous version.
gpu::DisparityBilateralFilter
-----------------------------
.. ocv:class:: gpu::DisparityBilateralFilter
Class refining a disparity map using joint bilateral filtering. ::
class CV_EXPORTS DisparityBilateralFilter
{
public:
enum { DEFAULT_NDISP = 64 };
enum { DEFAULT_RADIUS = 3 };
enum { DEFAULT_ITERS = 1 };
explicit DisparityBilateralFilter(int ndisp = DEFAULT_NDISP,
int radius = DEFAULT_RADIUS, int iters = DEFAULT_ITERS);
DisparityBilateralFilter(int ndisp, int radius, int iters,
float edge_threshold, float max_disc_threshold,
float sigma_range);
void operator()(const GpuMat& disparity, const GpuMat& image,
GpuMat& dst, Stream& stream = Stream::Null());
...
};
The class implements [Yang2010]_ algorithm.
gpu::DisparityBilateralFilter::DisparityBilateralFilter
-----------------------------------------------------------
Enables the :ocv:class:`gpu::DisparityBilateralFilter` constructors.
.. ocv:function:: gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp = DEFAULT_NDISP, int radius = DEFAULT_RADIUS, int iters = DEFAULT_ITERS)
.. ocv:function:: gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, float sigma_range)
:param ndisp: Number of disparities.
:param radius: Filter radius.
:param iters: Number of iterations.
:param edge_threshold: Threshold for edges.
:param max_disc_threshold: Constant to reject outliers.
:param sigma_range: Filter range.
gpu::DisparityBilateralFilter::operator ()
----------------------------------------------
Refines a disparity map using joint bilateral filtering.
.. ocv:function:: void gpu::DisparityBilateralFilter::operator ()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream = Stream::Null())
:param disparity: Input disparity map. ``CV_8UC1`` and ``CV_16SC1`` types are supported.
:param image: Input image. ``CV_8UC1`` and ``CV_8UC3`` types are supported.
:param dst: Destination disparity map. It has the same size and type as ``disparity`` .
:param stream: Stream for the asynchronous version.
gpu::drawColorDisp
----------------------
Colors a disparity image.
.. ocv:function:: void gpu::drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp, Stream& stream = Stream::Null())
:param src_disp: Source disparity image. ``CV_8UC1`` and ``CV_16SC1`` types are supported.
:param dst_disp: Output disparity image. It has the same size as ``src_disp`` . The type is ``CV_8UC4`` in ``BGRA`` format (alpha = 255).
:param ndisp: Number of disparities.
:param stream: Stream for the asynchronous version.
This function draws a colored disparity map by converting disparity values from ``[0..ndisp)`` interval first to ``HSV`` color space (where different disparity values correspond to different hues) and then converting the pixels to ``RGB`` for visualization.
gpu::reprojectImageTo3D
---------------------------
Reprojects a disparity image to 3D space.
.. ocv:function:: void gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, int dst_cn = 4, Stream& stream = Stream::Null())
:param disp: Input disparity image. ``CV_8U`` and ``CV_16S`` types are supported.
:param xyzw: Output 3- or 4-channel floating-point image of the same size as ``disp`` . Each element of ``xyzw(x,y)`` contains 3D coordinates ``(x,y,z)`` or ``(x,y,z,1)`` of the point ``(x,y)`` , computed from the disparity map.
:param Q: :math:`4 \times 4` perspective transformation matrix that can be obtained via :ocv:func:`stereoRectify` .
:param dst_cn: The number of channels for output image. Can be 3 or 4.
:param stream: Stream for the asynchronous version.
.. seealso:: :ocv:func:`reprojectImageTo3D`
gpu::solvePnPRansac
-------------------
Finds the object pose from 3D-2D point correspondences.
.. ocv:function:: void gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat, const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false, int num_iters=100, float max_dist=8.0, int min_inlier_count=100, vector<int>* inliers=NULL)
:param object: Single-row matrix of object points.
:param image: Single-row matrix of image points.
:param camera_mat: 3x3 matrix of intrinsic camera parameters.
:param dist_coef: Distortion coefficients. See :ocv:func:`undistortPoints` for details.
:param rvec: Output 3D rotation vector.
:param tvec: Output 3D translation vector.
:param use_extrinsic_guess: Flag to indicate that the function must use ``rvec`` and ``tvec`` as an initial transformation guess. It is not supported for now.
:param num_iters: Maximum number of RANSAC iterations.
:param max_dist: Euclidean distance threshold to detect whether point is inlier or not.
:param min_inlier_count: Flag to indicate that the function must stop if greater or equal number of inliers is achieved. It is not supported for now.
:param inliers: Output vector of inlier indices.
.. seealso:: :ocv:func:`solvePnPRansac`
.. [Felzenszwalb2006] Pedro F. Felzenszwalb algorithm [Pedro F. Felzenszwalb and Daniel P. Huttenlocher. *Efficient belief propagation for early vision*. International Journal of Computer Vision, 70(1), October 2006
.. [Yang2010] Q. Yang, L. Wang, and N. Ahuja. *A constant-space belief propagation algorithm for stereo matching*. In CVPR, 2010.

View File

@@ -9,4 +9,3 @@ gpu. GPU-accelerated Computer Vision
initalization_and_information
data_structures
object_detection
camera_calibration_and_3d_reconstruction

View File

@@ -55,6 +55,7 @@
#include "opencv2/gpuimgproc.hpp"
#include "opencv2/gpufeatures2d.hpp"
#include "opencv2/gpuvideo.hpp"
#include "opencv2/gpucalib3d.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/objdetect.hpp"
@@ -68,18 +69,6 @@ namespace cv { namespace gpu {
///////////////////////////// Calibration 3D //////////////////////////////////
CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
GpuMat& dst, Stream& stream = Stream::Null());
CV_EXPORTS void projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst,
Stream& stream = Stream::Null());
CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false,
int num_iters=100, float max_dist=8.0, int min_inlier_count=100,
std::vector<int>* inliers=NULL);
//////////////////////////////// Image Labeling ////////////////////////////////
@@ -90,190 +79,17 @@ CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& c
//////////////////////////////// StereoBM_GPU ////////////////////////////////
class CV_EXPORTS StereoBM_GPU
{
public:
enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
//! the default constructor
StereoBM_GPU();
//! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8.
StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
//! Output disparity has CV_8U type.
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
//! Some heuristics that tries to estmate
// if current GPU will be faster than CPU in this algorithm.
// It queries current active device.
static bool checkIfGpuCallReasonable();
int preset;
int ndisp;
int winSize;
// If avergeTexThreshold == 0 => post procesing is disabled
// If avergeTexThreshold != 0 then disparity is set 0 in each point (x,y) where for left image
// SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
// i.e. input left image is low textured.
float avergeTexThreshold;
private:
GpuMat minSSD, leBuf, riBuf;
};
////////////////////////// StereoBeliefPropagation ///////////////////////////
// "Efficient Belief Propagation for Early Vision"
// P.Felzenszwalb
class CV_EXPORTS StereoBeliefPropagation
{
public:
enum { DEFAULT_NDISP = 64 };
enum { DEFAULT_ITERS = 5 };
enum { DEFAULT_LEVELS = 5 };
static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels);
//! the default constructor
explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP,
int iters = DEFAULT_ITERS,
int levels = DEFAULT_LEVELS,
int msg_type = CV_32F);
//! the full constructor taking the number of disparities, number of BP iterations on each level,
//! number of levels, truncation of data cost, data weight,
//! truncation of discontinuity cost and discontinuity single jump
//! DataTerm = data_weight * min(fabs(I2-I1), max_data_term)
//! DiscTerm = min(disc_single_jump * fabs(f1-f2), max_disc_term)
//! please see paper for more details
StereoBeliefPropagation(int ndisp, int iters, int levels,
float max_data_term, float data_weight,
float max_disc_term, float disc_single_jump,
int msg_type = CV_32F);
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
//! if disparity is empty output type will be CV_16S else output type will be disparity.type().
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
//! version for user specified data term
void operator()(const GpuMat& data, GpuMat& disparity, Stream& stream = Stream::Null());
int ndisp;
int iters;
int levels;
float max_data_term;
float data_weight;
float max_disc_term;
float disc_single_jump;
int msg_type;
private:
GpuMat u, d, l, r, u2, d2, l2, r2;
std::vector<GpuMat> datas;
GpuMat out;
};
/////////////////////////// StereoConstantSpaceBP ///////////////////////////
// "A Constant-Space Belief Propagation Algorithm for Stereo Matching"
// Qingxiong Yang, Liang Wang, Narendra Ahuja
// http://vision.ai.uiuc.edu/~qyang6/
class CV_EXPORTS StereoConstantSpaceBP
{
public:
enum { DEFAULT_NDISP = 128 };
enum { DEFAULT_ITERS = 8 };
enum { DEFAULT_LEVELS = 4 };
enum { DEFAULT_NR_PLANE = 4 };
static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane);
//! the default constructor
explicit StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP,
int iters = DEFAULT_ITERS,
int levels = DEFAULT_LEVELS,
int nr_plane = DEFAULT_NR_PLANE,
int msg_type = CV_32F);
//! the full constructor taking the number of disparities, number of BP iterations on each level,
//! number of levels, number of active disparity on the first level, truncation of data cost, data weight,
//! truncation of discontinuity cost, discontinuity single jump and minimum disparity threshold
StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
int min_disp_th = 0,
int msg_type = CV_32F);
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
//! if disparity is empty output type will be CV_16S else output type will be disparity.type().
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
int ndisp;
int iters;
int levels;
int nr_plane;
float max_data_term;
float data_weight;
float max_disc_term;
float disc_single_jump;
int min_disp_th;
int msg_type;
bool use_local_init_data_cost;
private:
GpuMat messages_buffers;
GpuMat temp;
GpuMat out;
};
/////////////////////////// DisparityBilateralFilter ///////////////////////////
// Disparity map refinement using joint bilateral filtering given a single color image.
// Qingxiong Yang, Liang Wang, Narendra Ahuja
// http://vision.ai.uiuc.edu/~qyang6/
class CV_EXPORTS DisparityBilateralFilter
{
public:
enum { DEFAULT_NDISP = 64 };
enum { DEFAULT_RADIUS = 3 };
enum { DEFAULT_ITERS = 1 };
//! the default constructor
explicit DisparityBilateralFilter(int ndisp = DEFAULT_NDISP, int radius = DEFAULT_RADIUS, int iters = DEFAULT_ITERS);
//! the full constructor taking the number of disparities, filter radius,
//! number of iterations, truncation of data continuity, truncation of disparity continuity
//! and filter range sigma
DisparityBilateralFilter(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, float sigma_range);
//! the disparity map refinement operator. Refine disparity map using joint bilateral filtering given a single color image.
//! disparity must have CV_8U or CV_16S type, image must have CV_8UC1 or CV_8UC3 type.
void operator()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream = Stream::Null());
private:
int ndisp;
int radius;
int iters;
float edge_threshold;
float max_disc_threshold;
float sigma_range;
GpuMat table_color;
GpuMat table_space;
};
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////

View File

@@ -1,371 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "perf_precomp.hpp"
using namespace std;
using namespace testing;
using namespace perf;
//////////////////////////////////////////////////////////////////////
// StereoBM
typedef std::tr1::tuple<string, string> pair_string;
DEF_PARAM_TEST_1(ImagePair, pair_string);
PERF_TEST_P(ImagePair, Calib3D_StereoBM,
Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
{
declare.time(300.0);
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgLeft.empty());
const cv::Mat imgRight = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgRight.empty());
const int preset = 0;
const int ndisp = 256;
if (PERF_RUN_GPU())
{
cv::gpu::StereoBM_GPU d_bm(preset, ndisp);
const cv::gpu::GpuMat d_imgLeft(imgLeft);
const cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat dst;
TEST_CYCLE() d_bm(d_imgLeft, d_imgRight, dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Ptr<cv::StereoBM> bm = cv::createStereoBM(ndisp);
cv::Mat dst;
TEST_CYCLE() bm->compute(imgLeft, imgRight, dst);
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// StereoBeliefPropagation
PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation,
Values(pair_string("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
{
declare.time(300.0);
const cv::Mat imgLeft = readImage(GET_PARAM(0));
ASSERT_FALSE(imgLeft.empty());
const cv::Mat imgRight = readImage(GET_PARAM(1));
ASSERT_FALSE(imgRight.empty());
const int ndisp = 64;
if (PERF_RUN_GPU())
{
cv::gpu::StereoBeliefPropagation d_bp(ndisp);
const cv::gpu::GpuMat d_imgLeft(imgLeft);
const cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat dst;
TEST_CYCLE() d_bp(d_imgLeft, d_imgRight, dst);
GPU_SANITY_CHECK(dst);
}
else
{
FAIL_NO_CPU();
}
}
//////////////////////////////////////////////////////////////////////
// StereoConstantSpaceBP
PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP,
Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
{
declare.time(300.0);
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgLeft.empty());
const cv::Mat imgRight = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(imgRight.empty());
const int ndisp = 128;
if (PERF_RUN_GPU())
{
cv::gpu::StereoConstantSpaceBP d_csbp(ndisp);
const cv::gpu::GpuMat d_imgLeft(imgLeft);
const cv::gpu::GpuMat d_imgRight(imgRight);
cv::gpu::GpuMat dst;
TEST_CYCLE() d_csbp(d_imgLeft, d_imgRight, dst);
GPU_SANITY_CHECK(dst);
}
else
{
FAIL_NO_CPU();
}
}
//////////////////////////////////////////////////////////////////////
// DisparityBilateralFilter
PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter,
Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
{
const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
const cv::Mat disp = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(disp.empty());
const int ndisp = 128;
if (PERF_RUN_GPU())
{
cv::gpu::DisparityBilateralFilter d_filter(ndisp);
const cv::gpu::GpuMat d_img(img);
const cv::gpu::GpuMat d_disp(disp);
cv::gpu::GpuMat dst;
TEST_CYCLE() d_filter(d_disp, d_img, dst);
GPU_SANITY_CHECK(dst);
}
else
{
FAIL_NO_CPU();
}
}
//////////////////////////////////////////////////////////////////////
// TransformPoints
DEF_PARAM_TEST_1(Count, int);
PERF_TEST_P(Count, Calib3D_TransformPoints,
Values(5000, 10000, 20000))
{
const int count = GetParam();
cv::Mat src(1, count, CV_32FC3);
declare.in(src, WARMUP_RNG);
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
if (PERF_RUN_GPU())
{
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
TEST_CYCLE() cv::gpu::transformPoints(d_src, rvec, tvec, dst);
GPU_SANITY_CHECK(dst);
}
else
{
FAIL_NO_CPU();
}
}
//////////////////////////////////////////////////////////////////////
// ProjectPoints
PERF_TEST_P(Count, Calib3D_ProjectPoints,
Values(5000, 10000, 20000))
{
const int count = GetParam();
cv::Mat src(1, count, CV_32FC3);
declare.in(src, WARMUP_RNG);
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
const cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
if (PERF_RUN_GPU())
{
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
TEST_CYCLE() cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), dst);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
TEST_CYCLE() cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// SolvePnPRansac
PERF_TEST_P(Count, Calib3D_SolvePnPRansac,
Values(5000, 10000, 20000))
{
declare.time(10.0);
const int count = GetParam();
cv::Mat object(1, count, CV_32FC3);
declare.in(object, WARMUP_RNG);
cv::Mat camera_mat(3, 3, CV_32FC1);
cv::randu(camera_mat, 0.5, 1);
camera_mat.at<float>(0, 1) = 0.f;
camera_mat.at<float>(1, 0) = 0.f;
camera_mat.at<float>(2, 0) = 0.f;
camera_mat.at<float>(2, 1) = 0.f;
const cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
cv::Mat rvec_gold(1, 3, CV_32FC1);
cv::randu(rvec_gold, 0, 1);
cv::Mat tvec_gold(1, 3, CV_32FC1);
cv::randu(tvec_gold, 0, 1);
std::vector<cv::Point2f> image_vec;
cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec);
const cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
cv::Mat rvec;
cv::Mat tvec;
if (PERF_RUN_GPU())
{
TEST_CYCLE() cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
GPU_SANITY_CHECK(rvec, 1e-3);
GPU_SANITY_CHECK(tvec, 1e-3);
}
else
{
TEST_CYCLE() cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
CPU_SANITY_CHECK(rvec, 1e-6);
CPU_SANITY_CHECK(tvec, 1e-6);
}
}
//////////////////////////////////////////////////////////////////////
// ReprojectImageTo3D
PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D,
Combine(GPU_TYPICAL_MAT_SIZES,
Values(CV_8U, CV_16S)))
{
const cv::Size size = GET_PARAM(0);
const int depth = GET_PARAM(1);
cv::Mat src(size, depth);
declare.in(src, WARMUP_RNG);
cv::Mat Q(4, 4, CV_32FC1);
cv::randu(Q, 0.1, 1.0);
if (PERF_RUN_GPU())
{
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
TEST_CYCLE() cv::gpu::reprojectImageTo3D(d_src, dst, Q);
GPU_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
TEST_CYCLE() cv::reprojectImageTo3D(src, dst, Q);
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// DrawColorDisp
PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp,
Combine(GPU_TYPICAL_MAT_SIZES,
Values(CV_8U, CV_16S)))
{
const cv::Size size = GET_PARAM(0);
const int type = GET_PARAM(1);
cv::Mat src(size, type);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_GPU())
{
const cv::gpu::GpuMat d_src(src);
cv::gpu::GpuMat dst;
TEST_CYCLE() cv::gpu::drawColorDisp(d_src, dst, 255);
GPU_SANITY_CHECK(dst);
}
else
{
FAIL_NO_CPU();
}
}

View File

@@ -1,294 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat&, Mat&, bool, int, float, int, std::vector<int>*) { throw_no_cuda(); }
#else
namespace cv { namespace gpu { namespace cudev
{
namespace transform_points
{
void call(const PtrStepSz<float3> src, const float* rot, const float* transl, PtrStepSz<float3> dst, cudaStream_t stream);
}
namespace project_points
{
void call(const PtrStepSz<float3> src, const float* rot, const float* transl, const float* proj, PtrStepSz<float2> dst, cudaStream_t stream);
}
namespace solve_pnp_ransac
{
int maxNumIters();
void computeHypothesisScores(
const int num_hypotheses, const int num_points, const float* rot_matrices,
const float3* transl_vectors, const float3* object, const float2* image,
const float dist_threshold, int* hypothesis_scores);
}
}}}
using namespace ::cv::gpu::cudev;
namespace
{
void transformPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, cudaStream_t stream)
{
CV_Assert(src.rows == 1 && src.cols > 0 && src.type() == CV_32FC3);
CV_Assert(rvec.size() == Size(3, 1) && rvec.type() == CV_32F);
CV_Assert(tvec.size() == Size(3, 1) && tvec.type() == CV_32F);
// Convert rotation vector into matrix
Mat rot;
Rodrigues(rvec, rot);
dst.create(src.size(), src.type());
transform_points::call(src, rot.ptr<float>(), tvec.ptr<float>(), dst, stream);
}
}
void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream)
{
transformPointsCaller(src, rvec, tvec, dst, StreamAccessor::getStream(stream));
}
namespace
{
void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, cudaStream_t stream)
{
CV_Assert(src.rows == 1 && src.cols > 0 && src.type() == CV_32FC3);
CV_Assert(rvec.size() == Size(3, 1) && rvec.type() == CV_32F);
CV_Assert(tvec.size() == Size(3, 1) && tvec.type() == CV_32F);
CV_Assert(camera_mat.size() == Size(3, 3) && camera_mat.type() == CV_32F);
CV_Assert(dist_coef.empty()); // Undistortion isn't supported
// Convert rotation vector into matrix
Mat rot;
Rodrigues(rvec, rot);
dst.create(src.size(), CV_32FC2);
project_points::call(src, rot.ptr<float>(), tvec.ptr<float>(), camera_mat.ptr<float>(), dst,stream);
}
}
void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream)
{
projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream));
}
namespace
{
// Selects subset_size random different points from [0, num_points - 1] range
void selectRandom(int subset_size, int num_points, std::vector<int>& subset)
{
subset.resize(subset_size);
for (int i = 0; i < subset_size; ++i)
{
bool was;
do
{
subset[i] = rand() % num_points;
was = false;
for (int j = 0; j < i; ++j)
if (subset[j] == subset[i])
{
was = true;
break;
}
} while (was);
}
}
// Computes rotation, translation pair for small subsets if the input data
class TransformHypothesesGenerator
{
public:
TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_,
const Mat& camera_mat_, int num_points_, int subset_size_,
Mat rot_matrices_, Mat transl_vectors_)
: object(&object_), image(&image_), dist_coef(&dist_coef_), camera_mat(&camera_mat_),
num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_),
transl_vectors(transl_vectors_) {}
void operator()(const BlockedRange& range) const
{
// Input data for generation of the current hypothesis
std::vector<int> subset_indices(subset_size);
Mat_<Point3f> object_subset(1, subset_size);
Mat_<Point2f> image_subset(1, subset_size);
// Current hypothesis data
Mat rot_vec(1, 3, CV_64F);
Mat rot_mat(3, 3, CV_64F);
Mat transl_vec(1, 3, CV_64F);
for (int iter = range.begin(); iter < range.end(); ++iter)
{
selectRandom(subset_size, num_points, subset_indices);
for (int i = 0; i < subset_size; ++i)
{
object_subset(0, i) = object->at<Point3f>(subset_indices[i]);
image_subset(0, i) = image->at<Point2f>(subset_indices[i]);
}
solvePnP(object_subset, image_subset, *camera_mat, *dist_coef, rot_vec, transl_vec);
// Remember translation vector
Mat transl_vec_ = transl_vectors.colRange(iter * 3, (iter + 1) * 3);
transl_vec = transl_vec.reshape(0, 1);
transl_vec.convertTo(transl_vec_, CV_32F);
// Remember rotation matrix
Rodrigues(rot_vec, rot_mat);
Mat rot_mat_ = rot_matrices.colRange(iter * 9, (iter + 1) * 9).reshape(0, 3);
rot_mat.convertTo(rot_mat_, CV_32F);
}
}
const Mat* object;
const Mat* image;
const Mat* dist_coef;
const Mat* camera_mat;
int num_points;
int subset_size;
// Hypotheses storage (global)
Mat rot_matrices;
Mat transl_vectors;
};
}
void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess,
int num_iters, float max_dist, int min_inlier_count,
std::vector<int>* inliers)
{
(void)min_inlier_count;
CV_Assert(object.rows == 1 && object.cols > 0 && object.type() == CV_32FC3);
CV_Assert(image.rows == 1 && image.cols > 0 && image.type() == CV_32FC2);
CV_Assert(object.cols == image.cols);
CV_Assert(camera_mat.size() == Size(3, 3) && camera_mat.type() == CV_32F);
CV_Assert(!use_extrinsic_guess); // We don't support initial guess for now
CV_Assert(num_iters <= solve_pnp_ransac::maxNumIters());
const int subset_size = 4;
const int num_points = object.cols;
CV_Assert(num_points >= subset_size);
// Unapply distortion and intrinsic camera transformations
Mat eye_camera_mat = Mat::eye(3, 3, CV_32F);
Mat empty_dist_coef;
Mat image_normalized;
undistortPoints(image, image_normalized, camera_mat, dist_coef, Mat(), eye_camera_mat);
// Hypotheses storage (global)
Mat rot_matrices(1, num_iters * 9, CV_32F);
Mat transl_vectors(1, num_iters * 3, CV_32F);
// Generate set of hypotheses using small subsets of the input data
TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat,
num_points, subset_size, rot_matrices, transl_vectors);
parallel_for(BlockedRange(0, num_iters), body);
// Compute scores (i.e. number of inliers) for each hypothesis
GpuMat d_object(object);
GpuMat d_image_normalized(image_normalized);
GpuMat d_hypothesis_scores(1, num_iters, CV_32S);
solve_pnp_ransac::computeHypothesisScores(
num_iters, num_points, rot_matrices.ptr<float>(), transl_vectors.ptr<float3>(),
d_object.ptr<float3>(), d_image_normalized.ptr<float2>(), max_dist * max_dist,
d_hypothesis_scores.ptr<int>());
// Find the best hypothesis index
Point best_idx;
double best_score;
minMaxLoc(d_hypothesis_scores, NULL, &best_score, NULL, &best_idx);
int num_inliers = static_cast<int>(best_score);
// Extract the best hypothesis data
Mat rot_mat = rot_matrices.colRange(best_idx.x * 9, (best_idx.x + 1) * 9).reshape(0, 3);
Rodrigues(rot_mat, rvec);
rvec = rvec.reshape(0, 1);
tvec = transl_vectors.colRange(best_idx.x * 3, (best_idx.x + 1) * 3).clone();
tvec = tvec.reshape(0, 1);
// Build vector of inlier indices
if (inliers != NULL)
{
inliers->clear();
inliers->reserve(num_inliers);
Point3f p, p_transf;
Point2f p_proj;
const float* rot = rot_mat.ptr<float>();
const float* transl = tvec.ptr<float>();
for (int i = 0; i < num_points; ++i)
{
p = object.at<Point3f>(0, i);
p_transf.x = rot[0] * p.x + rot[1] * p.y + rot[2] * p.z + transl[0];
p_transf.y = rot[3] * p.x + rot[4] * p.y + rot[5] * p.z + transl[1];
p_transf.z = rot[6] * p.x + rot[7] * p.y + rot[8] * p.z + transl[2];
p_proj.x = p_transf.x / p_transf.z;
p_proj.y = p_transf.y / p_transf.z;
if (norm(p_proj - image_normalized.at<Point2f>(0, i)) < max_dist)
inliers->push_back(i);
}
}
}
#endif

View File

@@ -1,193 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
namespace cv { namespace gpu { namespace cudev
{
#define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200
namespace transform_points
{
__constant__ float3 crot0;
__constant__ float3 crot1;
__constant__ float3 crot2;
__constant__ float3 ctransl;
struct TransformOp : unary_function<float3, float3>
{
__device__ __forceinline__ float3 operator()(const float3& p) const
{
return make_float3(
crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x,
crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y,
crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z);
}
__device__ __forceinline__ TransformOp() {}
__device__ __forceinline__ TransformOp(const TransformOp&) {}
};
void call(const PtrStepSz<float3> src, const float* rot,
const float* transl, PtrStepSz<float3> dst,
cudaStream_t stream)
{
cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
cv::gpu::cudev::transform(src, dst, TransformOp(), WithOutMask(), stream);
}
} // namespace transform_points
namespace project_points
{
__constant__ float3 crot0;
__constant__ float3 crot1;
__constant__ float3 crot2;
__constant__ float3 ctransl;
__constant__ float3 cproj0;
__constant__ float3 cproj1;
struct ProjectOp : unary_function<float3, float3>
{
__device__ __forceinline__ float2 operator()(const float3& p) const
{
// Rotate and translate in 3D
float3 t = make_float3(
crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x,
crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y,
crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z);
// Project on 2D plane
return make_float2(
(cproj0.x * t.x + cproj0.y * t.y) / t.z + cproj0.z,
(cproj1.x * t.x + cproj1.y * t.y) / t.z + cproj1.z);
}
__device__ __forceinline__ ProjectOp() {}
__device__ __forceinline__ ProjectOp(const ProjectOp&) {}
};
void call(const PtrStepSz<float3> src, const float* rot,
const float* transl, const float* proj, PtrStepSz<float2> dst,
cudaStream_t stream)
{
cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
cv::gpu::cudev::transform(src, dst, ProjectOp(), WithOutMask(), stream);
}
} // namespace project_points
namespace solve_pnp_ransac
{
__constant__ float3 crot_matrices[SOLVE_PNP_RANSAC_MAX_NUM_ITERS * 3];
__constant__ float3 ctransl_vectors[SOLVE_PNP_RANSAC_MAX_NUM_ITERS];
int maxNumIters()
{
return SOLVE_PNP_RANSAC_MAX_NUM_ITERS;
}
__device__ __forceinline__ float sqr(float x)
{
return x * x;
}
template <int BLOCK_SIZE>
__global__ void computeHypothesisScoresKernel(
const int num_points, const float3* object, const float2* image,
const float dist_threshold, int* g_num_inliers)
{
const float3* const &rot_mat = crot_matrices + blockIdx.x * 3;
const float3 &transl_vec = ctransl_vectors[blockIdx.x];
int num_inliers = 0;
for (int i = threadIdx.x; i < num_points; i += blockDim.x)
{
float3 p = object[i];
p = make_float3(
rot_mat[0].x * p.x + rot_mat[0].y * p.y + rot_mat[0].z * p.z + transl_vec.x,
rot_mat[1].x * p.x + rot_mat[1].y * p.y + rot_mat[1].z * p.z + transl_vec.y,
rot_mat[2].x * p.x + rot_mat[2].y * p.y + rot_mat[2].z * p.z + transl_vec.z);
p.x /= p.z;
p.y /= p.z;
float2 image_p = image[i];
if (sqr(p.x - image_p.x) + sqr(p.y - image_p.y) < dist_threshold)
++num_inliers;
}
__shared__ int s_num_inliers[BLOCK_SIZE];
reduce<BLOCK_SIZE>(s_num_inliers, num_inliers, threadIdx.x, plus<int>());
if (threadIdx.x == 0)
g_num_inliers[blockIdx.x] = num_inliers;
}
void computeHypothesisScores(
const int num_hypotheses, const int num_points, const float* rot_matrices,
const float3* transl_vectors, const float3* object, const float2* image,
const float dist_threshold, int* hypothesis_scores)
{
cudaSafeCall(cudaMemcpyToSymbol(crot_matrices, rot_matrices, num_hypotheses * 3 * sizeof(float3)));
cudaSafeCall(cudaMemcpyToSymbol(ctransl_vectors, transl_vectors, num_hypotheses * sizeof(float3)));
dim3 threads(256);
dim3 grid(num_hypotheses);
computeHypothesisScoresKernel<256><<<grid, threads>>>(
num_points, object, image, dist_threshold, hypothesis_scores);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
} // namespace solvepnp_ransac
}}} // namespace cv { namespace gpu { namespace cudev
#endif /* CUDA_DISABLER */

View File

@@ -1,223 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/limits.hpp"
namespace cv { namespace gpu { namespace cudev
{
namespace disp_bilateral_filter
{
__constant__ float* ctable_color;
__constant__ float* ctable_space;
__constant__ size_t ctable_space_step;
__constant__ int cndisp;
__constant__ int cradius;
__constant__ short cedge_disc;
__constant__ short cmax_disc;
void disp_load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc)
{
cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
size_t table_space_step = table_space.step / sizeof(float);
cudaSafeCall( cudaMemcpyToSymbol(ctable_space_step, &table_space_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(cradius, &radius, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(cedge_disc, &edge_disc, sizeof(short)) );
cudaSafeCall( cudaMemcpyToSymbol(cmax_disc, &max_disc, sizeof(short)) );
}
template <int channels>
struct DistRgbMax
{
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
{
uchar x = ::abs(a[0] - b[0]);
uchar y = ::abs(a[1] - b[1]);
uchar z = ::abs(a[2] - b[2]);
return (::max(::max(x, y), z));
}
};
template <>
struct DistRgbMax<1>
{
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
{
return ::abs(a[0] - b[0]);
}
};
template <int channels, typename T>
__global__ void disp_bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)
{
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);
T dp[5];
if (y > 0 && y < h - 1 && x > 0 && x < w - 1)
{
dp[0] = *(disp + (y ) * disp_step + x + 0);
dp[1] = *(disp + (y-1) * disp_step + x + 0);
dp[2] = *(disp + (y ) * disp_step + x - 1);
dp[3] = *(disp + (y+1) * disp_step + x + 0);
dp[4] = *(disp + (y ) * disp_step + x + 1);
if(::abs(dp[1] - dp[0]) >= cedge_disc || ::abs(dp[2] - dp[0]) >= cedge_disc || ::abs(dp[3] - dp[0]) >= cedge_disc || ::abs(dp[4] - dp[0]) >= cedge_disc)
{
const int ymin = ::max(0, y - cradius);
const int xmin = ::max(0, x - cradius);
const int ymax = ::min(h - 1, y + cradius);
const int xmax = ::min(w - 1, x + cradius);
float cost[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
const uchar* ic = img + y * img_step + channels * x;
for(int yi = ymin; yi <= ymax; yi++)
{
const T* disp_y = disp + yi * disp_step;
for(int xi = xmin; xi <= xmax; xi++)
{
const uchar* in = img + yi * img_step + channels * xi;
uchar dist_rgb = DistRgbMax<channels>::calc(in, ic);
const float weight = ctable_color[dist_rgb] * (ctable_space + ::abs(y-yi)* ctable_space_step)[::abs(x-xi)];
const T disp_reg = disp_y[xi];
cost[0] += ::min(cmax_disc, ::abs(disp_reg - dp[0])) * weight;
cost[1] += ::min(cmax_disc, ::abs(disp_reg - dp[1])) * weight;
cost[2] += ::min(cmax_disc, ::abs(disp_reg - dp[2])) * weight;
cost[3] += ::min(cmax_disc, ::abs(disp_reg - dp[3])) * weight;
cost[4] += ::min(cmax_disc, ::abs(disp_reg - dp[4])) * weight;
}
}
float minimum = numeric_limits<float>::max();
int id = 0;
if (cost[0] < minimum)
{
minimum = cost[0];
id = 0;
}
if (cost[1] < minimum)
{
minimum = cost[1];
id = 1;
}
if (cost[2] < minimum)
{
minimum = cost[2];
id = 2;
}
if (cost[3] < minimum)
{
minimum = cost[3];
id = 3;
}
if (cost[4] < minimum)
{
minimum = cost[4];
id = 4;
}
*(disp + y * disp_step + x) = dp[id];
}
}
}
template <typename T>
void disp_bilateral_filter(PtrStepSz<T> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(disp.cols, threads.x << 1);
grid.y = divUp(disp.rows, threads.y);
switch (channels)
{
case 1:
for (int i = 0; i < iters; ++i)
{
disp_bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
cudaSafeCall( cudaGetLastError() );
disp_bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
cudaSafeCall( cudaGetLastError() );
}
break;
case 3:
for (int i = 0; i < iters; ++i)
{
disp_bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
cudaSafeCall( cudaGetLastError() );
disp_bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
cudaSafeCall( cudaGetLastError() );
}
break;
default:
CV_Error(cv::Error::BadNumChannels, "Unsupported channels count");
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void disp_bilateral_filter<uchar>(PtrStepSz<uchar> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
template void disp_bilateral_filter<short>(PtrStepSz<short> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
} // namespace bilateral_filter
}}} // namespace cv { namespace gpu { namespace cudev
#endif /* CUDA_DISABLER */

View File

@@ -1,540 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
namespace cv { namespace gpu { namespace cudev
{
namespace stereobm
{
//////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////// Stereo BM ////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
#define ROWSperTHREAD 21 // the number of rows a thread will process
#define BLOCK_W 128 // the thread block width (464)
#define N_DISPARITIES 8
#define STEREO_MIND 0 // The minimum d range to check
#define STEREO_DISP_STEP N_DISPARITIES // the d step, must be <= 1 to avoid aliasing
__constant__ unsigned int* cminSSDImage;
__constant__ size_t cminSSD_step;
__constant__ int cwidth;
__constant__ int cheight;
__device__ __forceinline__ int SQ(int a)
{
return a * a;
}
template<int RADIUS>
__device__ unsigned int CalcSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd)
{
unsigned int cache = 0;
unsigned int cache2 = 0;
for(int i = 1; i <= RADIUS; i++)
cache += col_ssd[i];
col_ssd_cache[0] = cache;
__syncthreads();
if (threadIdx.x < BLOCK_W - RADIUS)
cache2 = col_ssd_cache[RADIUS];
else
for(int i = RADIUS + 1; i < (2 * RADIUS + 1); i++)
cache2 += col_ssd[i];
return col_ssd[0] + cache + cache2;
}
template<int RADIUS>
__device__ uint2 MinSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd)
{
unsigned int ssd[N_DISPARITIES];
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
ssd[0] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 0 * (BLOCK_W + 2 * RADIUS));
__syncthreads();
ssd[1] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 1 * (BLOCK_W + 2 * RADIUS));
__syncthreads();
ssd[2] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 2 * (BLOCK_W + 2 * RADIUS));
__syncthreads();
ssd[3] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 3 * (BLOCK_W + 2 * RADIUS));
__syncthreads();
ssd[4] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 4 * (BLOCK_W + 2 * RADIUS));
__syncthreads();
ssd[5] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 5 * (BLOCK_W + 2 * RADIUS));
__syncthreads();
ssd[6] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 6 * (BLOCK_W + 2 * RADIUS));
__syncthreads();
ssd[7] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 7 * (BLOCK_W + 2 * RADIUS));
int mssd = ::min(::min(::min(ssd[0], ssd[1]), ::min(ssd[4], ssd[5])), ::min(::min(ssd[2], ssd[3]), ::min(ssd[6], ssd[7])));
int bestIdx = 0;
for (int i = 0; i < N_DISPARITIES; i++)
{
if (mssd == ssd[i])
bestIdx = i;
}
return make_uint2(mssd, bestIdx);
}
template<int RADIUS>
__device__ void StepDown(int idx1, int idx2, unsigned char* imageL, unsigned char* imageR, int d, volatile unsigned int *col_ssd)
{
unsigned char leftPixel1;
unsigned char leftPixel2;
unsigned char rightPixel1[8];
unsigned char rightPixel2[8];
unsigned int diff1, diff2;
leftPixel1 = imageL[idx1];
leftPixel2 = imageL[idx2];
idx1 = idx1 - d;
idx2 = idx2 - d;
rightPixel1[7] = imageR[idx1 - 7];
rightPixel1[0] = imageR[idx1 - 0];
rightPixel1[1] = imageR[idx1 - 1];
rightPixel1[2] = imageR[idx1 - 2];
rightPixel1[3] = imageR[idx1 - 3];
rightPixel1[4] = imageR[idx1 - 4];
rightPixel1[5] = imageR[idx1 - 5];
rightPixel1[6] = imageR[idx1 - 6];
rightPixel2[7] = imageR[idx2 - 7];
rightPixel2[0] = imageR[idx2 - 0];
rightPixel2[1] = imageR[idx2 - 1];
rightPixel2[2] = imageR[idx2 - 2];
rightPixel2[3] = imageR[idx2 - 3];
rightPixel2[4] = imageR[idx2 - 4];
rightPixel2[5] = imageR[idx2 - 5];
rightPixel2[6] = imageR[idx2 - 6];
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
diff1 = leftPixel1 - rightPixel1[0];
diff2 = leftPixel2 - rightPixel2[0];
col_ssd[0 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
diff1 = leftPixel1 - rightPixel1[1];
diff2 = leftPixel2 - rightPixel2[1];
col_ssd[1 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
diff1 = leftPixel1 - rightPixel1[2];
diff2 = leftPixel2 - rightPixel2[2];
col_ssd[2 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
diff1 = leftPixel1 - rightPixel1[3];
diff2 = leftPixel2 - rightPixel2[3];
col_ssd[3 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
diff1 = leftPixel1 - rightPixel1[4];
diff2 = leftPixel2 - rightPixel2[4];
col_ssd[4 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
diff1 = leftPixel1 - rightPixel1[5];
diff2 = leftPixel2 - rightPixel2[5];
col_ssd[5 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
diff1 = leftPixel1 - rightPixel1[6];
diff2 = leftPixel2 - rightPixel2[6];
col_ssd[6 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
diff1 = leftPixel1 - rightPixel1[7];
diff2 = leftPixel2 - rightPixel2[7];
col_ssd[7 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
}
template<int RADIUS>
__device__ void InitColSSD(int x_tex, int y_tex, int im_pitch, unsigned char* imageL, unsigned char* imageR, int d, volatile unsigned int *col_ssd)
{
unsigned char leftPixel1;
int idx;
unsigned int diffa[] = {0, 0, 0, 0, 0, 0, 0, 0};
for(int i = 0; i < (2 * RADIUS + 1); i++)
{
idx = y_tex * im_pitch + x_tex;
leftPixel1 = imageL[idx];
idx = idx - d;
diffa[0] += SQ(leftPixel1 - imageR[idx - 0]);
diffa[1] += SQ(leftPixel1 - imageR[idx - 1]);
diffa[2] += SQ(leftPixel1 - imageR[idx - 2]);
diffa[3] += SQ(leftPixel1 - imageR[idx - 3]);
diffa[4] += SQ(leftPixel1 - imageR[idx - 4]);
diffa[5] += SQ(leftPixel1 - imageR[idx - 5]);
diffa[6] += SQ(leftPixel1 - imageR[idx - 6]);
diffa[7] += SQ(leftPixel1 - imageR[idx - 7]);
y_tex += 1;
}
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
col_ssd[0 * (BLOCK_W + 2 * RADIUS)] = diffa[0];
col_ssd[1 * (BLOCK_W + 2 * RADIUS)] = diffa[1];
col_ssd[2 * (BLOCK_W + 2 * RADIUS)] = diffa[2];
col_ssd[3 * (BLOCK_W + 2 * RADIUS)] = diffa[3];
col_ssd[4 * (BLOCK_W + 2 * RADIUS)] = diffa[4];
col_ssd[5 * (BLOCK_W + 2 * RADIUS)] = diffa[5];
col_ssd[6 * (BLOCK_W + 2 * RADIUS)] = diffa[6];
col_ssd[7 * (BLOCK_W + 2 * RADIUS)] = diffa[7];
}
template<int RADIUS>
__global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t img_step, PtrStepb disp, int maxdisp)
{
extern __shared__ unsigned int col_ssd_cache[];
volatile unsigned int *col_ssd = col_ssd_cache + BLOCK_W + threadIdx.x;
volatile unsigned int *col_ssd_extra = threadIdx.x < (2 * RADIUS) ? col_ssd + BLOCK_W : 0; //#define N_DIRTY_PIXELS (2 * RADIUS)
//#define X (blockIdx.x * BLOCK_W + threadIdx.x + STEREO_MAXD)
int X = (blockIdx.x * BLOCK_W + threadIdx.x + maxdisp + RADIUS);
//#define Y (__mul24(blockIdx.y, ROWSperTHREAD) + RADIUS)
#define Y (blockIdx.y * ROWSperTHREAD + RADIUS)
//int Y = blockIdx.y * ROWSperTHREAD + RADIUS;
unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
unsigned char* disparImage = disp.data + X + Y * disp.step;
/* if (X < cwidth)
{
unsigned int *minSSDImage_end = minSSDImage + min(ROWSperTHREAD, cheight - Y) * minssd_step;
for(uint *ptr = minSSDImage; ptr != minSSDImage_end; ptr += minssd_step )
*ptr = 0xFFFFFFFF;
}*/
int end_row = ::min(ROWSperTHREAD, cheight - Y - RADIUS);
int y_tex;
int x_tex = X - RADIUS;
if (x_tex >= cwidth)
return;
for(int d = STEREO_MIND; d < maxdisp; d += STEREO_DISP_STEP)
{
y_tex = Y - RADIUS;
InitColSSD<RADIUS>(x_tex, y_tex, img_step, left, right, d, col_ssd);
if (col_ssd_extra > 0)
if (x_tex + BLOCK_W < cwidth)
InitColSSD<RADIUS>(x_tex + BLOCK_W, y_tex, img_step, left, right, d, col_ssd_extra);
__syncthreads(); //before MinSSD function
if (X < cwidth - RADIUS && Y < cheight - RADIUS)
{
uint2 minSSD = MinSSD<RADIUS>(col_ssd_cache + threadIdx.x, col_ssd);
if (minSSD.x < minSSDImage[0])
{
disparImage[0] = (unsigned char)(d + minSSD.y);
minSSDImage[0] = minSSD.x;
}
}
for(int row = 1; row < end_row; row++)
{
int idx1 = y_tex * img_step + x_tex;
int idx2 = (y_tex + (2 * RADIUS + 1)) * img_step + x_tex;
__syncthreads();
StepDown<RADIUS>(idx1, idx2, left, right, d, col_ssd);
if (col_ssd_extra)
if (x_tex + BLOCK_W < cwidth)
StepDown<RADIUS>(idx1, idx2, left + BLOCK_W, right + BLOCK_W, d, col_ssd_extra);
y_tex += 1;
__syncthreads(); //before MinSSD function
if (X < cwidth - RADIUS && row < cheight - RADIUS - Y)
{
int idx = row * cminSSD_step;
uint2 minSSD = MinSSD<RADIUS>(col_ssd_cache + threadIdx.x, col_ssd);
if (minSSD.x < minSSDImage[idx])
{
disparImage[disp.step * row] = (unsigned char)(d + minSSD.y);
minSSDImage[idx] = minSSD.x;
}
}
} // for row loop
} // for d loop
}
template<int RADIUS> void kernel_caller(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, cudaStream_t & stream)
{
dim3 grid(1,1,1);
dim3 threads(BLOCK_W, 1, 1);
grid.x = divUp(left.cols - maxdisp - 2 * RADIUS, BLOCK_W);
grid.y = divUp(left.rows - 2 * RADIUS, ROWSperTHREAD);
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
size_t smem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * RADIUS)) * sizeof(unsigned int);
stereoKernel<RADIUS><<<grid, threads, smem_size, stream>>>(left.data, right.data, left.step, disp, maxdisp);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
};
typedef void (*kernel_caller_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, cudaStream_t & stream);
const static kernel_caller_t callers[] =
{
0,
kernel_caller< 1>, kernel_caller< 2>, kernel_caller< 3>, kernel_caller< 4>, kernel_caller< 5>,
kernel_caller< 6>, kernel_caller< 7>, kernel_caller< 8>, kernel_caller< 9>, kernel_caller<10>,
kernel_caller<11>, kernel_caller<12>, kernel_caller<13>, kernel_caller<15>, kernel_caller<15>,
kernel_caller<16>, kernel_caller<17>, kernel_caller<18>, kernel_caller<19>, kernel_caller<20>,
kernel_caller<21>, kernel_caller<22>, kernel_caller<23>, kernel_caller<24>, kernel_caller<25>
//0,0,0, 0,0,0, 0,0,kernel_caller<9>
};
const int calles_num = sizeof(callers)/sizeof(callers[0]);
void stereoBM_GPU(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, int winsz, const PtrStepSz<unsigned int>& minSSD_buf, cudaStream_t& stream)
{
int winsz2 = winsz >> 1;
if (winsz2 == 0 || winsz2 >= calles_num)
CV_Error(cv::Error::StsBadArg, "Unsupported window size");
//cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferL1) );
//cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferShared) );
cudaSafeCall( cudaMemset2D(disp.data, disp.step, 0, disp.cols, disp.rows) );
cudaSafeCall( cudaMemset2D(minSSD_buf.data, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp.rows) );
cudaSafeCall( cudaMemcpyToSymbol( cwidth, &left.cols, sizeof(left.cols) ) );
cudaSafeCall( cudaMemcpyToSymbol( cheight, &left.rows, sizeof(left.rows) ) );
cudaSafeCall( cudaMemcpyToSymbol( cminSSDImage, &minSSD_buf.data, sizeof(minSSD_buf.data) ) );
size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize();
cudaSafeCall( cudaMemcpyToSymbol( cminSSD_step, &minssd_step, sizeof(minssd_step) ) );
callers[winsz2](left, right, disp, maxdisp, stream);
}
//////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////// Sobel Prefiler ///////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
texture<unsigned char, 2, cudaReadModeElementType> texForSobel;
__global__ void prefilter_kernel(PtrStepSzb output, int prefilterCap)
{
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < output.cols && y < output.rows)
{
int conv = (int)tex2D(texForSobel, x - 1, y - 1) * (-1) + (int)tex2D(texForSobel, x + 1, y - 1) * (1) +
(int)tex2D(texForSobel, x - 1, y ) * (-2) + (int)tex2D(texForSobel, x + 1, y ) * (2) +
(int)tex2D(texForSobel, x - 1, y + 1) * (-1) + (int)tex2D(texForSobel, x + 1, y + 1) * (1);
conv = ::min(::min(::max(-prefilterCap, conv), prefilterCap) + prefilterCap, 255);
output.ptr(y)[x] = conv & 0xFF;
}
}
void prefilter_xsobel(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap, cudaStream_t & stream)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
cudaSafeCall( cudaBindTexture2D( 0, texForSobel, input.data, desc, input.cols, input.rows, input.step ) );
dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(input.cols, threads.x);
grid.y = divUp(input.rows, threads.y);
prefilter_kernel<<<grid, threads, 0, stream>>>(output, prefilterCap);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaUnbindTexture (texForSobel ) );
}
//////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////// Textureness filtering ////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
texture<unsigned char, 2, cudaReadModeNormalizedFloat> texForTF;
__device__ __forceinline__ float sobel(int x, int y)
{
float conv = tex2D(texForTF, x - 1, y - 1) * (-1) + tex2D(texForTF, x + 1, y - 1) * (1) +
tex2D(texForTF, x - 1, y ) * (-2) + tex2D(texForTF, x + 1, y ) * (2) +
tex2D(texForTF, x - 1, y + 1) * (-1) + tex2D(texForTF, x + 1, y + 1) * (1);
return fabs(conv);
}
__device__ float CalcSums(float *cols, float *cols_cache, int winsz)
{
float cache = 0;
float cache2 = 0;
int winsz2 = winsz/2;
for(int i = 1; i <= winsz2; i++)
cache += cols[i];
cols_cache[0] = cache;
__syncthreads();
if (threadIdx.x < blockDim.x - winsz2)
cache2 = cols_cache[winsz2];
else
for(int i = winsz2 + 1; i < winsz; i++)
cache2 += cols[i];
return cols[0] + cache + cache2;
}
#define RpT (2 * ROWSperTHREAD) // got experimentally
__global__ void textureness_kernel(PtrStepSzb disp, int winsz, float threshold)
{
int winsz2 = winsz/2;
int n_dirty_pixels = (winsz2) * 2;
extern __shared__ float cols_cache[];
float *cols = cols_cache + blockDim.x + threadIdx.x;
float *cols_extra = threadIdx.x < n_dirty_pixels ? cols + blockDim.x : 0;
int x = blockIdx.x * blockDim.x + threadIdx.x;
int beg_row = blockIdx.y * RpT;
int end_row = ::min(beg_row + RpT, disp.rows);
if (x < disp.cols)
{
int y = beg_row;
float sum = 0;
float sum_extra = 0;
for(int i = y - winsz2; i <= y + winsz2; ++i)
{
sum += sobel(x - winsz2, i);
if (cols_extra)
sum_extra += sobel(x + blockDim.x - winsz2, i);
}
*cols = sum;
if (cols_extra)
*cols_extra = sum_extra;
__syncthreads();
float sum_win = CalcSums(cols, cols_cache + threadIdx.x, winsz) * 255;
if (sum_win < threshold)
disp.data[y * disp.step + x] = 0;
__syncthreads();
for(int y = beg_row + 1; y < end_row; ++y)
{
sum = sum - sobel(x - winsz2, y - winsz2 - 1) + sobel(x - winsz2, y + winsz2);
*cols = sum;
if (cols_extra)
{
sum_extra = sum_extra - sobel(x + blockDim.x - winsz2, y - winsz2 - 1) + sobel(x + blockDim.x - winsz2, y + winsz2);
*cols_extra = sum_extra;
}
__syncthreads();
float sum_win = CalcSums(cols, cols_cache + threadIdx.x, winsz) * 255;
if (sum_win < threshold)
disp.data[y * disp.step + x] = 0;
__syncthreads();
}
}
}
void postfilter_textureness(const PtrStepSzb& input, int winsz, float avgTexturenessThreshold, const PtrStepSzb& disp, cudaStream_t & stream)
{
avgTexturenessThreshold *= winsz * winsz;
texForTF.filterMode = cudaFilterModeLinear;
texForTF.addressMode[0] = cudaAddressModeWrap;
texForTF.addressMode[1] = cudaAddressModeWrap;
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
cudaSafeCall( cudaBindTexture2D( 0, texForTF, input.data, desc, input.cols, input.rows, input.step ) );
dim3 threads(128, 1, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(input.cols, threads.x);
grid.y = divUp(input.rows, RpT);
size_t smem_size = (threads.x + threads.x + (winsz/2) * 2 ) * sizeof(float);
textureness_kernel<<<grid, threads, smem_size, stream>>>(disp, winsz, avgTexturenessThreshold);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaUnbindTexture (texForTF) );
}
} // namespace stereobm
}}} // namespace cv { namespace gpu { namespace cudev
#endif /* CUDA_DISABLER */

View File

@@ -1,538 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/limits.hpp"
namespace cv { namespace gpu { namespace cudev
{
namespace stereobp
{
///////////////////////////////////////////////////////////////
/////////////////////// load constants ////////////////////////
///////////////////////////////////////////////////////////////
__constant__ int cndisp;
__constant__ float cmax_data_term;
__constant__ float cdata_weight;
__constant__ float cmax_disc_term;
__constant__ float cdisc_single_jump;
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump)
{
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int )) );
cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) );
}
///////////////////////////////////////////////////////////////
////////////////////////// comp data //////////////////////////
///////////////////////////////////////////////////////////////
template <int cn> struct PixDiff;
template <> struct PixDiff<1>
{
__device__ __forceinline__ PixDiff(const uchar* ls)
{
l = *ls;
}
__device__ __forceinline__ float operator()(const uchar* rs) const
{
return ::abs((int)l - *rs);
}
uchar l;
};
template <> struct PixDiff<3>
{
__device__ __forceinline__ PixDiff(const uchar* ls)
{
l = *((uchar3*)ls);
}
__device__ __forceinline__ float operator()(const uchar* rs) const
{
const float tr = 0.299f;
const float tg = 0.587f;
const float tb = 0.114f;
float val = tb * ::abs((int)l.x - rs[0]);
val += tg * ::abs((int)l.y - rs[1]);
val += tr * ::abs((int)l.z - rs[2]);
return val;
}
uchar3 l;
};
template <> struct PixDiff<4>
{
__device__ __forceinline__ PixDiff(const uchar* ls)
{
l = *((uchar4*)ls);
}
__device__ __forceinline__ float operator()(const uchar* rs) const
{
const float tr = 0.299f;
const float tg = 0.587f;
const float tb = 0.114f;
uchar4 r = *((uchar4*)rs);
float val = tb * ::abs((int)l.x - r.x);
val += tg * ::abs((int)l.y - r.y);
val += tr * ::abs((int)l.z - r.z);
return val;
}
uchar4 l;
};
template <int cn, typename D>
__global__ void comp_data(const PtrStepSzb left, const PtrStepb right, PtrStep<D> data)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y > 0 && y < left.rows - 1 && x > 0 && x < left.cols - 1)
{
const uchar* ls = left.ptr(y) + x * cn;
const PixDiff<cn> pixDiff(ls);
const uchar* rs = right.ptr(y) + x * cn;
D* ds = data.ptr(y) + x;
const size_t disp_step = data.step * left.rows / sizeof(D);
for (int disp = 0; disp < cndisp; disp++)
{
if (x - disp >= 1)
{
float val = pixDiff(rs - disp * cn);
ds[disp * disp_step] = saturate_cast<D>(fmin(cdata_weight * val, cdata_weight * cmax_data_term));
}
else
{
ds[disp * disp_step] = saturate_cast<D>(cdata_weight * cmax_data_term);
}
}
}
}
template<typename T, typename D>
void comp_data_gpu(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream);
template <> void comp_data_gpu<uchar, short>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(left.cols, threads.x);
grid.y = divUp(left.rows, threads.y);
comp_data<1, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(left.cols, threads.x);
grid.y = divUp(left.rows, threads.y);
comp_data<1, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar3, short>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(left.cols, threads.x);
grid.y = divUp(left.rows, threads.y);
comp_data<3, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar3, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(left.cols, threads.x);
grid.y = divUp(left.rows, threads.y);
comp_data<3, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar4, short>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(left.cols, threads.x);
grid.y = divUp(left.rows, threads.y);
comp_data<4, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <> void comp_data_gpu<uchar4, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(left.cols, threads.x);
grid.y = divUp(left.rows, threads.y);
comp_data<4, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
///////////////////////////////////////////////////////////////
//////////////////////// data step down ///////////////////////
///////////////////////////////////////////////////////////////
template <typename T>
__global__ void data_step_down(int dst_cols, int dst_rows, int src_rows, const PtrStep<T> src, PtrStep<T> dst)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < dst_cols && y < dst_rows)
{
for (int d = 0; d < cndisp; ++d)
{
float dst_reg = src.ptr(d * src_rows + (2*y+0))[(2*x+0)];
dst_reg += src.ptr(d * src_rows + (2*y+1))[(2*x+0)];
dst_reg += src.ptr(d * src_rows + (2*y+0))[(2*x+1)];
dst_reg += src.ptr(d * src_rows + (2*y+1))[(2*x+1)];
dst.ptr(d * dst_rows + y)[x] = saturate_cast<T>(dst_reg);
}
}
}
template<typename T>
void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(dst_cols, threads.x);
grid.y = divUp(dst_rows, threads.y);
data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)src, (PtrStepSz<T>)dst);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void data_step_down_gpu<short>(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
template void data_step_down_gpu<float>(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
///////////////////////////////////////////////////////////////
/////////////////// level up messages ////////////////////////
///////////////////////////////////////////////////////////////
template <typename T>
__global__ void level_up_message(int dst_cols, int dst_rows, int src_rows, const PtrStep<T> src, PtrStep<T> dst)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < dst_cols && y < dst_rows)
{
const size_t dst_disp_step = dst.step * dst_rows / sizeof(T);
const size_t src_disp_step = src.step * src_rows / sizeof(T);
T* dstr = dst.ptr(y ) + x;
const T* srcr = src.ptr(y/2) + x/2;
for (int d = 0; d < cndisp; ++d)
dstr[d * dst_disp_step] = srcr[d * src_disp_step];
}
}
template <typename T>
void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(dst_cols, threads.x);
grid.y = divUp(dst_rows, threads.y);
int src_idx = (dst_idx + 1) & 1;
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mus[src_idx], (PtrStepSz<T>)mus[dst_idx]);
cudaSafeCall( cudaGetLastError() );
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mds[src_idx], (PtrStepSz<T>)mds[dst_idx]);
cudaSafeCall( cudaGetLastError() );
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mls[src_idx], (PtrStepSz<T>)mls[dst_idx]);
cudaSafeCall( cudaGetLastError() );
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mrs[src_idx], (PtrStepSz<T>)mrs[dst_idx]);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void level_up_messages_gpu<short>(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
template void level_up_messages_gpu<float>(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
///////////////////////////////////////////////////////////////
//////////////////// calc all iterations /////////////////////
///////////////////////////////////////////////////////////////
template <typename T>
__device__ void calc_min_linear_penalty(T* dst, size_t step)
{
float prev = dst[0];
float cur;
for (int disp = 1; disp < cndisp; ++disp)
{
prev += cdisc_single_jump;
cur = dst[step * disp];
if (prev < cur)
{
cur = prev;
dst[step * disp] = saturate_cast<T>(prev);
}
prev = cur;
}
prev = dst[(cndisp - 1) * step];
for (int disp = cndisp - 2; disp >= 0; disp--)
{
prev += cdisc_single_jump;
cur = dst[step * disp];
if (prev < cur)
{
cur = prev;
dst[step * disp] = saturate_cast<T>(prev);
}
prev = cur;
}
}
template <typename T>
__device__ void message(const T* msg1, const T* msg2, const T* msg3, const T* data, T* dst, size_t msg_disp_step, size_t data_disp_step)
{
float minimum = cudev::numeric_limits<float>::max();
for(int i = 0; i < cndisp; ++i)
{
float dst_reg = msg1[msg_disp_step * i];
dst_reg += msg2[msg_disp_step * i];
dst_reg += msg3[msg_disp_step * i];
dst_reg += data[data_disp_step * i];
if (dst_reg < minimum)
minimum = dst_reg;
dst[msg_disp_step * i] = saturate_cast<T>(dst_reg);
}
calc_min_linear_penalty(dst, msg_disp_step);
minimum += cmax_disc_term;
float sum = 0;
for(int i = 0; i < cndisp; ++i)
{
float dst_reg = dst[msg_disp_step * i];
if (dst_reg > minimum)
{
dst_reg = minimum;
dst[msg_disp_step * i] = saturate_cast<T>(minimum);
}
sum += dst_reg;
}
sum /= cndisp;
for(int i = 0; i < cndisp; ++i)
dst[msg_disp_step * i] -= sum;
}
template <typename T>
__global__ void one_iteration(int t, int elem_step, T* u, T* d, T* l, T* r, const PtrStep<T> data, int cols, int rows)
{
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);
if ((y > 0) && (y < rows - 1) && (x > 0) && (x < cols - 1))
{
T* us = u + y * elem_step + x;
T* ds = d + y * elem_step + x;
T* ls = l + y * elem_step + x;
T* rs = r + y * elem_step + x;
const T* dt = data.ptr(y) + x;
size_t msg_disp_step = elem_step * rows;
size_t data_disp_step = data.step * rows / sizeof(T);
message(us + elem_step, ls + 1, rs - 1, dt, us, msg_disp_step, data_disp_step);
message(ds - elem_step, ls + 1, rs - 1, dt, ds, msg_disp_step, data_disp_step);
message(us + elem_step, ds - elem_step, rs - 1, dt, rs, msg_disp_step, data_disp_step);
message(us + elem_step, ds - elem_step, ls + 1, dt, ls, msg_disp_step, data_disp_step);
}
}
template <typename T>
void calc_all_iterations_gpu(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d,
const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(cols, threads.x << 1);
grid.y = divUp(rows, threads.y);
int elem_step = (int)(u.step / sizeof(T));
for(int t = 0; t < iters; ++t)
{
one_iteration<T><<<grid, threads, 0, stream>>>(t, elem_step, (T*)u.data, (T*)d.data, (T*)l.data, (T*)r.data, (PtrStepSz<T>)data, cols, rows);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
template void calc_all_iterations_gpu<short>(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream);
template void calc_all_iterations_gpu<float>(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream);
///////////////////////////////////////////////////////////////
/////////////////////////// output ////////////////////////////
///////////////////////////////////////////////////////////////
template <typename T>
__global__ void output(const int elem_step, const T* u, const T* d, const T* l, const T* r, const T* data,
PtrStepSz<short> disp)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y > 0 && y < disp.rows - 1 && x > 0 && x < disp.cols - 1)
{
const T* us = u + (y + 1) * elem_step + x;
const T* ds = d + (y - 1) * elem_step + x;
const T* ls = l + y * elem_step + (x + 1);
const T* rs = r + y * elem_step+ (x - 1);
const T* dt = data + y * elem_step + x;
size_t disp_step = disp.rows * elem_step;
int best = 0;
float best_val = numeric_limits<float>::max();
for (int d = 0; d < cndisp; ++d)
{
float val = us[d * disp_step];
val += ds[d * disp_step];
val += ls[d * disp_step];
val += rs[d * disp_step];
val += dt[d * disp_step];
if (val < best_val)
{
best_val = val;
best = d;
}
}
disp.ptr(y)[x] = saturate_cast<short>(best);
}
}
template <typename T>
void output_gpu(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data,
const PtrStepSz<short>& disp, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(disp.cols, threads.x);
grid.y = divUp(disp.rows, threads.y);
int elem_step = static_cast<int>(u.step/sizeof(T));
output<T><<<grid, threads, 0, stream>>>(elem_step, (const T*)u.data, (const T*)d.data, (const T*)l.data, (const T*)r.data, (const T*)data.data, disp);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void output_gpu<short>(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);
template void output_gpu<float>(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);
} // namespace stereobp
}}} // namespace cv { namespace gpu { namespace cudev
#endif /* CUDA_DISABLER */

View File

@@ -1,864 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/limits.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/functional.hpp"
namespace cv { namespace gpu { namespace cudev
{
namespace stereocsbp
{
///////////////////////////////////////////////////////////////
/////////////////////// load constants ////////////////////////
///////////////////////////////////////////////////////////////
__constant__ int cndisp;
__constant__ float cmax_data_term;
__constant__ float cdata_weight;
__constant__ float cmax_disc_term;
__constant__ float cdisc_single_jump;
__constant__ int cth;
__constant__ size_t cimg_step;
__constant__ size_t cmsg_step;
__constant__ size_t cdisp_step1;
__constant__ size_t cdisp_step2;
__constant__ uchar* cleft;
__constant__ uchar* cright;
__constant__ uchar* ctemp;
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& temp)
{
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(cth, &min_disp_th, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(cimg_step, &left.step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cleft, &left.data, sizeof(left.data)) );
cudaSafeCall( cudaMemcpyToSymbol(cright, &right.data, sizeof(right.data)) );
cudaSafeCall( cudaMemcpyToSymbol(ctemp, &temp.data, sizeof(temp.data)) );
}
///////////////////////////////////////////////////////////////
/////////////////////// init data cost ////////////////////////
///////////////////////////////////////////////////////////////
template <int channels> struct DataCostPerPixel;
template <> struct DataCostPerPixel<1>
{
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
{
return fmin(cdata_weight * ::abs((int)*left - *right), cdata_weight * cmax_data_term);
}
};
template <> struct DataCostPerPixel<3>
{
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
{
float tb = 0.114f * ::abs((int)left[0] - right[0]);
float tg = 0.587f * ::abs((int)left[1] - right[1]);
float tr = 0.299f * ::abs((int)left[2] - right[2]);
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
}
};
template <> struct DataCostPerPixel<4>
{
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
{
uchar4 l = *((const uchar4*)left);
uchar4 r = *((const uchar4*)right);
float tb = 0.114f * ::abs((int)l.x - r.x);
float tg = 0.587f * ::abs((int)l.y - r.y);
float tr = 0.299f * ::abs((int)l.z - r.z);
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
}
};
template <typename T>
__global__ void get_first_k_initial_global(T* data_cost_selected_, T *selected_disp_pyr, int h, int w, int nr_plane)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < h && x < w)
{
T* selected_disparity = selected_disp_pyr + y * cmsg_step + x;
T* data_cost_selected = data_cost_selected_ + y * cmsg_step + x;
T* data_cost = (T*)ctemp + y * cmsg_step + x;
for(int i = 0; i < nr_plane; i++)
{
T minimum = cudev::numeric_limits<T>::max();
int id = 0;
for(int d = 0; d < cndisp; d++)
{
T cur = data_cost[d * cdisp_step1];
if(cur < minimum)
{
minimum = cur;
id = d;
}
}
data_cost_selected[i * cdisp_step1] = minimum;
selected_disparity[i * cdisp_step1] = id;
data_cost [id * cdisp_step1] = numeric_limits<T>::max();
}
}
}
template <typename T>
__global__ void get_first_k_initial_local(T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < h && x < w)
{
T* selected_disparity = selected_disp_pyr + y * cmsg_step + x;
T* data_cost_selected = data_cost_selected_ + y * cmsg_step + x;
T* data_cost = (T*)ctemp + y * cmsg_step + x;
int nr_local_minimum = 0;
T prev = data_cost[0 * cdisp_step1];
T cur = data_cost[1 * cdisp_step1];
T next = data_cost[2 * cdisp_step1];
for (int d = 1; d < cndisp - 1 && nr_local_minimum < nr_plane; d++)
{
if (cur < prev && cur < next)
{
data_cost_selected[nr_local_minimum * cdisp_step1] = cur;
selected_disparity[nr_local_minimum * cdisp_step1] = d;
data_cost[d * cdisp_step1] = numeric_limits<T>::max();
nr_local_minimum++;
}
prev = cur;
cur = next;
next = data_cost[(d + 1) * cdisp_step1];
}
for (int i = nr_local_minimum; i < nr_plane; i++)
{
T minimum = numeric_limits<T>::max();
int id = 0;
for (int d = 0; d < cndisp; d++)
{
cur = data_cost[d * cdisp_step1];
if (cur < minimum)
{
minimum = cur;
id = d;
}
}
data_cost_selected[i * cdisp_step1] = minimum;
selected_disparity[i * cdisp_step1] = id;
data_cost[id * cdisp_step1] = numeric_limits<T>::max();
}
}
}
template <typename T, int channels>
__global__ void init_data_cost(int h, int w, int level)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < h && x < w)
{
int y0 = y << level;
int yt = (y + 1) << level;
int x0 = x << level;
int xt = (x + 1) << level;
T* data_cost = (T*)ctemp + y * cmsg_step + x;
for(int d = 0; d < cndisp; ++d)
{
float val = 0.0f;
for(int yi = y0; yi < yt; yi++)
{
for(int xi = x0; xi < xt; xi++)
{
int xr = xi - d;
if(d < cth || xr < 0)
val += cdata_weight * cmax_data_term;
else
{
const uchar* lle = cleft + yi * cimg_step + xi * channels;
const uchar* lri = cright + yi * cimg_step + xr * channels;
val += DataCostPerPixel<channels>::compute(lle, lri);
}
}
}
data_cost[cdisp_step1 * d] = saturate_cast<T>(val);
}
}
}
template <typename T, int winsz, int channels>
__global__ void init_data_cost_reduce(int level, int rows, int cols, int h)
{
int x_out = blockIdx.x;
int y_out = blockIdx.y % h;
int d = (blockIdx.y / h) * blockDim.z + threadIdx.z;
int tid = threadIdx.x;
if (d < cndisp)
{
int x0 = x_out << level;
int y0 = y_out << level;
int len = ::min(y0 + winsz, rows) - y0;
float val = 0.0f;
if (x0 + tid < cols)
{
if (x0 + tid - d < 0 || d < cth)
val = cdata_weight * cmax_data_term * len;
else
{
const uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid );
const uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - d);
for(int y = 0; y < len; ++y)
{
val += DataCostPerPixel<channels>::compute(lle, lri);
lle += cimg_step;
lri += cimg_step;
}
}
}
extern __shared__ float smem[];
reduce<winsz>(smem + winsz * threadIdx.z, val, tid, plus<float>());
T* data_cost = (T*)ctemp + y_out * cmsg_step + x_out;
if (tid == 0)
data_cost[cdisp_step1 * d] = saturate_cast<T>(val);
}
}
template <typename T>
void init_data_cost_caller_(int /*rows*/, int /*cols*/, int h, int w, int level, int /*ndisp*/, int channels, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(w, threads.x);
grid.y = divUp(h, threads.y);
switch (channels)
{
case 1: init_data_cost<T, 1><<<grid, threads, 0, stream>>>(h, w, level); break;
case 3: init_data_cost<T, 3><<<grid, threads, 0, stream>>>(h, w, level); break;
case 4: init_data_cost<T, 4><<<grid, threads, 0, stream>>>(h, w, level); break;
default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count");
}
}
template <typename T, int winsz>
void init_data_cost_reduce_caller_(int rows, int cols, int h, int w, int level, int ndisp, int channels, cudaStream_t stream)
{
const int threadsNum = 256;
const size_t smem_size = threadsNum * sizeof(float);
dim3 threads(winsz, 1, threadsNum / winsz);
dim3 grid(w, h, 1);
grid.y *= divUp(ndisp, threads.z);
switch (channels)
{
case 1: init_data_cost_reduce<T, winsz, 1><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
case 3: init_data_cost_reduce<T, winsz, 3><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
case 4: init_data_cost_reduce<T, winsz, 4><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count");
}
}
template<class T>
void init_data_cost(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step,
int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream)
{
typedef void (*InitDataCostCaller)(int cols, int rows, int w, int h, int level, int ndisp, int channels, cudaStream_t stream);
static const InitDataCostCaller init_data_cost_callers[] =
{
init_data_cost_caller_<T>, init_data_cost_caller_<T>, init_data_cost_reduce_caller_<T, 4>,
init_data_cost_reduce_caller_<T, 8>, init_data_cost_reduce_caller_<T, 16>, init_data_cost_reduce_caller_<T, 32>,
init_data_cost_reduce_caller_<T, 64>, init_data_cost_reduce_caller_<T, 128>, init_data_cost_reduce_caller_<T, 256>
};
size_t disp_step = msg_step * h;
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
init_data_cost_callers[level](rows, cols, h, w, level, ndisp, channels, stream);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(w, threads.x);
grid.y = divUp(h, threads.y);
if (use_local_init_data_cost == true)
get_first_k_initial_local<<<grid, threads, 0, stream>>> (data_cost_selected, disp_selected_pyr, h, w, nr_plane);
else
get_first_k_initial_global<<<grid, threads, 0, stream>>>(data_cost_selected, disp_selected_pyr, h, w, nr_plane);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step,
int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream);
template void init_data_cost(int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, size_t msg_step,
int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream);
///////////////////////////////////////////////////////////////
////////////////////// compute data cost //////////////////////
///////////////////////////////////////////////////////////////
template <typename T, int channels>
__global__ void compute_data_cost(const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < h && x < w)
{
int y0 = y << level;
int yt = (y + 1) << level;
int x0 = x << level;
int xt = (x + 1) << level;
const T* selected_disparity = selected_disp_pyr + y/2 * cmsg_step + x/2;
T* data_cost = data_cost_ + y * cmsg_step + x;
for(int d = 0; d < nr_plane; d++)
{
float val = 0.0f;
for(int yi = y0; yi < yt; yi++)
{
for(int xi = x0; xi < xt; xi++)
{
int sel_disp = selected_disparity[d * cdisp_step2];
int xr = xi - sel_disp;
if (xr < 0 || sel_disp < cth)
val += cdata_weight * cmax_data_term;
else
{
const uchar* left_x = cleft + yi * cimg_step + xi * channels;
const uchar* right_x = cright + yi * cimg_step + xr * channels;
val += DataCostPerPixel<channels>::compute(left_x, right_x);
}
}
}
data_cost[cdisp_step1 * d] = saturate_cast<T>(val);
}
}
}
template <typename T, int winsz, int channels>
__global__ void compute_data_cost_reduce(const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane)
{
int x_out = blockIdx.x;
int y_out = blockIdx.y % h;
int d = (blockIdx.y / h) * blockDim.z + threadIdx.z;
int tid = threadIdx.x;
const T* selected_disparity = selected_disp_pyr + y_out/2 * cmsg_step + x_out/2;
T* data_cost = data_cost_ + y_out * cmsg_step + x_out;
if (d < nr_plane)
{
int sel_disp = selected_disparity[d * cdisp_step2];
int x0 = x_out << level;
int y0 = y_out << level;
int len = ::min(y0 + winsz, rows) - y0;
float val = 0.0f;
if (x0 + tid < cols)
{
if (x0 + tid - sel_disp < 0 || sel_disp < cth)
val = cdata_weight * cmax_data_term * len;
else
{
const uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid );
const uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - sel_disp);
for(int y = 0; y < len; ++y)
{
val += DataCostPerPixel<channels>::compute(lle, lri);
lle += cimg_step;
lri += cimg_step;
}
}
}
extern __shared__ float smem[];
reduce<winsz>(smem + winsz * threadIdx.z, val, tid, plus<float>());
if (tid == 0)
data_cost[cdisp_step1 * d] = saturate_cast<T>(val);
}
}
template <typename T>
void compute_data_cost_caller_(const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/,
int h, int w, int level, int nr_plane, int channels, cudaStream_t stream)
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(w, threads.x);
grid.y = divUp(h, threads.y);
switch(channels)
{
case 1: compute_data_cost<T, 1><<<grid, threads, 0, stream>>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break;
case 3: compute_data_cost<T, 3><<<grid, threads, 0, stream>>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break;
case 4: compute_data_cost<T, 4><<<grid, threads, 0, stream>>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break;
default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count");
}
}
template <typename T, int winsz>
void compute_data_cost_reduce_caller_(const T* disp_selected_pyr, T* data_cost, int rows, int cols,
int h, int w, int level, int nr_plane, int channels, cudaStream_t stream)
{
const int threadsNum = 256;
const size_t smem_size = threadsNum * sizeof(float);
dim3 threads(winsz, 1, threadsNum / winsz);
dim3 grid(w, h, 1);
grid.y *= divUp(nr_plane, threads.z);
switch (channels)
{
case 1: compute_data_cost_reduce<T, winsz, 1><<<grid, threads, smem_size, stream>>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break;
case 3: compute_data_cost_reduce<T, winsz, 3><<<grid, threads, smem_size, stream>>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break;
case 4: compute_data_cost_reduce<T, winsz, 4><<<grid, threads, smem_size, stream>>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break;
default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count");
}
}
template<class T>
void compute_data_cost(const T* disp_selected_pyr, T* data_cost, size_t msg_step,
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream)
{
typedef void (*ComputeDataCostCaller)(const T* disp_selected_pyr, T* data_cost, int rows, int cols,
int h, int w, int level, int nr_plane, int channels, cudaStream_t stream);
static const ComputeDataCostCaller callers[] =
{
compute_data_cost_caller_<T>, compute_data_cost_caller_<T>, compute_data_cost_reduce_caller_<T, 4>,
compute_data_cost_reduce_caller_<T, 8>, compute_data_cost_reduce_caller_<T, 16>, compute_data_cost_reduce_caller_<T, 32>,
compute_data_cost_reduce_caller_<T, 64>, compute_data_cost_reduce_caller_<T, 128>, compute_data_cost_reduce_caller_<T, 256>
};
size_t disp_step1 = msg_step * h;
size_t disp_step2 = msg_step * h2;
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
callers[level](disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void compute_data_cost(const short* disp_selected_pyr, short* data_cost, size_t msg_step,
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream);
template void compute_data_cost(const float* disp_selected_pyr, float* data_cost, size_t msg_step,
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream);
///////////////////////////////////////////////////////////////
//////////////////////// init message /////////////////////////
///////////////////////////////////////////////////////////////
template <typename T>
__device__ void get_first_k_element_increase(T* u_new, T* d_new, T* l_new, T* r_new,
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
T* data_cost_selected, T* disparity_selected_new, T* data_cost_new,
const T* data_cost_cur, const T* disparity_selected_cur,
int nr_plane, int nr_plane2)
{
for(int i = 0; i < nr_plane; i++)
{
T minimum = numeric_limits<T>::max();
int id = 0;
for(int j = 0; j < nr_plane2; j++)
{
T cur = data_cost_new[j * cdisp_step1];
if(cur < minimum)
{
minimum = cur;
id = j;
}
}
data_cost_selected[i * cdisp_step1] = data_cost_cur[id * cdisp_step1];
disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step2];
u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
data_cost_new[id * cdisp_step1] = numeric_limits<T>::max();
}
}
template <typename T>
__global__ void init_message(T* u_new_, T* d_new_, T* l_new_, T* r_new_,
const T* u_cur_, const T* d_cur_, const T* l_cur_, const T* r_cur_,
T* selected_disp_pyr_new, const T* selected_disp_pyr_cur,
T* data_cost_selected_, const T* data_cost_,
int h, int w, int nr_plane, int h2, int w2, int nr_plane2)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < h && x < w)
{
const T* u_cur = u_cur_ + ::min(h2-1, y/2 + 1) * cmsg_step + x/2;
const T* d_cur = d_cur_ + ::max(0, y/2 - 1) * cmsg_step + x/2;
const T* l_cur = l_cur_ + (y/2) * cmsg_step + ::min(w2-1, x/2 + 1);
const T* r_cur = r_cur_ + (y/2) * cmsg_step + ::max(0, x/2 - 1);
T* data_cost_new = (T*)ctemp + y * cmsg_step + x;
const T* disparity_selected_cur = selected_disp_pyr_cur + y/2 * cmsg_step + x/2;
const T* data_cost = data_cost_ + y * cmsg_step + x;
for(int d = 0; d < nr_plane2; d++)
{
int idx2 = d * cdisp_step2;
T val = data_cost[d * cdisp_step1] + u_cur[idx2] + d_cur[idx2] + l_cur[idx2] + r_cur[idx2];
data_cost_new[d * cdisp_step1] = val;
}
T* data_cost_selected = data_cost_selected_ + y * cmsg_step + x;
T* disparity_selected_new = selected_disp_pyr_new + y * cmsg_step + x;
T* u_new = u_new_ + y * cmsg_step + x;
T* d_new = d_new_ + y * cmsg_step + x;
T* l_new = l_new_ + y * cmsg_step + x;
T* r_new = r_new_ + y * cmsg_step + x;
u_cur = u_cur_ + y/2 * cmsg_step + x/2;
d_cur = d_cur_ + y/2 * cmsg_step + x/2;
l_cur = l_cur_ + y/2 * cmsg_step + x/2;
r_cur = r_cur_ + y/2 * cmsg_step + x/2;
get_first_k_element_increase(u_new, d_new, l_new, r_new, u_cur, d_cur, l_cur, r_cur,
data_cost_selected, disparity_selected_new, data_cost_new,
data_cost, disparity_selected_cur, nr_plane, nr_plane2);
}
}
template<class T>
void init_message(T* u_new, T* d_new, T* l_new, T* r_new,
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
T* selected_disp_pyr_new, const T* selected_disp_pyr_cur,
T* data_cost_selected, const T* data_cost, size_t msg_step,
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream)
{
size_t disp_step1 = msg_step * h;
size_t disp_step2 = msg_step * h2;
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(w, threads.x);
grid.y = divUp(h, threads.y);
init_message<<<grid, threads, 0, stream>>>(u_new, d_new, l_new, r_new,
u_cur, d_cur, l_cur, r_cur,
selected_disp_pyr_new, selected_disp_pyr_cur,
data_cost_selected, data_cost,
h, w, nr_plane, h2, w2, nr_plane2);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void init_message(short* u_new, short* d_new, short* l_new, short* r_new,
const short* u_cur, const short* d_cur, const short* l_cur, const short* r_cur,
short* selected_disp_pyr_new, const short* selected_disp_pyr_cur,
short* data_cost_selected, const short* data_cost, size_t msg_step,
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream);
template void init_message(float* u_new, float* d_new, float* l_new, float* r_new,
const float* u_cur, const float* d_cur, const float* l_cur, const float* r_cur,
float* selected_disp_pyr_new, const float* selected_disp_pyr_cur,
float* data_cost_selected, const float* data_cost, size_t msg_step,
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream);
///////////////////////////////////////////////////////////////
//////////////////// calc all iterations /////////////////////
///////////////////////////////////////////////////////////////
template <typename T>
__device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3,
const T* dst_disp, const T* src_disp, int nr_plane, volatile T* temp)
{
T minimum = numeric_limits<T>::max();
for(int d = 0; d < nr_plane; d++)
{
int idx = d * cdisp_step1;
T val = data[idx] + msg1[idx] + msg2[idx] + msg3[idx];
if(val < minimum)
minimum = val;
msg_dst[idx] = val;
}
float sum = 0;
for(int d = 0; d < nr_plane; d++)
{
float cost_min = minimum + cmax_disc_term;
T src_disp_reg = src_disp[d * cdisp_step1];
for(int d2 = 0; d2 < nr_plane; d2++)
cost_min = fmin(cost_min, msg_dst[d2 * cdisp_step1] + cdisc_single_jump * ::abs(dst_disp[d2 * cdisp_step1] - src_disp_reg));
temp[d * cdisp_step1] = saturate_cast<T>(cost_min);
sum += cost_min;
}
sum /= nr_plane;
for(int d = 0; d < nr_plane; d++)
msg_dst[d * cdisp_step1] = saturate_cast<T>(temp[d * cdisp_step1] - sum);
}
template <typename T>
__global__ void compute_message(T* u_, T* d_, T* l_, T* r_, const T* data_cost_selected, const T* selected_disp_pyr_cur, int h, int w, int nr_plane, int i)
{
int y = blockIdx.y * blockDim.y + threadIdx.y;
int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + i) & 1);
if (y > 0 && y < h - 1 && x > 0 && x < w - 1)
{
const T* data = data_cost_selected + y * cmsg_step + x;
T* u = u_ + y * cmsg_step + x;
T* d = d_ + y * cmsg_step + x;
T* l = l_ + y * cmsg_step + x;
T* r = r_ + y * cmsg_step + x;
const T* disp = selected_disp_pyr_cur + y * cmsg_step + x;
T* temp = (T*)ctemp + y * cmsg_step + x;
message_per_pixel(data, u, r - 1, u + cmsg_step, l + 1, disp, disp - cmsg_step, nr_plane, temp);
message_per_pixel(data, d, d - cmsg_step, r - 1, l + 1, disp, disp + cmsg_step, nr_plane, temp);
message_per_pixel(data, l, u + cmsg_step, d - cmsg_step, l + 1, disp, disp - 1, nr_plane, temp);
message_per_pixel(data, r, u + cmsg_step, d - cmsg_step, r - 1, disp, disp + 1, nr_plane, temp);
}
}
template<class T>
void calc_all_iterations(T* u, T* d, T* l, T* r, const T* data_cost_selected,
const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream)
{
size_t disp_step = msg_step * h;
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(w, threads.x << 1);
grid.y = divUp(h, threads.y);
for(int t = 0; t < iters; ++t)
{
compute_message<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1);
cudaSafeCall( cudaGetLastError() );
}
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
};
template void calc_all_iterations(short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step,
int h, int w, int nr_plane, int iters, cudaStream_t stream);
template void calc_all_iterations(float* u, float* d, float* l, float* r, const float* data_cost_selected, const float* selected_disp_pyr_cur, size_t msg_step,
int h, int w, int nr_plane, int iters, cudaStream_t stream);
///////////////////////////////////////////////////////////////
/////////////////////////// output ////////////////////////////
///////////////////////////////////////////////////////////////
template <typename T>
__global__ void compute_disp(const T* u_, const T* d_, const T* l_, const T* r_,
const T* data_cost_selected, const T* disp_selected_pyr,
PtrStepSz<short> disp, int nr_plane)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y > 0 && y < disp.rows - 1 && x > 0 && x < disp.cols - 1)
{
const T* data = data_cost_selected + y * cmsg_step + x;
const T* disp_selected = disp_selected_pyr + y * cmsg_step + x;
const T* u = u_ + (y+1) * cmsg_step + (x+0);
const T* d = d_ + (y-1) * cmsg_step + (x+0);
const T* l = l_ + (y+0) * cmsg_step + (x+1);
const T* r = r_ + (y+0) * cmsg_step + (x-1);
int best = 0;
T best_val = numeric_limits<T>::max();
for (int i = 0; i < nr_plane; ++i)
{
int idx = i * cdisp_step1;
T val = data[idx]+ u[idx] + d[idx] + l[idx] + r[idx];
if (val < best_val)
{
best_val = val;
best = saturate_cast<short>(disp_selected[idx]);
}
}
disp(y, x) = best;
}
}
template<class T>
void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream)
{
size_t disp_step = disp.rows * msg_step;
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(disp.cols, threads.x);
grid.y = divUp(disp.rows, threads.y);
compute_disp<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, disp_selected, disp, nr_plane);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step,
const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream);
template void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step,
const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream);
} // namespace stereocsbp
}}} // namespace cv { namespace gpu { namespace cudev {
#endif /* CUDA_DISABLER */

View File

@@ -1,157 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int) { throw_no_cuda(); }
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int, float, float, float) { throw_no_cuda(); }
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu { namespace cudev
{
namespace disp_bilateral_filter
{
void disp_load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc);
template<typename T>
void disp_bilateral_filter(PtrStepSz<T> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
}
}}}
using namespace ::cv::gpu::cudev::disp_bilateral_filter;
namespace
{
const float DEFAULT_EDGE_THRESHOLD = 0.1f;
const float DEFAULT_MAX_DISC_THRESHOLD = 0.2f;
const float DEFAULT_SIGMA_RANGE = 10.0f;
inline void calc_color_weighted_table(GpuMat& table_color, float sigma_range, int len)
{
Mat cpu_table_color(1, len, CV_32F);
float* line = cpu_table_color.ptr<float>();
for(int i = 0; i < len; i++)
line[i] = static_cast<float>(std::exp(-double(i * i) / (2 * sigma_range * sigma_range)));
table_color.upload(cpu_table_color);
}
inline void calc_space_weighted_filter(GpuMat& table_space, int win_size, float dist_space)
{
int half = (win_size >> 1);
Mat cpu_table_space(half + 1, half + 1, CV_32F);
for (int y = 0; y <= half; ++y)
{
float* row = cpu_table_space.ptr<float>(y);
for (int x = 0; x <= half; ++x)
row[x] = exp(-sqrt(float(y * y) + float(x * x)) / dist_space);
}
table_space.upload(cpu_table_space);
}
template <typename T>
void disp_bilateral_filter_operator(int ndisp, int radius, int iters, float edge_threshold,float max_disc_threshold,
GpuMat& table_color, GpuMat& table_space,
const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream)
{
short edge_disc = std::max<short>(short(1), short(ndisp * edge_threshold + 0.5));
short max_disc = short(ndisp * max_disc_threshold + 0.5);
disp_load_constants(table_color.ptr<float>(), table_space, ndisp, radius, edge_disc, max_disc);
if (&dst != &disp)
{
if (stream)
stream.enqueueCopy(disp, dst);
else
disp.copyTo(dst);
}
disp_bilateral_filter<T>(dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
}
typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
GpuMat& table_color, GpuMat& table_space,
const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream);
const bilateral_filter_operator_t operators[] =
{disp_bilateral_filter_operator<unsigned char>, 0, 0, disp_bilateral_filter_operator<short>, 0, 0, 0, 0};
}
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_)
: ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(DEFAULT_EDGE_THRESHOLD), max_disc_threshold(DEFAULT_MAX_DISC_THRESHOLD),
sigma_range(DEFAULT_SIGMA_RANGE)
{
calc_color_weighted_table(table_color, sigma_range, 255);
calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f);
}
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_, float edge_threshold_,
float max_disc_threshold_, float sigma_range_)
: ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(edge_threshold_), max_disc_threshold(max_disc_threshold_),
sigma_range(sigma_range_)
{
calc_color_weighted_table(table_color, sigma_range, 255);
calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f);
}
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream)
{
CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters);
CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3));
operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, stream);
}
#endif /* !defined (HAVE_CUDA) */

View File

@@ -1,140 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
cv::gpu::StereoBM_GPU::StereoBM_GPU() { throw_no_cuda(); }
cv::gpu::StereoBM_GPU::StereoBM_GPU(int, int, int) { throw_no_cuda(); }
bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable() { throw_no_cuda(); return false; }
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu { namespace cudev
{
namespace stereobm
{
void stereoBM_GPU(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int ndisp, int winsz, const PtrStepSz<unsigned int>& minSSD_buf, cudaStream_t & stream);
void prefilter_xsobel(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap /*= 31*/, cudaStream_t & stream);
void postfilter_textureness(const PtrStepSzb& input, int winsz, float avgTexturenessThreshold, const PtrStepSzb& disp, cudaStream_t & stream);
}
}}}
namespace
{
const float defaultAvgTexThreshold = 3;
}
cv::gpu::StereoBM_GPU::StereoBM_GPU()
: preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ), avergeTexThreshold(defaultAvgTexThreshold)
{
}
cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_, int winSize_)
: preset(preset_), ndisp(ndisparities_), winSize(winSize_), avergeTexThreshold(defaultAvgTexThreshold)
{
const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8);
CV_Assert(0 < ndisp && ndisp <= max_supported_ndisp);
CV_Assert(ndisp % 8 == 0);
CV_Assert(winSize % 2 == 1);
}
bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
{
if (0 == getCudaEnabledDeviceCount())
return false;
DeviceInfo device_info;
if (device_info.majorVersion() > 1 || device_info.multiProcessorCount() > 16)
return true;
return false;
}
namespace
{
void stereo_bm_gpu_operator( GpuMat& minSSD, GpuMat& leBuf, GpuMat& riBuf, int preset, int ndisp, int winSize, float avergeTexThreshold, const GpuMat& left, const GpuMat& right, GpuMat& disparity, cudaStream_t stream)
{
using namespace ::cv::gpu::cudev::stereobm;
CV_Assert(left.rows == right.rows && left.cols == right.cols);
CV_Assert(left.type() == CV_8UC1);
CV_Assert(right.type() == CV_8UC1);
disparity.create(left.size(), CV_8U);
minSSD.create(left.size(), CV_32S);
GpuMat le_for_bm = left;
GpuMat ri_for_bm = right;
if (preset == StereoBM_GPU::PREFILTER_XSOBEL)
{
leBuf.create( left.size(), left.type());
riBuf.create(right.size(), right.type());
prefilter_xsobel( left, leBuf, 31, stream);
prefilter_xsobel(right, riBuf, 31, stream);
le_for_bm = leBuf;
ri_for_bm = riBuf;
}
stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD, stream);
if (avergeTexThreshold)
postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity, stream);
}
}
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream)
{
stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, StreamAccessor::getStream(stream));
}
#endif /* !defined (HAVE_CUDA) */

View File

@@ -1,368 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
void cv::gpu::StereoBeliefPropagation::estimateRecommendedParams(int, int, int&, int&, int&) { throw_no_cuda(); }
cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation(int, int, int, int) { throw_no_cuda(); }
cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation(int, int, int, float, float, float, float, int) { throw_no_cuda(); }
void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu { namespace cudev
{
namespace stereobp
{
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump);
template<typename T, typename D>
void comp_data_gpu(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream);
template<typename T>
void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
template <typename T>
void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
template <typename T>
void calc_all_iterations_gpu(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d,
const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream);
template <typename T>
void output_gpu(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data,
const PtrStepSz<short>& disp, cudaStream_t stream);
}
}}}
using namespace ::cv::gpu::cudev::stereobp;
namespace
{
const float DEFAULT_MAX_DATA_TERM = 10.0f;
const float DEFAULT_DATA_WEIGHT = 0.07f;
const float DEFAULT_MAX_DISC_TERM = 1.7f;
const float DEFAULT_DISC_SINGLE_JUMP = 1.0f;
}
void cv::gpu::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels)
{
ndisp = width / 4;
if ((ndisp & 1) != 0)
ndisp++;
int mm = std::max(width, height);
iters = mm / 100 + 2;
levels = (int)(::log(static_cast<double>(mm)) + 1) * 4 / 5;
if (levels == 0) levels++;
}
cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp_, int iters_, int levels_, int msg_type_)
: ndisp(ndisp_), iters(iters_), levels(levels_),
max_data_term(DEFAULT_MAX_DATA_TERM), data_weight(DEFAULT_DATA_WEIGHT),
max_disc_term(DEFAULT_MAX_DISC_TERM), disc_single_jump(DEFAULT_DISC_SINGLE_JUMP),
msg_type(msg_type_), datas(levels_)
{
}
cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp_, int iters_, int levels_, float max_data_term_, float data_weight_, float max_disc_term_, float disc_single_jump_, int msg_type_)
: ndisp(ndisp_), iters(iters_), levels(levels_),
max_data_term(max_data_term_), data_weight(data_weight_),
max_disc_term(max_disc_term_), disc_single_jump(disc_single_jump_),
msg_type(msg_type_), datas(levels_)
{
}
namespace
{
class StereoBeliefPropagationImpl
{
public:
StereoBeliefPropagationImpl(StereoBeliefPropagation& rthis_,
GpuMat& u_, GpuMat& d_, GpuMat& l_, GpuMat& r_,
GpuMat& u2_, GpuMat& d2_, GpuMat& l2_, GpuMat& r2_,
std::vector<GpuMat>& datas_, GpuMat& out_)
: rthis(rthis_), u(u_), d(d_), l(l_), r(r_), u2(u2_), d2(d2_), l2(l2_), r2(r2_), datas(datas_), out(out_),
zero(Scalar::all(0)), scale(rthis_.msg_type == CV_32F ? 1.0f : 10.0f)
{
CV_Assert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels);
CV_Assert(rthis.msg_type == CV_32F || rthis.msg_type == CV_16S);
CV_Assert(rthis.msg_type == CV_32F || (1 << (rthis.levels - 1)) * scale * rthis.max_data_term < std::numeric_limits<short>::max());
}
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
{
typedef void (*comp_data_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream);
static const comp_data_t comp_data_callers[2][5] =
{
{0, comp_data_gpu<unsigned char, short>, 0, comp_data_gpu<uchar3, short>, comp_data_gpu<uchar4, short>},
{0, comp_data_gpu<unsigned char, float>, 0, comp_data_gpu<uchar3, float>, comp_data_gpu<uchar4, float>}
};
CV_Assert(left.size() == right.size() && left.type() == right.type());
CV_Assert(left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4);
rows = left.rows;
cols = left.cols;
int divisor = (int)pow(2.f, rthis.levels - 1.0f);
int lowest_cols = cols / divisor;
int lowest_rows = rows / divisor;
const int min_image_dim_size = 2;
CV_Assert(std::min(lowest_cols, lowest_rows) > min_image_dim_size);
init(stream);
datas[0].create(rows * rthis.ndisp, cols, rthis.msg_type);
comp_data_callers[rthis.msg_type == CV_32F][left.channels()](left, right, datas[0], StreamAccessor::getStream(stream));
calcBP(disp, stream);
}
void operator()(const GpuMat& data, GpuMat& disp, Stream& stream)
{
CV_Assert((data.type() == rthis.msg_type) && (data.rows % rthis.ndisp == 0));
rows = data.rows / rthis.ndisp;
cols = data.cols;
int divisor = (int)pow(2.f, rthis.levels - 1.0f);
int lowest_cols = cols / divisor;
int lowest_rows = rows / divisor;
const int min_image_dim_size = 2;
CV_Assert(std::min(lowest_cols, lowest_rows) > min_image_dim_size);
init(stream);
datas[0] = data;
calcBP(disp, stream);
}
private:
void init(Stream& stream)
{
u.create(rows * rthis.ndisp, cols, rthis.msg_type);
d.create(rows * rthis.ndisp, cols, rthis.msg_type);
l.create(rows * rthis.ndisp, cols, rthis.msg_type);
r.create(rows * rthis.ndisp, cols, rthis.msg_type);
if (rthis.levels & 1)
{
//can clear less area
if (stream)
{
stream.enqueueMemSet(u, zero);
stream.enqueueMemSet(d, zero);
stream.enqueueMemSet(l, zero);
stream.enqueueMemSet(r, zero);
}
else
{
u.setTo(zero);
d.setTo(zero);
l.setTo(zero);
r.setTo(zero);
}
}
if (rthis.levels > 1)
{
int less_rows = (rows + 1) / 2;
int less_cols = (cols + 1) / 2;
u2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type);
d2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type);
l2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type);
r2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type);
if ((rthis.levels & 1) == 0)
{
if (stream)
{
stream.enqueueMemSet(u2, zero);
stream.enqueueMemSet(d2, zero);
stream.enqueueMemSet(l2, zero);
stream.enqueueMemSet(r2, zero);
}
else
{
u2.setTo(zero);
d2.setTo(zero);
l2.setTo(zero);
r2.setTo(zero);
}
}
}
load_constants(rthis.ndisp, rthis.max_data_term, scale * rthis.data_weight, scale * rthis.max_disc_term, scale * rthis.disc_single_jump);
datas.resize(rthis.levels);
cols_all.resize(rthis.levels);
rows_all.resize(rthis.levels);
cols_all[0] = cols;
rows_all[0] = rows;
}
void calcBP(GpuMat& disp, Stream& stream)
{
typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
static const data_step_down_t data_step_down_callers[2] =
{
data_step_down_gpu<short>, data_step_down_gpu<float>
};
typedef void (*level_up_messages_t)(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
static const level_up_messages_t level_up_messages_callers[2] =
{
level_up_messages_gpu<short>, level_up_messages_gpu<float>
};
typedef void (*calc_all_iterations_t)(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream);
static const calc_all_iterations_t calc_all_iterations_callers[2] =
{
calc_all_iterations_gpu<short>, calc_all_iterations_gpu<float>
};
typedef void (*output_t)(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);
static const output_t output_callers[2] =
{
output_gpu<short>, output_gpu<float>
};
const int funcIdx = rthis.msg_type == CV_32F;
cudaStream_t cudaStream = StreamAccessor::getStream(stream);
for (int i = 1; i < rthis.levels; ++i)
{
cols_all[i] = (cols_all[i-1] + 1) / 2;
rows_all[i] = (rows_all[i-1] + 1) / 2;
datas[i].create(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type);
data_step_down_callers[funcIdx](cols_all[i], rows_all[i], rows_all[i-1], datas[i-1], datas[i], cudaStream);
}
PtrStepSzb mus[] = {u, u2};
PtrStepSzb mds[] = {d, d2};
PtrStepSzb mrs[] = {r, r2};
PtrStepSzb mls[] = {l, l2};
int mem_idx = (rthis.levels & 1) ? 0 : 1;
for (int i = rthis.levels - 1; i >= 0; --i)
{
// for lower level we have already computed messages by setting to zero
if (i != rthis.levels - 1)
level_up_messages_callers[funcIdx](mem_idx, cols_all[i], rows_all[i], rows_all[i+1], mus, mds, mls, mrs, cudaStream);
calc_all_iterations_callers[funcIdx](cols_all[i], rows_all[i], rthis.iters, mus[mem_idx], mds[mem_idx], mls[mem_idx], mrs[mem_idx], datas[i], cudaStream);
mem_idx = (mem_idx + 1) & 1;
}
if (disp.empty())
disp.create(rows, cols, CV_16S);
out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
if (stream)
stream.enqueueMemSet(out, zero);
else
out.setTo(zero);
output_callers[funcIdx](u, d, l, r, datas.front(), out, cudaStream);
if (disp.type() != CV_16S)
{
if (stream)
stream.enqueueConvert(out, disp, disp.type());
else
out.convertTo(disp, disp.type());
}
}
StereoBeliefPropagation& rthis;
GpuMat& u;
GpuMat& d;
GpuMat& l;
GpuMat& r;
GpuMat& u2;
GpuMat& d2;
GpuMat& l2;
GpuMat& r2;
std::vector<GpuMat>& datas;
GpuMat& out;
const Scalar zero;
const float scale;
int rows, cols;
std::vector<int> cols_all, rows_all;
};
}
void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
{
StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out);
impl(left, right, disp, stream);
}
void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat& data, GpuMat& disp, Stream& stream)
{
StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out);
impl(data, disp, stream);
}
#endif /* !defined (HAVE_CUDA) */

View File

@@ -1,311 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int, int, int&, int&, int&, int&) { throw_no_cuda(); }
cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, int) { throw_no_cuda(); }
cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, float, float, float, float, int, int) { throw_no_cuda(); }
void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
#else /* !defined (HAVE_CUDA) */
#include "opencv2/core/utility.hpp"
namespace cv { namespace gpu { namespace cudev
{
namespace stereocsbp
{
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& temp);
template<class T>
void init_data_cost(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step,
int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream);
template<class T>
void compute_data_cost(const T* disp_selected_pyr, T* data_cost, size_t msg_step,
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream);
template<class T>
void init_message(T* u_new, T* d_new, T* l_new, T* r_new,
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
T* selected_disp_pyr_new, const T* selected_disp_pyr_cur,
T* data_cost_selected, const T* data_cost, size_t msg_step,
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream);
template<class T>
void calc_all_iterations(T* u, T* d, T* l, T* r, const T* data_cost_selected,
const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream);
template<class T>
void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream);
}
}}}
using namespace ::cv::gpu::cudev::stereocsbp;
namespace
{
const float DEFAULT_MAX_DATA_TERM = 30.0f;
const float DEFAULT_DATA_WEIGHT = 1.0f;
const float DEFAULT_MAX_DISC_TERM = 160.0f;
const float DEFAULT_DISC_SINGLE_JUMP = 10.0f;
}
void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane)
{
ndisp = (int) ((float) width / 3.14f);
if ((ndisp & 1) != 0)
ndisp++;
int mm = std::max(width, height);
iters = mm / 100 + ((mm > 1200)? - 4 : 4);
levels = (int)::log(static_cast<double>(mm)) * 2 / 3;
if (levels == 0) levels++;
nr_plane = (int) ((float) ndisp / std::pow(2.0, levels + 1));
}
cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_,
int msg_type_)
: ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_),
max_data_term(DEFAULT_MAX_DATA_TERM), data_weight(DEFAULT_DATA_WEIGHT),
max_disc_term(DEFAULT_MAX_DISC_TERM), disc_single_jump(DEFAULT_DISC_SINGLE_JUMP), min_disp_th(0),
msg_type(msg_type_), use_local_init_data_cost(true)
{
CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S);
}
cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_,
float max_data_term_, float data_weight_, float max_disc_term_, float disc_single_jump_,
int min_disp_th_, int msg_type_)
: ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_),
max_data_term(max_data_term_), data_weight(data_weight_),
max_disc_term(max_disc_term_), disc_single_jump(disc_single_jump_), min_disp_th(min_disp_th_),
msg_type(msg_type_), use_local_init_data_cost(true)
{
CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S);
}
template<class T>
static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat& mbuf, GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
{
CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane
&& left.rows == right.rows && left.cols == right.cols && left.type() == right.type());
CV_Assert(rthis.levels <= 8 && (left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4));
const Scalar zero = Scalar::all(0);
cudaStream_t cudaStream = StreamAccessor::getStream(stream);
////////////////////////////////////////////////////////////////////////////////////////////
// Init
int rows = left.rows;
int cols = left.cols;
rthis.levels = std::min(rthis.levels, int(log((double)rthis.ndisp) / log(2.0)));
int levels = rthis.levels;
// compute sizes
AutoBuffer<int> buf(levels * 3);
int* cols_pyr = buf;
int* rows_pyr = cols_pyr + levels;
int* nr_plane_pyr = rows_pyr + levels;
cols_pyr[0] = cols;
rows_pyr[0] = rows;
nr_plane_pyr[0] = rthis.nr_plane;
for (int i = 1; i < levels; i++)
{
cols_pyr[i] = cols_pyr[i-1] / 2;
rows_pyr[i] = rows_pyr[i-1] / 2;
nr_plane_pyr[i] = nr_plane_pyr[i-1] * 2;
}
GpuMat u[2], d[2], l[2], r[2], disp_selected_pyr[2], data_cost, data_cost_selected;
//allocate buffers
int buffers_count = 10; // (up + down + left + right + disp_selected_pyr) * 2
buffers_count += 2; // data_cost has twice more rows than other buffers, what's why +2, not +1;
buffers_count += 1; // data_cost_selected
mbuf.create(rows * rthis.nr_plane * buffers_count, cols, DataType<T>::type);
data_cost = mbuf.rowRange(0, rows * rthis.nr_plane * 2);
data_cost_selected = mbuf.rowRange(data_cost.rows, data_cost.rows + rows * rthis.nr_plane);
for(int k = 0; k < 2; ++k) // in/out
{
GpuMat sub1 = mbuf.rowRange(data_cost.rows + data_cost_selected.rows, mbuf.rows);
GpuMat sub2 = sub1.rowRange((k+0)*sub1.rows/2, (k+1)*sub1.rows/2);
GpuMat *buf_ptrs[] = { &u[k], &d[k], &l[k], &r[k], &disp_selected_pyr[k] };
for(int _r = 0; _r < 5; ++_r)
{
*buf_ptrs[_r] = sub2.rowRange(_r * sub2.rows/5, (_r+1) * sub2.rows/5);
assert(buf_ptrs[_r]->cols == cols && buf_ptrs[_r]->rows == rows * rthis.nr_plane);
}
};
size_t elem_step = mbuf.step / sizeof(T);
Size temp_size = data_cost.size();
if ((size_t)temp_size.area() < elem_step * rows_pyr[levels - 1] * rthis.ndisp)
temp_size = Size(static_cast<int>(elem_step), rows_pyr[levels - 1] * rthis.ndisp);
temp.create(temp_size, DataType<T>::type);
////////////////////////////////////////////////////////////////////////////
// Compute
load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight, rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);
if (stream)
{
stream.enqueueMemSet(l[0], zero);
stream.enqueueMemSet(d[0], zero);
stream.enqueueMemSet(r[0], zero);
stream.enqueueMemSet(u[0], zero);
stream.enqueueMemSet(l[1], zero);
stream.enqueueMemSet(d[1], zero);
stream.enqueueMemSet(r[1], zero);
stream.enqueueMemSet(u[1], zero);
stream.enqueueMemSet(data_cost, zero);
stream.enqueueMemSet(data_cost_selected, zero);
}
else
{
l[0].setTo(zero);
d[0].setTo(zero);
r[0].setTo(zero);
u[0].setTo(zero);
l[1].setTo(zero);
d[1].setTo(zero);
r[1].setTo(zero);
u[1].setTo(zero);
data_cost.setTo(zero);
data_cost_selected.setTo(zero);
}
int cur_idx = 0;
for (int i = levels - 1; i >= 0; i--)
{
if (i == levels - 1)
{
init_data_cost(left.rows, left.cols, disp_selected_pyr[cur_idx].ptr<T>(), data_cost_selected.ptr<T>(),
elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], rthis.ndisp, left.channels(), rthis.use_local_init_data_cost, cudaStream);
}
else
{
compute_data_cost(disp_selected_pyr[cur_idx].ptr<T>(), data_cost.ptr<T>(), elem_step,
left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), cudaStream);
int new_idx = (cur_idx + 1) & 1;
init_message(u[new_idx].ptr<T>(), d[new_idx].ptr<T>(), l[new_idx].ptr<T>(), r[new_idx].ptr<T>(),
u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
disp_selected_pyr[new_idx].ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(),
data_cost_selected.ptr<T>(), data_cost.ptr<T>(), elem_step, rows_pyr[i],
cols_pyr[i], nr_plane_pyr[i], rows_pyr[i+1], cols_pyr[i+1], nr_plane_pyr[i+1], cudaStream);
cur_idx = new_idx;
}
calc_all_iterations(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), elem_step,
rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], rthis.iters, cudaStream);
}
if (disp.empty())
disp.create(rows, cols, CV_16S);
out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
if (stream)
stream.enqueueMemSet(out, zero);
else
out.setTo(zero);
compute_disp(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), elem_step, out, nr_plane_pyr[0], cudaStream);
if (disp.type() != CV_16S)
{
if (stream)
stream.enqueueConvert(out, disp, disp.type());
else
out.convertTo(disp, disp.type());
}
}
typedef void (*csbp_operator_t)(StereoConstantSpaceBP& rthis, GpuMat& mbuf,
GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream);
const static csbp_operator_t operators[] = {0, 0, 0, csbp_operator<short>, 0, csbp_operator<float>, 0, 0};
void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
{
CV_Assert(msg_type == CV_32F || msg_type == CV_16S);
operators[msg_type](*this, messages_buffers, temp, out, left, right, disp, stream);
}
#endif /* !defined (HAVE_CUDA) */

View File

@@ -1,348 +0,0 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#ifdef HAVE_CUDA
using namespace cvtest;
//////////////////////////////////////////////////////////////////////////
// StereoBM
struct StereoBM : testing::TestWithParam<cv::gpu::DeviceInfo>
{
cv::gpu::DeviceInfo devInfo;
virtual void SetUp()
{
devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
}
};
GPU_TEST_P(StereoBM, Regression)
{
cv::Mat left_image = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
cv::Mat right_image = readImage("stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
cv::Mat disp_gold = readImage("stereobm/aloe-disp.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(left_image.empty());
ASSERT_FALSE(right_image.empty());
ASSERT_FALSE(disp_gold.empty());
cv::gpu::StereoBM_GPU bm(0, 128, 19);
cv::gpu::GpuMat disp;
bm(loadMat(left_image), loadMat(right_image), disp);
EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
}
INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoBM, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////////
// StereoBeliefPropagation
struct StereoBeliefPropagation : testing::TestWithParam<cv::gpu::DeviceInfo>
{
cv::gpu::DeviceInfo devInfo;
virtual void SetUp()
{
devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
}
};
GPU_TEST_P(StereoBeliefPropagation, Regression)
{
cv::Mat left_image = readImage("stereobp/aloe-L.png");
cv::Mat right_image = readImage("stereobp/aloe-R.png");
cv::Mat disp_gold = readImage("stereobp/aloe-disp.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(left_image.empty());
ASSERT_FALSE(right_image.empty());
ASSERT_FALSE(disp_gold.empty());
cv::gpu::StereoBeliefPropagation bp(64, 8, 2, 25, 0.1f, 15, 1, CV_16S);
cv::gpu::GpuMat disp;
bp(loadMat(left_image), loadMat(right_image), disp);
cv::Mat h_disp(disp);
h_disp.convertTo(h_disp, disp_gold.depth());
EXPECT_MAT_NEAR(disp_gold, h_disp, 0.0);
}
INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoBeliefPropagation, ALL_DEVICES);
//////////////////////////////////////////////////////////////////////////
// StereoConstantSpaceBP
struct StereoConstantSpaceBP : testing::TestWithParam<cv::gpu::DeviceInfo>
{
cv::gpu::DeviceInfo devInfo;
virtual void SetUp()
{
devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
}
};
GPU_TEST_P(StereoConstantSpaceBP, Regression)
{
cv::Mat left_image = readImage("csstereobp/aloe-L.png");
cv::Mat right_image = readImage("csstereobp/aloe-R.png");
cv::Mat disp_gold;
if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
disp_gold = readImage("csstereobp/aloe-disp.png", cv::IMREAD_GRAYSCALE);
else
disp_gold = readImage("csstereobp/aloe-disp_CC1X.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(left_image.empty());
ASSERT_FALSE(right_image.empty());
ASSERT_FALSE(disp_gold.empty());
cv::gpu::StereoConstantSpaceBP csbp(128, 16, 4, 4);
cv::gpu::GpuMat disp;
csbp(loadMat(left_image), loadMat(right_image), disp);
cv::Mat h_disp(disp);
h_disp.convertTo(h_disp, disp_gold.depth());
EXPECT_MAT_NEAR(disp_gold, h_disp, 1.0);
}
INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoConstantSpaceBP, ALL_DEVICES);
///////////////////////////////////////////////////////////////////////////////////////////////////////
// transformPoints
struct TransformPoints : testing::TestWithParam<cv::gpu::DeviceInfo>
{
cv::gpu::DeviceInfo devInfo;
virtual void SetUp()
{
devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
}
};
GPU_TEST_P(TransformPoints, Accuracy)
{
cv::Mat src = randomMat(cv::Size(1000, 1), CV_32FC3, 0, 10);
cv::Mat rvec = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
cv::Mat tvec = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
cv::gpu::GpuMat dst;
cv::gpu::transformPoints(loadMat(src), rvec, tvec, dst);
ASSERT_EQ(src.size(), dst.size());
ASSERT_EQ(src.type(), dst.type());
cv::Mat h_dst(dst);
cv::Mat rot;
cv::Rodrigues(rvec, rot);
for (int i = 0; i < h_dst.cols; ++i)
{
cv::Point3f res = h_dst.at<cv::Point3f>(0, i);
cv::Point3f p = src.at<cv::Point3f>(0, i);
cv::Point3f res_gold(
rot.at<float>(0, 0) * p.x + rot.at<float>(0, 1) * p.y + rot.at<float>(0, 2) * p.z + tvec.at<float>(0, 0),
rot.at<float>(1, 0) * p.x + rot.at<float>(1, 1) * p.y + rot.at<float>(1, 2) * p.z + tvec.at<float>(0, 1),
rot.at<float>(2, 0) * p.x + rot.at<float>(2, 1) * p.y + rot.at<float>(2, 2) * p.z + tvec.at<float>(0, 2));
ASSERT_POINT3_NEAR(res_gold, res, 1e-5);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Calib3D, TransformPoints, ALL_DEVICES);
///////////////////////////////////////////////////////////////////////////////////////////////////////
// ProjectPoints
struct ProjectPoints : testing::TestWithParam<cv::gpu::DeviceInfo>
{
cv::gpu::DeviceInfo devInfo;
virtual void SetUp()
{
devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
}
};
GPU_TEST_P(ProjectPoints, Accuracy)
{
cv::Mat src = randomMat(cv::Size(1000, 1), CV_32FC3, 0, 10);
cv::Mat rvec = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
cv::Mat tvec = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
cv::Mat camera_mat = randomMat(cv::Size(3, 3), CV_32F, 0.5, 1);
camera_mat.at<float>(0, 1) = 0.f;
camera_mat.at<float>(1, 0) = 0.f;
camera_mat.at<float>(2, 0) = 0.f;
camera_mat.at<float>(2, 1) = 0.f;
cv::gpu::GpuMat dst;
cv::gpu::projectPoints(loadMat(src), rvec, tvec, camera_mat, cv::Mat(), dst);
ASSERT_EQ(1, dst.rows);
ASSERT_EQ(MatType(CV_32FC2), MatType(dst.type()));
std::vector<cv::Point2f> dst_gold;
cv::projectPoints(src, rvec, tvec, camera_mat, cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)), dst_gold);
ASSERT_EQ(dst_gold.size(), static_cast<size_t>(dst.cols));
cv::Mat h_dst(dst);
for (size_t i = 0; i < dst_gold.size(); ++i)
{
cv::Point2f res = h_dst.at<cv::Point2f>(0, (int)i);
cv::Point2f res_gold = dst_gold[i];
ASSERT_LE(cv::norm(res_gold - res) / cv::norm(res_gold), 1e-3f);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Calib3D, ProjectPoints, ALL_DEVICES);
///////////////////////////////////////////////////////////////////////////////////////////////////////
// SolvePnPRansac
struct SolvePnPRansac : testing::TestWithParam<cv::gpu::DeviceInfo>
{
cv::gpu::DeviceInfo devInfo;
virtual void SetUp()
{
devInfo = GetParam();
cv::gpu::setDevice(devInfo.deviceID());
}
};
GPU_TEST_P(SolvePnPRansac, Accuracy)
{
cv::Mat object = randomMat(cv::Size(5000, 1), CV_32FC3, 0, 100);
cv::Mat camera_mat = randomMat(cv::Size(3, 3), CV_32F, 0.5, 1);
camera_mat.at<float>(0, 1) = 0.f;
camera_mat.at<float>(1, 0) = 0.f;
camera_mat.at<float>(2, 0) = 0.f;
camera_mat.at<float>(2, 1) = 0.f;
std::vector<cv::Point2f> image_vec;
cv::Mat rvec_gold;
cv::Mat tvec_gold;
rvec_gold = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
tvec_gold = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)), image_vec);
cv::Mat rvec, tvec;
std::vector<int> inliers;
cv::gpu::solvePnPRansac(object, cv::Mat(1, (int)image_vec.size(), CV_32FC2, &image_vec[0]),
camera_mat, cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)),
rvec, tvec, false, 200, 2.f, 100, &inliers);
ASSERT_LE(cv::norm(rvec - rvec_gold), 1e-3);
ASSERT_LE(cv::norm(tvec - tvec_gold), 1e-3);
}
INSTANTIATE_TEST_CASE_P(GPU_Calib3D, SolvePnPRansac, ALL_DEVICES);
////////////////////////////////////////////////////////////////////////////////
// reprojectImageTo3D
PARAM_TEST_CASE(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int depth;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
depth = GET_PARAM(2);
useRoi = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID());
}
};
GPU_TEST_P(ReprojectImageTo3D, Accuracy)
{
cv::Mat disp = randomMat(size, depth, 5.0, 30.0);
cv::Mat Q = randomMat(cv::Size(4, 4), CV_32FC1, 0.1, 1.0);
cv::gpu::GpuMat dst;
cv::gpu::reprojectImageTo3D(loadMat(disp, useRoi), dst, Q, 3);
cv::Mat dst_gold;
cv::reprojectImageTo3D(disp, dst_gold, Q, false);
EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
}
INSTANTIATE_TEST_CASE_P(GPU_Calib3D, ReprojectImageTo3D, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatDepth(CV_8U), MatDepth(CV_16S)),
WHOLE_SUBMAT));
#endif // HAVE_CUDA