gpucalib3d module for camera calibration and stereo correspondence
This commit is contained in:
@@ -5,7 +5,7 @@ endif()
|
||||
set(the_description "GPU-accelerated Computer Vision")
|
||||
|
||||
ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy
|
||||
opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo
|
||||
opencv_gpuarithm opencv_gpufilters opencv_gpuimgproc opencv_gpufeatures2d opencv_gpuvideo opencv_gpucalib3d
|
||||
OPTIONAL opencv_gpunvidia)
|
||||
|
||||
ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")
|
||||
|
@@ -1,499 +0,0 @@
|
||||
Camera Calibration and 3D Reconstruction
|
||||
========================================
|
||||
|
||||
.. highlight:: cpp
|
||||
|
||||
|
||||
|
||||
gpu::StereoBM_GPU
|
||||
-----------------
|
||||
.. ocv:class:: gpu::StereoBM_GPU
|
||||
|
||||
Class computing stereo correspondence (disparity map) using the block matching algorithm. ::
|
||||
|
||||
class StereoBM_GPU
|
||||
{
|
||||
public:
|
||||
enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
|
||||
|
||||
enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
|
||||
|
||||
StereoBM_GPU();
|
||||
StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP,
|
||||
int winSize = DEFAULT_WINSZ);
|
||||
|
||||
void operator() (const GpuMat& left, const GpuMat& right,
|
||||
GpuMat& disparity, Stream& stream = Stream::Null());
|
||||
|
||||
static bool checkIfGpuCallReasonable();
|
||||
|
||||
int preset;
|
||||
int ndisp;
|
||||
int winSize;
|
||||
|
||||
float avergeTexThreshold;
|
||||
|
||||
...
|
||||
};
|
||||
|
||||
|
||||
The class also performs pre- and post-filtering steps: Sobel pre-filtering (if ``PREFILTER_XSOBEL`` flag is set) and low textureness filtering (if ``averageTexThreshols > 0`` ). If ``avergeTexThreshold = 0`` , low textureness filtering is disabled. Otherwise, the disparity is set to 0 in each point ``(x, y)`` , where for the left image
|
||||
|
||||
.. math::
|
||||
\sum HorizontalGradiensInWindow(x, y, winSize) < (winSize \cdot winSize) \cdot avergeTexThreshold
|
||||
|
||||
This means that the input left image is low textured.
|
||||
|
||||
|
||||
|
||||
gpu::StereoBM_GPU::StereoBM_GPU
|
||||
-----------------------------------
|
||||
Enables :ocv:class:`gpu::StereoBM_GPU` constructors.
|
||||
|
||||
.. ocv:function:: gpu::StereoBM_GPU::StereoBM_GPU()
|
||||
|
||||
.. ocv:function:: gpu::StereoBM_GPU::StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ)
|
||||
|
||||
:param preset: Parameter presetting:
|
||||
|
||||
* **BASIC_PRESET** Basic mode without pre-processing.
|
||||
|
||||
* **PREFILTER_XSOBEL** Sobel pre-filtering mode.
|
||||
|
||||
:param ndisparities: Number of disparities. It must be a multiple of 8 and less or equal to 256.
|
||||
|
||||
:param winSize: Block size.
|
||||
|
||||
|
||||
|
||||
gpu::StereoBM_GPU::operator ()
|
||||
----------------------------------
|
||||
Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair.
|
||||
|
||||
.. ocv:function:: void gpu::StereoBM_GPU::operator ()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null())
|
||||
|
||||
:param left: Left image. Only ``CV_8UC1`` type is supported.
|
||||
|
||||
:param right: Right image with the same size and the same type as the left one.
|
||||
|
||||
:param disparity: Output disparity map. It is a ``CV_8UC1`` image with the same size as the input images.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
|
||||
|
||||
gpu::StereoBM_GPU::checkIfGpuCallReasonable
|
||||
-----------------------------------------------
|
||||
Uses a heuristic method to estimate whether the current GPU is faster than the CPU in this algorithm. It queries the currently active device.
|
||||
|
||||
.. ocv:function:: bool gpu::StereoBM_GPU::checkIfGpuCallReasonable()
|
||||
|
||||
|
||||
|
||||
gpu::StereoBeliefPropagation
|
||||
----------------------------
|
||||
.. ocv:class:: gpu::StereoBeliefPropagation
|
||||
|
||||
Class computing stereo correspondence using the belief propagation algorithm. ::
|
||||
|
||||
class StereoBeliefPropagation
|
||||
{
|
||||
public:
|
||||
enum { DEFAULT_NDISP = 64 };
|
||||
enum { DEFAULT_ITERS = 5 };
|
||||
enum { DEFAULT_LEVELS = 5 };
|
||||
|
||||
static void estimateRecommendedParams(int width, int height,
|
||||
int& ndisp, int& iters, int& levels);
|
||||
|
||||
explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP,
|
||||
int iters = DEFAULT_ITERS,
|
||||
int levels = DEFAULT_LEVELS,
|
||||
int msg_type = CV_32F);
|
||||
StereoBeliefPropagation(int ndisp, int iters, int levels,
|
||||
float max_data_term, float data_weight,
|
||||
float max_disc_term, float disc_single_jump,
|
||||
int msg_type = CV_32F);
|
||||
|
||||
void operator()(const GpuMat& left, const GpuMat& right,
|
||||
GpuMat& disparity, Stream& stream = Stream::Null());
|
||||
void operator()(const GpuMat& data, GpuMat& disparity, Stream& stream = Stream::Null());
|
||||
|
||||
int ndisp;
|
||||
|
||||
int iters;
|
||||
int levels;
|
||||
|
||||
float max_data_term;
|
||||
float data_weight;
|
||||
float max_disc_term;
|
||||
float disc_single_jump;
|
||||
|
||||
int msg_type;
|
||||
|
||||
...
|
||||
};
|
||||
|
||||
The class implements algorithm described in [Felzenszwalb2006]_ . It can compute own data cost (using a truncated linear model) or use a user-provided data cost.
|
||||
|
||||
.. note::
|
||||
|
||||
``StereoBeliefPropagation`` requires a lot of memory for message storage:
|
||||
|
||||
.. math::
|
||||
|
||||
width \_ step \cdot height \cdot ndisp \cdot 4 \cdot (1 + 0.25)
|
||||
|
||||
and for data cost storage:
|
||||
|
||||
.. math::
|
||||
|
||||
width\_step \cdot height \cdot ndisp \cdot (1 + 0.25 + 0.0625 + \dotsm + \frac{1}{4^{levels}})
|
||||
|
||||
``width_step`` is the number of bytes in a line including padding.
|
||||
|
||||
|
||||
|
||||
gpu::StereoBeliefPropagation::StereoBeliefPropagation
|
||||
---------------------------------------------------------
|
||||
Enables the :ocv:class:`gpu::StereoBeliefPropagation` constructors.
|
||||
|
||||
.. ocv:function:: gpu::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int msg_type = CV_32F)
|
||||
|
||||
.. ocv:function:: gpu::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp, int iters, int levels, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int msg_type = CV_32F)
|
||||
|
||||
:param ndisp: Number of disparities.
|
||||
|
||||
:param iters: Number of BP iterations on each level.
|
||||
|
||||
:param levels: Number of levels.
|
||||
|
||||
:param max_data_term: Threshold for data cost truncation.
|
||||
|
||||
:param data_weight: Data weight.
|
||||
|
||||
:param max_disc_term: Threshold for discontinuity truncation.
|
||||
|
||||
:param disc_single_jump: Discontinuity single jump.
|
||||
|
||||
:param msg_type: Type for messages. ``CV_16SC1`` and ``CV_32FC1`` types are supported.
|
||||
|
||||
``StereoBeliefPropagation`` uses a truncated linear model for the data cost and discontinuity terms:
|
||||
|
||||
.. math::
|
||||
|
||||
DataCost = data \_ weight \cdot \min ( \lvert Img_Left(x,y)-Img_Right(x-d,y) \rvert , max \_ data \_ term)
|
||||
|
||||
.. math::
|
||||
|
||||
DiscTerm = \min (disc \_ single \_ jump \cdot \lvert f_1-f_2 \rvert , max \_ disc \_ term)
|
||||
|
||||
For more details, see [Felzenszwalb2006]_.
|
||||
|
||||
By default, :ocv:class:`gpu::StereoBeliefPropagation` uses floating-point arithmetics and the ``CV_32FC1`` type for messages. But it can also use fixed-point arithmetics and the ``CV_16SC1`` message type for better performance. To avoid an overflow in this case, the parameters must satisfy the following requirement:
|
||||
|
||||
.. math::
|
||||
|
||||
10 \cdot 2^{levels-1} \cdot max \_ data \_ term < SHRT \_ MAX
|
||||
|
||||
|
||||
|
||||
gpu::StereoBeliefPropagation::estimateRecommendedParams
|
||||
-----------------------------------------------------------
|
||||
Uses a heuristic method to compute the recommended parameters ( ``ndisp``, ``iters`` and ``levels`` ) for the specified image size ( ``width`` and ``height`` ).
|
||||
|
||||
.. ocv:function:: void gpu::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels)
|
||||
|
||||
|
||||
|
||||
gpu::StereoBeliefPropagation::operator ()
|
||||
---------------------------------------------
|
||||
Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair or data cost.
|
||||
|
||||
.. ocv:function:: void gpu::StereoBeliefPropagation::operator ()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null())
|
||||
|
||||
.. ocv:function:: void gpu::StereoBeliefPropagation::operator ()(const GpuMat& data, GpuMat& disparity, Stream& stream = Stream::Null())
|
||||
|
||||
:param left: Left image. ``CV_8UC1`` , ``CV_8UC3`` and ``CV_8UC4`` types are supported.
|
||||
|
||||
:param right: Right image with the same size and the same type as the left one.
|
||||
|
||||
:param data: User-specified data cost, a matrix of ``msg_type`` type and ``Size(<image columns>*ndisp, <image rows>)`` size.
|
||||
|
||||
:param disparity: Output disparity map. If ``disparity`` is empty, the output type is ``CV_16SC1`` . Otherwise, the type is retained.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
|
||||
|
||||
gpu::StereoConstantSpaceBP
|
||||
--------------------------
|
||||
.. ocv:class:: gpu::StereoConstantSpaceBP
|
||||
|
||||
Class computing stereo correspondence using the constant space belief propagation algorithm. ::
|
||||
|
||||
class StereoConstantSpaceBP
|
||||
{
|
||||
public:
|
||||
enum { DEFAULT_NDISP = 128 };
|
||||
enum { DEFAULT_ITERS = 8 };
|
||||
enum { DEFAULT_LEVELS = 4 };
|
||||
enum { DEFAULT_NR_PLANE = 4 };
|
||||
|
||||
static void estimateRecommendedParams(int width, int height,
|
||||
int& ndisp, int& iters, int& levels, int& nr_plane);
|
||||
|
||||
explicit StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP,
|
||||
int iters = DEFAULT_ITERS,
|
||||
int levels = DEFAULT_LEVELS,
|
||||
int nr_plane = DEFAULT_NR_PLANE,
|
||||
int msg_type = CV_32F);
|
||||
StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
|
||||
float max_data_term, float data_weight,
|
||||
float max_disc_term, float disc_single_jump,
|
||||
int min_disp_th = 0,
|
||||
int msg_type = CV_32F);
|
||||
|
||||
void operator()(const GpuMat& left, const GpuMat& right,
|
||||
GpuMat& disparity, Stream& stream = Stream::Null());
|
||||
|
||||
int ndisp;
|
||||
|
||||
int iters;
|
||||
int levels;
|
||||
|
||||
int nr_plane;
|
||||
|
||||
float max_data_term;
|
||||
float data_weight;
|
||||
float max_disc_term;
|
||||
float disc_single_jump;
|
||||
|
||||
int min_disp_th;
|
||||
|
||||
int msg_type;
|
||||
|
||||
bool use_local_init_data_cost;
|
||||
|
||||
...
|
||||
};
|
||||
|
||||
|
||||
The class implements algorithm described in [Yang2010]_. ``StereoConstantSpaceBP`` supports both local minimum and global minimum data cost initialization algorithms. For more details, see the paper mentioned above. By default, a local algorithm is used. To enable a global algorithm, set ``use_local_init_data_cost`` to ``false`` .
|
||||
|
||||
|
||||
|
||||
gpu::StereoConstantSpaceBP::StereoConstantSpaceBP
|
||||
-----------------------------------------------------
|
||||
Enables the :ocv:class:`gpu::StereoConstantSpaceBP` constructors.
|
||||
|
||||
.. ocv:function:: gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int nr_plane = DEFAULT_NR_PLANE, int msg_type = CV_32F)
|
||||
|
||||
.. ocv:function:: gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th = 0, int msg_type = CV_32F)
|
||||
|
||||
:param ndisp: Number of disparities.
|
||||
|
||||
:param iters: Number of BP iterations on each level.
|
||||
|
||||
:param levels: Number of levels.
|
||||
|
||||
:param nr_plane: Number of disparity levels on the first level.
|
||||
|
||||
:param max_data_term: Truncation of data cost.
|
||||
|
||||
:param data_weight: Data weight.
|
||||
|
||||
:param max_disc_term: Truncation of discontinuity.
|
||||
|
||||
:param disc_single_jump: Discontinuity single jump.
|
||||
|
||||
:param min_disp_th: Minimal disparity threshold.
|
||||
|
||||
:param msg_type: Type for messages. ``CV_16SC1`` and ``CV_32FC1`` types are supported.
|
||||
|
||||
``StereoConstantSpaceBP`` uses a truncated linear model for the data cost and discontinuity terms:
|
||||
|
||||
.. math::
|
||||
|
||||
DataCost = data \_ weight \cdot \min ( \lvert I_2-I_1 \rvert , max \_ data \_ term)
|
||||
|
||||
.. math::
|
||||
|
||||
DiscTerm = \min (disc \_ single \_ jump \cdot \lvert f_1-f_2 \rvert , max \_ disc \_ term)
|
||||
|
||||
For more details, see [Yang2010]_.
|
||||
|
||||
By default, ``StereoConstantSpaceBP`` uses floating-point arithmetics and the ``CV_32FC1`` type for messages. But it can also use fixed-point arithmetics and the ``CV_16SC1`` message type for better performance. To avoid an overflow in this case, the parameters must satisfy the following requirement:
|
||||
|
||||
.. math::
|
||||
|
||||
10 \cdot 2^{levels-1} \cdot max \_ data \_ term < SHRT \_ MAX
|
||||
|
||||
|
||||
|
||||
gpu::StereoConstantSpaceBP::estimateRecommendedParams
|
||||
---------------------------------------------------------
|
||||
Uses a heuristic method to compute parameters (ndisp, iters, levelsand nrplane) for the specified image size (widthand height).
|
||||
|
||||
.. ocv:function:: void gpu::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane)
|
||||
|
||||
|
||||
|
||||
gpu::StereoConstantSpaceBP::operator ()
|
||||
-------------------------------------------
|
||||
Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair.
|
||||
|
||||
.. ocv:function:: void gpu::StereoConstantSpaceBP::operator ()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null())
|
||||
|
||||
:param left: Left image. ``CV_8UC1`` , ``CV_8UC3`` and ``CV_8UC4`` types are supported.
|
||||
|
||||
:param right: Right image with the same size and the same type as the left one.
|
||||
|
||||
:param disparity: Output disparity map. If ``disparity`` is empty, the output type is ``CV_16SC1`` . Otherwise, the output type is ``disparity.type()`` .
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
|
||||
|
||||
gpu::DisparityBilateralFilter
|
||||
-----------------------------
|
||||
.. ocv:class:: gpu::DisparityBilateralFilter
|
||||
|
||||
Class refining a disparity map using joint bilateral filtering. ::
|
||||
|
||||
class CV_EXPORTS DisparityBilateralFilter
|
||||
{
|
||||
public:
|
||||
enum { DEFAULT_NDISP = 64 };
|
||||
enum { DEFAULT_RADIUS = 3 };
|
||||
enum { DEFAULT_ITERS = 1 };
|
||||
|
||||
explicit DisparityBilateralFilter(int ndisp = DEFAULT_NDISP,
|
||||
int radius = DEFAULT_RADIUS, int iters = DEFAULT_ITERS);
|
||||
|
||||
DisparityBilateralFilter(int ndisp, int radius, int iters,
|
||||
float edge_threshold, float max_disc_threshold,
|
||||
float sigma_range);
|
||||
|
||||
void operator()(const GpuMat& disparity, const GpuMat& image,
|
||||
GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
...
|
||||
};
|
||||
|
||||
|
||||
The class implements [Yang2010]_ algorithm.
|
||||
|
||||
|
||||
|
||||
gpu::DisparityBilateralFilter::DisparityBilateralFilter
|
||||
-----------------------------------------------------------
|
||||
Enables the :ocv:class:`gpu::DisparityBilateralFilter` constructors.
|
||||
|
||||
.. ocv:function:: gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp = DEFAULT_NDISP, int radius = DEFAULT_RADIUS, int iters = DEFAULT_ITERS)
|
||||
|
||||
.. ocv:function:: gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, float sigma_range)
|
||||
|
||||
:param ndisp: Number of disparities.
|
||||
|
||||
:param radius: Filter radius.
|
||||
|
||||
:param iters: Number of iterations.
|
||||
|
||||
:param edge_threshold: Threshold for edges.
|
||||
|
||||
:param max_disc_threshold: Constant to reject outliers.
|
||||
|
||||
:param sigma_range: Filter range.
|
||||
|
||||
|
||||
|
||||
gpu::DisparityBilateralFilter::operator ()
|
||||
----------------------------------------------
|
||||
Refines a disparity map using joint bilateral filtering.
|
||||
|
||||
.. ocv:function:: void gpu::DisparityBilateralFilter::operator ()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param disparity: Input disparity map. ``CV_8UC1`` and ``CV_16SC1`` types are supported.
|
||||
|
||||
:param image: Input image. ``CV_8UC1`` and ``CV_8UC3`` types are supported.
|
||||
|
||||
:param dst: Destination disparity map. It has the same size and type as ``disparity`` .
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
|
||||
|
||||
gpu::drawColorDisp
|
||||
----------------------
|
||||
Colors a disparity image.
|
||||
|
||||
.. ocv:function:: void gpu::drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp, Stream& stream = Stream::Null())
|
||||
|
||||
:param src_disp: Source disparity image. ``CV_8UC1`` and ``CV_16SC1`` types are supported.
|
||||
|
||||
:param dst_disp: Output disparity image. It has the same size as ``src_disp`` . The type is ``CV_8UC4`` in ``BGRA`` format (alpha = 255).
|
||||
|
||||
:param ndisp: Number of disparities.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
This function draws a colored disparity map by converting disparity values from ``[0..ndisp)`` interval first to ``HSV`` color space (where different disparity values correspond to different hues) and then converting the pixels to ``RGB`` for visualization.
|
||||
|
||||
|
||||
|
||||
gpu::reprojectImageTo3D
|
||||
---------------------------
|
||||
Reprojects a disparity image to 3D space.
|
||||
|
||||
.. ocv:function:: void gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, int dst_cn = 4, Stream& stream = Stream::Null())
|
||||
|
||||
:param disp: Input disparity image. ``CV_8U`` and ``CV_16S`` types are supported.
|
||||
|
||||
:param xyzw: Output 3- or 4-channel floating-point image of the same size as ``disp`` . Each element of ``xyzw(x,y)`` contains 3D coordinates ``(x,y,z)`` or ``(x,y,z,1)`` of the point ``(x,y)`` , computed from the disparity map.
|
||||
|
||||
:param Q: :math:`4 \times 4` perspective transformation matrix that can be obtained via :ocv:func:`stereoRectify` .
|
||||
|
||||
:param dst_cn: The number of channels for output image. Can be 3 or 4.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`reprojectImageTo3D`
|
||||
|
||||
|
||||
|
||||
gpu::solvePnPRansac
|
||||
-------------------
|
||||
Finds the object pose from 3D-2D point correspondences.
|
||||
|
||||
.. ocv:function:: void gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat, const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false, int num_iters=100, float max_dist=8.0, int min_inlier_count=100, vector<int>* inliers=NULL)
|
||||
|
||||
:param object: Single-row matrix of object points.
|
||||
|
||||
:param image: Single-row matrix of image points.
|
||||
|
||||
:param camera_mat: 3x3 matrix of intrinsic camera parameters.
|
||||
|
||||
:param dist_coef: Distortion coefficients. See :ocv:func:`undistortPoints` for details.
|
||||
|
||||
:param rvec: Output 3D rotation vector.
|
||||
|
||||
:param tvec: Output 3D translation vector.
|
||||
|
||||
:param use_extrinsic_guess: Flag to indicate that the function must use ``rvec`` and ``tvec`` as an initial transformation guess. It is not supported for now.
|
||||
|
||||
:param num_iters: Maximum number of RANSAC iterations.
|
||||
|
||||
:param max_dist: Euclidean distance threshold to detect whether point is inlier or not.
|
||||
|
||||
:param min_inlier_count: Flag to indicate that the function must stop if greater or equal number of inliers is achieved. It is not supported for now.
|
||||
|
||||
:param inliers: Output vector of inlier indices.
|
||||
|
||||
.. seealso:: :ocv:func:`solvePnPRansac`
|
||||
|
||||
|
||||
|
||||
.. [Felzenszwalb2006] Pedro F. Felzenszwalb algorithm [Pedro F. Felzenszwalb and Daniel P. Huttenlocher. *Efficient belief propagation for early vision*. International Journal of Computer Vision, 70(1), October 2006
|
||||
|
||||
.. [Yang2010] Q. Yang, L. Wang, and N. Ahuja. *A constant-space belief propagation algorithm for stereo matching*. In CVPR, 2010.
|
@@ -9,4 +9,3 @@ gpu. GPU-accelerated Computer Vision
|
||||
initalization_and_information
|
||||
data_structures
|
||||
object_detection
|
||||
camera_calibration_and_3d_reconstruction
|
||||
|
@@ -55,6 +55,7 @@
|
||||
#include "opencv2/gpuimgproc.hpp"
|
||||
#include "opencv2/gpufeatures2d.hpp"
|
||||
#include "opencv2/gpuvideo.hpp"
|
||||
#include "opencv2/gpucalib3d.hpp"
|
||||
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include "opencv2/objdetect.hpp"
|
||||
@@ -68,18 +69,6 @@ namespace cv { namespace gpu {
|
||||
|
||||
///////////////////////////// Calibration 3D //////////////////////////////////
|
||||
|
||||
CV_EXPORTS void transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
|
||||
GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
CV_EXPORTS void projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec,
|
||||
const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst,
|
||||
Stream& stream = Stream::Null());
|
||||
|
||||
CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
|
||||
const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false,
|
||||
int num_iters=100, float max_dist=8.0, int min_inlier_count=100,
|
||||
std::vector<int>* inliers=NULL);
|
||||
|
||||
//////////////////////////////// Image Labeling ////////////////////////////////
|
||||
|
||||
|
||||
@@ -90,190 +79,17 @@ CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& c
|
||||
|
||||
//////////////////////////////// StereoBM_GPU ////////////////////////////////
|
||||
|
||||
class CV_EXPORTS StereoBM_GPU
|
||||
{
|
||||
public:
|
||||
enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };
|
||||
|
||||
enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
|
||||
|
||||
//! the default constructor
|
||||
StereoBM_GPU();
|
||||
//! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8.
|
||||
StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
|
||||
|
||||
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
|
||||
//! Output disparity has CV_8U type.
|
||||
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
|
||||
|
||||
//! Some heuristics that tries to estmate
|
||||
// if current GPU will be faster than CPU in this algorithm.
|
||||
// It queries current active device.
|
||||
static bool checkIfGpuCallReasonable();
|
||||
|
||||
int preset;
|
||||
int ndisp;
|
||||
int winSize;
|
||||
|
||||
// If avergeTexThreshold == 0 => post procesing is disabled
|
||||
// If avergeTexThreshold != 0 then disparity is set 0 in each point (x,y) where for left image
|
||||
// SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
|
||||
// i.e. input left image is low textured.
|
||||
float avergeTexThreshold;
|
||||
|
||||
private:
|
||||
GpuMat minSSD, leBuf, riBuf;
|
||||
};
|
||||
|
||||
////////////////////////// StereoBeliefPropagation ///////////////////////////
|
||||
// "Efficient Belief Propagation for Early Vision"
|
||||
// P.Felzenszwalb
|
||||
|
||||
class CV_EXPORTS StereoBeliefPropagation
|
||||
{
|
||||
public:
|
||||
enum { DEFAULT_NDISP = 64 };
|
||||
enum { DEFAULT_ITERS = 5 };
|
||||
enum { DEFAULT_LEVELS = 5 };
|
||||
|
||||
static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels);
|
||||
|
||||
//! the default constructor
|
||||
explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP,
|
||||
int iters = DEFAULT_ITERS,
|
||||
int levels = DEFAULT_LEVELS,
|
||||
int msg_type = CV_32F);
|
||||
|
||||
//! the full constructor taking the number of disparities, number of BP iterations on each level,
|
||||
//! number of levels, truncation of data cost, data weight,
|
||||
//! truncation of discontinuity cost and discontinuity single jump
|
||||
//! DataTerm = data_weight * min(fabs(I2-I1), max_data_term)
|
||||
//! DiscTerm = min(disc_single_jump * fabs(f1-f2), max_disc_term)
|
||||
//! please see paper for more details
|
||||
StereoBeliefPropagation(int ndisp, int iters, int levels,
|
||||
float max_data_term, float data_weight,
|
||||
float max_disc_term, float disc_single_jump,
|
||||
int msg_type = CV_32F);
|
||||
|
||||
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
|
||||
//! if disparity is empty output type will be CV_16S else output type will be disparity.type().
|
||||
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
|
||||
|
||||
|
||||
//! version for user specified data term
|
||||
void operator()(const GpuMat& data, GpuMat& disparity, Stream& stream = Stream::Null());
|
||||
|
||||
int ndisp;
|
||||
|
||||
int iters;
|
||||
int levels;
|
||||
|
||||
float max_data_term;
|
||||
float data_weight;
|
||||
float max_disc_term;
|
||||
float disc_single_jump;
|
||||
|
||||
int msg_type;
|
||||
private:
|
||||
GpuMat u, d, l, r, u2, d2, l2, r2;
|
||||
std::vector<GpuMat> datas;
|
||||
GpuMat out;
|
||||
};
|
||||
|
||||
/////////////////////////// StereoConstantSpaceBP ///////////////////////////
|
||||
// "A Constant-Space Belief Propagation Algorithm for Stereo Matching"
|
||||
// Qingxiong Yang, Liang Wang, Narendra Ahuja
|
||||
// http://vision.ai.uiuc.edu/~qyang6/
|
||||
|
||||
class CV_EXPORTS StereoConstantSpaceBP
|
||||
{
|
||||
public:
|
||||
enum { DEFAULT_NDISP = 128 };
|
||||
enum { DEFAULT_ITERS = 8 };
|
||||
enum { DEFAULT_LEVELS = 4 };
|
||||
enum { DEFAULT_NR_PLANE = 4 };
|
||||
|
||||
static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane);
|
||||
|
||||
//! the default constructor
|
||||
explicit StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP,
|
||||
int iters = DEFAULT_ITERS,
|
||||
int levels = DEFAULT_LEVELS,
|
||||
int nr_plane = DEFAULT_NR_PLANE,
|
||||
int msg_type = CV_32F);
|
||||
|
||||
//! the full constructor taking the number of disparities, number of BP iterations on each level,
|
||||
//! number of levels, number of active disparity on the first level, truncation of data cost, data weight,
|
||||
//! truncation of discontinuity cost, discontinuity single jump and minimum disparity threshold
|
||||
StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
|
||||
float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
|
||||
int min_disp_th = 0,
|
||||
int msg_type = CV_32F);
|
||||
|
||||
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair,
|
||||
//! if disparity is empty output type will be CV_16S else output type will be disparity.type().
|
||||
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null());
|
||||
|
||||
int ndisp;
|
||||
|
||||
int iters;
|
||||
int levels;
|
||||
|
||||
int nr_plane;
|
||||
|
||||
float max_data_term;
|
||||
float data_weight;
|
||||
float max_disc_term;
|
||||
float disc_single_jump;
|
||||
|
||||
int min_disp_th;
|
||||
|
||||
int msg_type;
|
||||
|
||||
bool use_local_init_data_cost;
|
||||
private:
|
||||
GpuMat messages_buffers;
|
||||
|
||||
GpuMat temp;
|
||||
GpuMat out;
|
||||
};
|
||||
|
||||
/////////////////////////// DisparityBilateralFilter ///////////////////////////
|
||||
// Disparity map refinement using joint bilateral filtering given a single color image.
|
||||
// Qingxiong Yang, Liang Wang, Narendra Ahuja
|
||||
// http://vision.ai.uiuc.edu/~qyang6/
|
||||
|
||||
class CV_EXPORTS DisparityBilateralFilter
|
||||
{
|
||||
public:
|
||||
enum { DEFAULT_NDISP = 64 };
|
||||
enum { DEFAULT_RADIUS = 3 };
|
||||
enum { DEFAULT_ITERS = 1 };
|
||||
|
||||
//! the default constructor
|
||||
explicit DisparityBilateralFilter(int ndisp = DEFAULT_NDISP, int radius = DEFAULT_RADIUS, int iters = DEFAULT_ITERS);
|
||||
|
||||
//! the full constructor taking the number of disparities, filter radius,
|
||||
//! number of iterations, truncation of data continuity, truncation of disparity continuity
|
||||
//! and filter range sigma
|
||||
DisparityBilateralFilter(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, float sigma_range);
|
||||
|
||||
//! the disparity map refinement operator. Refine disparity map using joint bilateral filtering given a single color image.
|
||||
//! disparity must have CV_8U or CV_16S type, image must have CV_8UC1 or CV_8UC3 type.
|
||||
void operator()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
private:
|
||||
int ndisp;
|
||||
int radius;
|
||||
int iters;
|
||||
|
||||
float edge_threshold;
|
||||
float max_disc_threshold;
|
||||
float sigma_range;
|
||||
|
||||
GpuMat table_color;
|
||||
GpuMat table_space;
|
||||
};
|
||||
|
||||
|
||||
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
|
||||
|
@@ -1,371 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
using namespace perf;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// StereoBM
|
||||
|
||||
typedef std::tr1::tuple<string, string> pair_string;
|
||||
DEF_PARAM_TEST_1(ImagePair, pair_string);
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoBM,
|
||||
Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
|
||||
{
|
||||
declare.time(300.0);
|
||||
|
||||
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(imgLeft.empty());
|
||||
|
||||
const cv::Mat imgRight = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(imgRight.empty());
|
||||
|
||||
const int preset = 0;
|
||||
const int ndisp = 256;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::StereoBM_GPU d_bm(preset, ndisp);
|
||||
|
||||
const cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
const cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() d_bm(d_imgLeft, d_imgRight, dst);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Ptr<cv::StereoBM> bm = cv::createStereoBM(ndisp);
|
||||
|
||||
cv::Mat dst;
|
||||
|
||||
TEST_CYCLE() bm->compute(imgLeft, imgRight, dst);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// StereoBeliefPropagation
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation,
|
||||
Values(pair_string("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
|
||||
{
|
||||
declare.time(300.0);
|
||||
|
||||
const cv::Mat imgLeft = readImage(GET_PARAM(0));
|
||||
ASSERT_FALSE(imgLeft.empty());
|
||||
|
||||
const cv::Mat imgRight = readImage(GET_PARAM(1));
|
||||
ASSERT_FALSE(imgRight.empty());
|
||||
|
||||
const int ndisp = 64;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::StereoBeliefPropagation d_bp(ndisp);
|
||||
|
||||
const cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
const cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() d_bp(d_imgLeft, d_imgRight, dst);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// StereoConstantSpaceBP
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP,
|
||||
Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
|
||||
{
|
||||
declare.time(300.0);
|
||||
|
||||
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(imgLeft.empty());
|
||||
|
||||
const cv::Mat imgRight = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(imgRight.empty());
|
||||
|
||||
const int ndisp = 128;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::StereoConstantSpaceBP d_csbp(ndisp);
|
||||
|
||||
const cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
const cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() d_csbp(d_imgLeft, d_imgRight, dst);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// DisparityBilateralFilter
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter,
|
||||
Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
|
||||
{
|
||||
const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(img.empty());
|
||||
|
||||
const cv::Mat disp = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(disp.empty());
|
||||
|
||||
const int ndisp = 128;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::DisparityBilateralFilter d_filter(ndisp);
|
||||
|
||||
const cv::gpu::GpuMat d_img(img);
|
||||
const cv::gpu::GpuMat d_disp(disp);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() d_filter(d_disp, d_img, dst);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// TransformPoints
|
||||
|
||||
DEF_PARAM_TEST_1(Count, int);
|
||||
|
||||
PERF_TEST_P(Count, Calib3D_TransformPoints,
|
||||
Values(5000, 10000, 20000))
|
||||
{
|
||||
const int count = GetParam();
|
||||
|
||||
cv::Mat src(1, count, CV_32FC3);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
|
||||
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() cv::gpu::transformPoints(d_src, rvec, tvec, dst);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// ProjectPoints
|
||||
|
||||
PERF_TEST_P(Count, Calib3D_ProjectPoints,
|
||||
Values(5000, 10000, 20000))
|
||||
{
|
||||
const int count = GetParam();
|
||||
|
||||
cv::Mat src(1, count, CV_32FC3);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
|
||||
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
|
||||
const cv::Mat camera_mat = cv::Mat::ones(3, 3, CV_32FC1);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() cv::gpu::projectPoints(d_src, rvec, tvec, camera_mat, cv::Mat(), dst);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
TEST_CYCLE() cv::projectPoints(src, rvec, tvec, camera_mat, cv::noArray(), dst);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// SolvePnPRansac
|
||||
|
||||
PERF_TEST_P(Count, Calib3D_SolvePnPRansac,
|
||||
Values(5000, 10000, 20000))
|
||||
{
|
||||
declare.time(10.0);
|
||||
|
||||
const int count = GetParam();
|
||||
|
||||
cv::Mat object(1, count, CV_32FC3);
|
||||
declare.in(object, WARMUP_RNG);
|
||||
|
||||
cv::Mat camera_mat(3, 3, CV_32FC1);
|
||||
cv::randu(camera_mat, 0.5, 1);
|
||||
camera_mat.at<float>(0, 1) = 0.f;
|
||||
camera_mat.at<float>(1, 0) = 0.f;
|
||||
camera_mat.at<float>(2, 0) = 0.f;
|
||||
camera_mat.at<float>(2, 1) = 0.f;
|
||||
|
||||
const cv::Mat dist_coef(1, 8, CV_32F, cv::Scalar::all(0));
|
||||
|
||||
cv::Mat rvec_gold(1, 3, CV_32FC1);
|
||||
cv::randu(rvec_gold, 0, 1);
|
||||
|
||||
cv::Mat tvec_gold(1, 3, CV_32FC1);
|
||||
cv::randu(tvec_gold, 0, 1);
|
||||
|
||||
std::vector<cv::Point2f> image_vec;
|
||||
cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, dist_coef, image_vec);
|
||||
|
||||
const cv::Mat image(1, count, CV_32FC2, &image_vec[0]);
|
||||
|
||||
cv::Mat rvec;
|
||||
cv::Mat tvec;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
TEST_CYCLE() cv::gpu::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
|
||||
|
||||
GPU_SANITY_CHECK(rvec, 1e-3);
|
||||
GPU_SANITY_CHECK(tvec, 1e-3);
|
||||
}
|
||||
else
|
||||
{
|
||||
TEST_CYCLE() cv::solvePnPRansac(object, image, camera_mat, dist_coef, rvec, tvec);
|
||||
|
||||
CPU_SANITY_CHECK(rvec, 1e-6);
|
||||
CPU_SANITY_CHECK(tvec, 1e-6);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// ReprojectImageTo3D
|
||||
|
||||
PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D,
|
||||
Combine(GPU_TYPICAL_MAT_SIZES,
|
||||
Values(CV_8U, CV_16S)))
|
||||
{
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int depth = GET_PARAM(1);
|
||||
|
||||
cv::Mat src(size, depth);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
cv::Mat Q(4, 4, CV_32FC1);
|
||||
cv::randu(Q, 0.1, 1.0);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() cv::gpu::reprojectImageTo3D(d_src, dst, Q);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
TEST_CYCLE() cv::reprojectImageTo3D(src, dst, Q);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// DrawColorDisp
|
||||
|
||||
PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp,
|
||||
Combine(GPU_TYPICAL_MAT_SIZES,
|
||||
Values(CV_8U, CV_16S)))
|
||||
{
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int type = GET_PARAM(1);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() cv::gpu::drawColorDisp(d_src, dst, 255);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
@@ -1,294 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
||||
void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat&, Mat&, bool, int, float, int, std::vector<int>*) { throw_no_cuda(); }
|
||||
|
||||
#else
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace transform_points
|
||||
{
|
||||
void call(const PtrStepSz<float3> src, const float* rot, const float* transl, PtrStepSz<float3> dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
namespace project_points
|
||||
{
|
||||
void call(const PtrStepSz<float3> src, const float* rot, const float* transl, const float* proj, PtrStepSz<float2> dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
namespace solve_pnp_ransac
|
||||
{
|
||||
int maxNumIters();
|
||||
|
||||
void computeHypothesisScores(
|
||||
const int num_hypotheses, const int num_points, const float* rot_matrices,
|
||||
const float3* transl_vectors, const float3* object, const float2* image,
|
||||
const float dist_threshold, int* hypothesis_scores);
|
||||
}
|
||||
}}}
|
||||
|
||||
using namespace ::cv::gpu::cudev;
|
||||
|
||||
namespace
|
||||
{
|
||||
void transformPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, cudaStream_t stream)
|
||||
{
|
||||
CV_Assert(src.rows == 1 && src.cols > 0 && src.type() == CV_32FC3);
|
||||
CV_Assert(rvec.size() == Size(3, 1) && rvec.type() == CV_32F);
|
||||
CV_Assert(tvec.size() == Size(3, 1) && tvec.type() == CV_32F);
|
||||
|
||||
// Convert rotation vector into matrix
|
||||
Mat rot;
|
||||
Rodrigues(rvec, rot);
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
transform_points::call(src, rot.ptr<float>(), tvec.ptr<float>(), dst, stream);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
transformPointsCaller(src, rvec, tvec, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, cudaStream_t stream)
|
||||
{
|
||||
CV_Assert(src.rows == 1 && src.cols > 0 && src.type() == CV_32FC3);
|
||||
CV_Assert(rvec.size() == Size(3, 1) && rvec.type() == CV_32F);
|
||||
CV_Assert(tvec.size() == Size(3, 1) && tvec.type() == CV_32F);
|
||||
CV_Assert(camera_mat.size() == Size(3, 3) && camera_mat.type() == CV_32F);
|
||||
CV_Assert(dist_coef.empty()); // Undistortion isn't supported
|
||||
|
||||
// Convert rotation vector into matrix
|
||||
Mat rot;
|
||||
Rodrigues(rvec, rot);
|
||||
|
||||
dst.create(src.size(), CV_32FC2);
|
||||
project_points::call(src, rot.ptr<float>(), tvec.ptr<float>(), camera_mat.ptr<float>(), dst,stream);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
// Selects subset_size random different points from [0, num_points - 1] range
|
||||
void selectRandom(int subset_size, int num_points, std::vector<int>& subset)
|
||||
{
|
||||
subset.resize(subset_size);
|
||||
for (int i = 0; i < subset_size; ++i)
|
||||
{
|
||||
bool was;
|
||||
do
|
||||
{
|
||||
subset[i] = rand() % num_points;
|
||||
was = false;
|
||||
for (int j = 0; j < i; ++j)
|
||||
if (subset[j] == subset[i])
|
||||
{
|
||||
was = true;
|
||||
break;
|
||||
}
|
||||
} while (was);
|
||||
}
|
||||
}
|
||||
|
||||
// Computes rotation, translation pair for small subsets if the input data
|
||||
class TransformHypothesesGenerator
|
||||
{
|
||||
public:
|
||||
TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_,
|
||||
const Mat& camera_mat_, int num_points_, int subset_size_,
|
||||
Mat rot_matrices_, Mat transl_vectors_)
|
||||
: object(&object_), image(&image_), dist_coef(&dist_coef_), camera_mat(&camera_mat_),
|
||||
num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_),
|
||||
transl_vectors(transl_vectors_) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
{
|
||||
// Input data for generation of the current hypothesis
|
||||
std::vector<int> subset_indices(subset_size);
|
||||
Mat_<Point3f> object_subset(1, subset_size);
|
||||
Mat_<Point2f> image_subset(1, subset_size);
|
||||
|
||||
// Current hypothesis data
|
||||
Mat rot_vec(1, 3, CV_64F);
|
||||
Mat rot_mat(3, 3, CV_64F);
|
||||
Mat transl_vec(1, 3, CV_64F);
|
||||
|
||||
for (int iter = range.begin(); iter < range.end(); ++iter)
|
||||
{
|
||||
selectRandom(subset_size, num_points, subset_indices);
|
||||
for (int i = 0; i < subset_size; ++i)
|
||||
{
|
||||
object_subset(0, i) = object->at<Point3f>(subset_indices[i]);
|
||||
image_subset(0, i) = image->at<Point2f>(subset_indices[i]);
|
||||
}
|
||||
|
||||
solvePnP(object_subset, image_subset, *camera_mat, *dist_coef, rot_vec, transl_vec);
|
||||
|
||||
// Remember translation vector
|
||||
Mat transl_vec_ = transl_vectors.colRange(iter * 3, (iter + 1) * 3);
|
||||
transl_vec = transl_vec.reshape(0, 1);
|
||||
transl_vec.convertTo(transl_vec_, CV_32F);
|
||||
|
||||
// Remember rotation matrix
|
||||
Rodrigues(rot_vec, rot_mat);
|
||||
Mat rot_mat_ = rot_matrices.colRange(iter * 9, (iter + 1) * 9).reshape(0, 3);
|
||||
rot_mat.convertTo(rot_mat_, CV_32F);
|
||||
}
|
||||
}
|
||||
|
||||
const Mat* object;
|
||||
const Mat* image;
|
||||
const Mat* dist_coef;
|
||||
const Mat* camera_mat;
|
||||
int num_points;
|
||||
int subset_size;
|
||||
|
||||
// Hypotheses storage (global)
|
||||
Mat rot_matrices;
|
||||
Mat transl_vectors;
|
||||
};
|
||||
}
|
||||
|
||||
void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
|
||||
const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess,
|
||||
int num_iters, float max_dist, int min_inlier_count,
|
||||
std::vector<int>* inliers)
|
||||
{
|
||||
(void)min_inlier_count;
|
||||
CV_Assert(object.rows == 1 && object.cols > 0 && object.type() == CV_32FC3);
|
||||
CV_Assert(image.rows == 1 && image.cols > 0 && image.type() == CV_32FC2);
|
||||
CV_Assert(object.cols == image.cols);
|
||||
CV_Assert(camera_mat.size() == Size(3, 3) && camera_mat.type() == CV_32F);
|
||||
CV_Assert(!use_extrinsic_guess); // We don't support initial guess for now
|
||||
CV_Assert(num_iters <= solve_pnp_ransac::maxNumIters());
|
||||
|
||||
const int subset_size = 4;
|
||||
const int num_points = object.cols;
|
||||
CV_Assert(num_points >= subset_size);
|
||||
|
||||
// Unapply distortion and intrinsic camera transformations
|
||||
Mat eye_camera_mat = Mat::eye(3, 3, CV_32F);
|
||||
Mat empty_dist_coef;
|
||||
Mat image_normalized;
|
||||
undistortPoints(image, image_normalized, camera_mat, dist_coef, Mat(), eye_camera_mat);
|
||||
|
||||
// Hypotheses storage (global)
|
||||
Mat rot_matrices(1, num_iters * 9, CV_32F);
|
||||
Mat transl_vectors(1, num_iters * 3, CV_32F);
|
||||
|
||||
// Generate set of hypotheses using small subsets of the input data
|
||||
TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat,
|
||||
num_points, subset_size, rot_matrices, transl_vectors);
|
||||
parallel_for(BlockedRange(0, num_iters), body);
|
||||
|
||||
// Compute scores (i.e. number of inliers) for each hypothesis
|
||||
GpuMat d_object(object);
|
||||
GpuMat d_image_normalized(image_normalized);
|
||||
GpuMat d_hypothesis_scores(1, num_iters, CV_32S);
|
||||
solve_pnp_ransac::computeHypothesisScores(
|
||||
num_iters, num_points, rot_matrices.ptr<float>(), transl_vectors.ptr<float3>(),
|
||||
d_object.ptr<float3>(), d_image_normalized.ptr<float2>(), max_dist * max_dist,
|
||||
d_hypothesis_scores.ptr<int>());
|
||||
|
||||
// Find the best hypothesis index
|
||||
Point best_idx;
|
||||
double best_score;
|
||||
minMaxLoc(d_hypothesis_scores, NULL, &best_score, NULL, &best_idx);
|
||||
int num_inliers = static_cast<int>(best_score);
|
||||
|
||||
// Extract the best hypothesis data
|
||||
|
||||
Mat rot_mat = rot_matrices.colRange(best_idx.x * 9, (best_idx.x + 1) * 9).reshape(0, 3);
|
||||
Rodrigues(rot_mat, rvec);
|
||||
rvec = rvec.reshape(0, 1);
|
||||
|
||||
tvec = transl_vectors.colRange(best_idx.x * 3, (best_idx.x + 1) * 3).clone();
|
||||
tvec = tvec.reshape(0, 1);
|
||||
|
||||
// Build vector of inlier indices
|
||||
if (inliers != NULL)
|
||||
{
|
||||
inliers->clear();
|
||||
inliers->reserve(num_inliers);
|
||||
|
||||
Point3f p, p_transf;
|
||||
Point2f p_proj;
|
||||
const float* rot = rot_mat.ptr<float>();
|
||||
const float* transl = tvec.ptr<float>();
|
||||
|
||||
for (int i = 0; i < num_points; ++i)
|
||||
{
|
||||
p = object.at<Point3f>(0, i);
|
||||
p_transf.x = rot[0] * p.x + rot[1] * p.y + rot[2] * p.z + transl[0];
|
||||
p_transf.y = rot[3] * p.x + rot[4] * p.y + rot[5] * p.z + transl[1];
|
||||
p_transf.z = rot[6] * p.x + rot[7] * p.y + rot[8] * p.z + transl[2];
|
||||
p_proj.x = p_transf.x / p_transf.z;
|
||||
p_proj.y = p_transf.y / p_transf.z;
|
||||
if (norm(p_proj - image_normalized.at<Point2f>(0, i)) < max_dist)
|
||||
inliers->push_back(i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
@@ -1,193 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/transform.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "opencv2/core/cuda/reduce.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
#define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200
|
||||
|
||||
namespace transform_points
|
||||
{
|
||||
__constant__ float3 crot0;
|
||||
__constant__ float3 crot1;
|
||||
__constant__ float3 crot2;
|
||||
__constant__ float3 ctransl;
|
||||
|
||||
struct TransformOp : unary_function<float3, float3>
|
||||
{
|
||||
__device__ __forceinline__ float3 operator()(const float3& p) const
|
||||
{
|
||||
return make_float3(
|
||||
crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x,
|
||||
crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y,
|
||||
crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z);
|
||||
}
|
||||
__device__ __forceinline__ TransformOp() {}
|
||||
__device__ __forceinline__ TransformOp(const TransformOp&) {}
|
||||
};
|
||||
|
||||
void call(const PtrStepSz<float3> src, const float* rot,
|
||||
const float* transl, PtrStepSz<float3> dst,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
|
||||
cv::gpu::cudev::transform(src, dst, TransformOp(), WithOutMask(), stream);
|
||||
}
|
||||
} // namespace transform_points
|
||||
|
||||
namespace project_points
|
||||
{
|
||||
__constant__ float3 crot0;
|
||||
__constant__ float3 crot1;
|
||||
__constant__ float3 crot2;
|
||||
__constant__ float3 ctransl;
|
||||
__constant__ float3 cproj0;
|
||||
__constant__ float3 cproj1;
|
||||
|
||||
struct ProjectOp : unary_function<float3, float3>
|
||||
{
|
||||
__device__ __forceinline__ float2 operator()(const float3& p) const
|
||||
{
|
||||
// Rotate and translate in 3D
|
||||
float3 t = make_float3(
|
||||
crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x,
|
||||
crot1.x * p.x + crot1.y * p.y + crot1.z * p.z + ctransl.y,
|
||||
crot2.x * p.x + crot2.y * p.y + crot2.z * p.z + ctransl.z);
|
||||
// Project on 2D plane
|
||||
return make_float2(
|
||||
(cproj0.x * t.x + cproj0.y * t.y) / t.z + cproj0.z,
|
||||
(cproj1.x * t.x + cproj1.y * t.y) / t.z + cproj1.z);
|
||||
}
|
||||
__device__ __forceinline__ ProjectOp() {}
|
||||
__device__ __forceinline__ ProjectOp(const ProjectOp&) {}
|
||||
};
|
||||
|
||||
void call(const PtrStepSz<float3> src, const float* rot,
|
||||
const float* transl, const float* proj, PtrStepSz<float2> dst,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot0, rot, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
|
||||
cv::gpu::cudev::transform(src, dst, ProjectOp(), WithOutMask(), stream);
|
||||
}
|
||||
} // namespace project_points
|
||||
|
||||
namespace solve_pnp_ransac
|
||||
{
|
||||
__constant__ float3 crot_matrices[SOLVE_PNP_RANSAC_MAX_NUM_ITERS * 3];
|
||||
__constant__ float3 ctransl_vectors[SOLVE_PNP_RANSAC_MAX_NUM_ITERS];
|
||||
|
||||
int maxNumIters()
|
||||
{
|
||||
return SOLVE_PNP_RANSAC_MAX_NUM_ITERS;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ float sqr(float x)
|
||||
{
|
||||
return x * x;
|
||||
}
|
||||
|
||||
template <int BLOCK_SIZE>
|
||||
__global__ void computeHypothesisScoresKernel(
|
||||
const int num_points, const float3* object, const float2* image,
|
||||
const float dist_threshold, int* g_num_inliers)
|
||||
{
|
||||
const float3* const &rot_mat = crot_matrices + blockIdx.x * 3;
|
||||
const float3 &transl_vec = ctransl_vectors[blockIdx.x];
|
||||
int num_inliers = 0;
|
||||
|
||||
for (int i = threadIdx.x; i < num_points; i += blockDim.x)
|
||||
{
|
||||
float3 p = object[i];
|
||||
p = make_float3(
|
||||
rot_mat[0].x * p.x + rot_mat[0].y * p.y + rot_mat[0].z * p.z + transl_vec.x,
|
||||
rot_mat[1].x * p.x + rot_mat[1].y * p.y + rot_mat[1].z * p.z + transl_vec.y,
|
||||
rot_mat[2].x * p.x + rot_mat[2].y * p.y + rot_mat[2].z * p.z + transl_vec.z);
|
||||
p.x /= p.z;
|
||||
p.y /= p.z;
|
||||
float2 image_p = image[i];
|
||||
if (sqr(p.x - image_p.x) + sqr(p.y - image_p.y) < dist_threshold)
|
||||
++num_inliers;
|
||||
}
|
||||
|
||||
__shared__ int s_num_inliers[BLOCK_SIZE];
|
||||
reduce<BLOCK_SIZE>(s_num_inliers, num_inliers, threadIdx.x, plus<int>());
|
||||
|
||||
if (threadIdx.x == 0)
|
||||
g_num_inliers[blockIdx.x] = num_inliers;
|
||||
}
|
||||
|
||||
void computeHypothesisScores(
|
||||
const int num_hypotheses, const int num_points, const float* rot_matrices,
|
||||
const float3* transl_vectors, const float3* object, const float2* image,
|
||||
const float dist_threshold, int* hypothesis_scores)
|
||||
{
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot_matrices, rot_matrices, num_hypotheses * 3 * sizeof(float3)));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(ctransl_vectors, transl_vectors, num_hypotheses * sizeof(float3)));
|
||||
|
||||
dim3 threads(256);
|
||||
dim3 grid(num_hypotheses);
|
||||
|
||||
computeHypothesisScoresKernel<256><<<grid, threads>>>(
|
||||
num_points, object, image, dist_threshold, hypothesis_scores);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
} // namespace solvepnp_ransac
|
||||
}}} // namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
@@ -1,223 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/limits.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace disp_bilateral_filter
|
||||
{
|
||||
__constant__ float* ctable_color;
|
||||
__constant__ float* ctable_space;
|
||||
__constant__ size_t ctable_space_step;
|
||||
|
||||
__constant__ int cndisp;
|
||||
__constant__ int cradius;
|
||||
|
||||
__constant__ short cedge_disc;
|
||||
__constant__ short cmax_disc;
|
||||
|
||||
void disp_load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
|
||||
size_t table_space_step = table_space.step / sizeof(float);
|
||||
cudaSafeCall( cudaMemcpyToSymbol(ctable_space_step, &table_space_step, sizeof(size_t)) );
|
||||
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cradius, &radius, sizeof(int)) );
|
||||
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cedge_disc, &edge_disc, sizeof(short)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cmax_disc, &max_disc, sizeof(short)) );
|
||||
}
|
||||
|
||||
template <int channels>
|
||||
struct DistRgbMax
|
||||
{
|
||||
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
|
||||
{
|
||||
uchar x = ::abs(a[0] - b[0]);
|
||||
uchar y = ::abs(a[1] - b[1]);
|
||||
uchar z = ::abs(a[2] - b[2]);
|
||||
return (::max(::max(x, y), z));
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct DistRgbMax<1>
|
||||
{
|
||||
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
|
||||
{
|
||||
return ::abs(a[0] - b[0]);
|
||||
}
|
||||
};
|
||||
|
||||
template <int channels, typename T>
|
||||
__global__ void disp_bilateral_filter(int t, T* disp, size_t disp_step, const uchar* img, size_t img_step, int h, int w)
|
||||
{
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);
|
||||
|
||||
T dp[5];
|
||||
|
||||
if (y > 0 && y < h - 1 && x > 0 && x < w - 1)
|
||||
{
|
||||
dp[0] = *(disp + (y ) * disp_step + x + 0);
|
||||
dp[1] = *(disp + (y-1) * disp_step + x + 0);
|
||||
dp[2] = *(disp + (y ) * disp_step + x - 1);
|
||||
dp[3] = *(disp + (y+1) * disp_step + x + 0);
|
||||
dp[4] = *(disp + (y ) * disp_step + x + 1);
|
||||
|
||||
if(::abs(dp[1] - dp[0]) >= cedge_disc || ::abs(dp[2] - dp[0]) >= cedge_disc || ::abs(dp[3] - dp[0]) >= cedge_disc || ::abs(dp[4] - dp[0]) >= cedge_disc)
|
||||
{
|
||||
const int ymin = ::max(0, y - cradius);
|
||||
const int xmin = ::max(0, x - cradius);
|
||||
const int ymax = ::min(h - 1, y + cradius);
|
||||
const int xmax = ::min(w - 1, x + cradius);
|
||||
|
||||
float cost[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
|
||||
|
||||
const uchar* ic = img + y * img_step + channels * x;
|
||||
|
||||
for(int yi = ymin; yi <= ymax; yi++)
|
||||
{
|
||||
const T* disp_y = disp + yi * disp_step;
|
||||
|
||||
for(int xi = xmin; xi <= xmax; xi++)
|
||||
{
|
||||
const uchar* in = img + yi * img_step + channels * xi;
|
||||
|
||||
uchar dist_rgb = DistRgbMax<channels>::calc(in, ic);
|
||||
|
||||
const float weight = ctable_color[dist_rgb] * (ctable_space + ::abs(y-yi)* ctable_space_step)[::abs(x-xi)];
|
||||
|
||||
const T disp_reg = disp_y[xi];
|
||||
|
||||
cost[0] += ::min(cmax_disc, ::abs(disp_reg - dp[0])) * weight;
|
||||
cost[1] += ::min(cmax_disc, ::abs(disp_reg - dp[1])) * weight;
|
||||
cost[2] += ::min(cmax_disc, ::abs(disp_reg - dp[2])) * weight;
|
||||
cost[3] += ::min(cmax_disc, ::abs(disp_reg - dp[3])) * weight;
|
||||
cost[4] += ::min(cmax_disc, ::abs(disp_reg - dp[4])) * weight;
|
||||
}
|
||||
}
|
||||
|
||||
float minimum = numeric_limits<float>::max();
|
||||
int id = 0;
|
||||
|
||||
if (cost[0] < minimum)
|
||||
{
|
||||
minimum = cost[0];
|
||||
id = 0;
|
||||
}
|
||||
if (cost[1] < minimum)
|
||||
{
|
||||
minimum = cost[1];
|
||||
id = 1;
|
||||
}
|
||||
if (cost[2] < minimum)
|
||||
{
|
||||
minimum = cost[2];
|
||||
id = 2;
|
||||
}
|
||||
if (cost[3] < minimum)
|
||||
{
|
||||
minimum = cost[3];
|
||||
id = 3;
|
||||
}
|
||||
if (cost[4] < minimum)
|
||||
{
|
||||
minimum = cost[4];
|
||||
id = 4;
|
||||
}
|
||||
|
||||
*(disp + y * disp_step + x) = dp[id];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void disp_bilateral_filter(PtrStepSz<T> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
grid.x = divUp(disp.cols, threads.x << 1);
|
||||
grid.y = divUp(disp.rows, threads.y);
|
||||
|
||||
switch (channels)
|
||||
{
|
||||
case 1:
|
||||
for (int i = 0; i < iters; ++i)
|
||||
{
|
||||
disp_bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
disp_bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
for (int i = 0; i < iters; ++i)
|
||||
{
|
||||
disp_bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
disp_bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
break;
|
||||
default:
|
||||
CV_Error(cv::Error::BadNumChannels, "Unsupported channels count");
|
||||
}
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void disp_bilateral_filter<uchar>(PtrStepSz<uchar> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
|
||||
template void disp_bilateral_filter<short>(PtrStepSz<short> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
|
||||
} // namespace bilateral_filter
|
||||
}}} // namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
@@ -1,540 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace stereobm
|
||||
{
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////// Stereo BM ////////////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define ROWSperTHREAD 21 // the number of rows a thread will process
|
||||
|
||||
#define BLOCK_W 128 // the thread block width (464)
|
||||
#define N_DISPARITIES 8
|
||||
|
||||
#define STEREO_MIND 0 // The minimum d range to check
|
||||
#define STEREO_DISP_STEP N_DISPARITIES // the d step, must be <= 1 to avoid aliasing
|
||||
|
||||
__constant__ unsigned int* cminSSDImage;
|
||||
__constant__ size_t cminSSD_step;
|
||||
__constant__ int cwidth;
|
||||
__constant__ int cheight;
|
||||
|
||||
__device__ __forceinline__ int SQ(int a)
|
||||
{
|
||||
return a * a;
|
||||
}
|
||||
|
||||
template<int RADIUS>
|
||||
__device__ unsigned int CalcSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd)
|
||||
{
|
||||
unsigned int cache = 0;
|
||||
unsigned int cache2 = 0;
|
||||
|
||||
for(int i = 1; i <= RADIUS; i++)
|
||||
cache += col_ssd[i];
|
||||
|
||||
col_ssd_cache[0] = cache;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (threadIdx.x < BLOCK_W - RADIUS)
|
||||
cache2 = col_ssd_cache[RADIUS];
|
||||
else
|
||||
for(int i = RADIUS + 1; i < (2 * RADIUS + 1); i++)
|
||||
cache2 += col_ssd[i];
|
||||
|
||||
return col_ssd[0] + cache + cache2;
|
||||
}
|
||||
|
||||
template<int RADIUS>
|
||||
__device__ uint2 MinSSD(volatile unsigned int *col_ssd_cache, volatile unsigned int *col_ssd)
|
||||
{
|
||||
unsigned int ssd[N_DISPARITIES];
|
||||
|
||||
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
|
||||
ssd[0] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 0 * (BLOCK_W + 2 * RADIUS));
|
||||
__syncthreads();
|
||||
ssd[1] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 1 * (BLOCK_W + 2 * RADIUS));
|
||||
__syncthreads();
|
||||
ssd[2] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 2 * (BLOCK_W + 2 * RADIUS));
|
||||
__syncthreads();
|
||||
ssd[3] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 3 * (BLOCK_W + 2 * RADIUS));
|
||||
__syncthreads();
|
||||
ssd[4] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 4 * (BLOCK_W + 2 * RADIUS));
|
||||
__syncthreads();
|
||||
ssd[5] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 5 * (BLOCK_W + 2 * RADIUS));
|
||||
__syncthreads();
|
||||
ssd[6] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 6 * (BLOCK_W + 2 * RADIUS));
|
||||
__syncthreads();
|
||||
ssd[7] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 7 * (BLOCK_W + 2 * RADIUS));
|
||||
|
||||
int mssd = ::min(::min(::min(ssd[0], ssd[1]), ::min(ssd[4], ssd[5])), ::min(::min(ssd[2], ssd[3]), ::min(ssd[6], ssd[7])));
|
||||
|
||||
int bestIdx = 0;
|
||||
for (int i = 0; i < N_DISPARITIES; i++)
|
||||
{
|
||||
if (mssd == ssd[i])
|
||||
bestIdx = i;
|
||||
}
|
||||
|
||||
return make_uint2(mssd, bestIdx);
|
||||
}
|
||||
|
||||
template<int RADIUS>
|
||||
__device__ void StepDown(int idx1, int idx2, unsigned char* imageL, unsigned char* imageR, int d, volatile unsigned int *col_ssd)
|
||||
{
|
||||
unsigned char leftPixel1;
|
||||
unsigned char leftPixel2;
|
||||
unsigned char rightPixel1[8];
|
||||
unsigned char rightPixel2[8];
|
||||
unsigned int diff1, diff2;
|
||||
|
||||
leftPixel1 = imageL[idx1];
|
||||
leftPixel2 = imageL[idx2];
|
||||
|
||||
idx1 = idx1 - d;
|
||||
idx2 = idx2 - d;
|
||||
|
||||
rightPixel1[7] = imageR[idx1 - 7];
|
||||
rightPixel1[0] = imageR[idx1 - 0];
|
||||
rightPixel1[1] = imageR[idx1 - 1];
|
||||
rightPixel1[2] = imageR[idx1 - 2];
|
||||
rightPixel1[3] = imageR[idx1 - 3];
|
||||
rightPixel1[4] = imageR[idx1 - 4];
|
||||
rightPixel1[5] = imageR[idx1 - 5];
|
||||
rightPixel1[6] = imageR[idx1 - 6];
|
||||
|
||||
rightPixel2[7] = imageR[idx2 - 7];
|
||||
rightPixel2[0] = imageR[idx2 - 0];
|
||||
rightPixel2[1] = imageR[idx2 - 1];
|
||||
rightPixel2[2] = imageR[idx2 - 2];
|
||||
rightPixel2[3] = imageR[idx2 - 3];
|
||||
rightPixel2[4] = imageR[idx2 - 4];
|
||||
rightPixel2[5] = imageR[idx2 - 5];
|
||||
rightPixel2[6] = imageR[idx2 - 6];
|
||||
|
||||
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
|
||||
diff1 = leftPixel1 - rightPixel1[0];
|
||||
diff2 = leftPixel2 - rightPixel2[0];
|
||||
col_ssd[0 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
|
||||
|
||||
diff1 = leftPixel1 - rightPixel1[1];
|
||||
diff2 = leftPixel2 - rightPixel2[1];
|
||||
col_ssd[1 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
|
||||
|
||||
diff1 = leftPixel1 - rightPixel1[2];
|
||||
diff2 = leftPixel2 - rightPixel2[2];
|
||||
col_ssd[2 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
|
||||
|
||||
diff1 = leftPixel1 - rightPixel1[3];
|
||||
diff2 = leftPixel2 - rightPixel2[3];
|
||||
col_ssd[3 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
|
||||
|
||||
diff1 = leftPixel1 - rightPixel1[4];
|
||||
diff2 = leftPixel2 - rightPixel2[4];
|
||||
col_ssd[4 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
|
||||
|
||||
diff1 = leftPixel1 - rightPixel1[5];
|
||||
diff2 = leftPixel2 - rightPixel2[5];
|
||||
col_ssd[5 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
|
||||
|
||||
diff1 = leftPixel1 - rightPixel1[6];
|
||||
diff2 = leftPixel2 - rightPixel2[6];
|
||||
col_ssd[6 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
|
||||
|
||||
diff1 = leftPixel1 - rightPixel1[7];
|
||||
diff2 = leftPixel2 - rightPixel2[7];
|
||||
col_ssd[7 * (BLOCK_W + 2 * RADIUS)] += SQ(diff2) - SQ(diff1);
|
||||
}
|
||||
|
||||
template<int RADIUS>
|
||||
__device__ void InitColSSD(int x_tex, int y_tex, int im_pitch, unsigned char* imageL, unsigned char* imageR, int d, volatile unsigned int *col_ssd)
|
||||
{
|
||||
unsigned char leftPixel1;
|
||||
int idx;
|
||||
unsigned int diffa[] = {0, 0, 0, 0, 0, 0, 0, 0};
|
||||
|
||||
for(int i = 0; i < (2 * RADIUS + 1); i++)
|
||||
{
|
||||
idx = y_tex * im_pitch + x_tex;
|
||||
leftPixel1 = imageL[idx];
|
||||
idx = idx - d;
|
||||
|
||||
diffa[0] += SQ(leftPixel1 - imageR[idx - 0]);
|
||||
diffa[1] += SQ(leftPixel1 - imageR[idx - 1]);
|
||||
diffa[2] += SQ(leftPixel1 - imageR[idx - 2]);
|
||||
diffa[3] += SQ(leftPixel1 - imageR[idx - 3]);
|
||||
diffa[4] += SQ(leftPixel1 - imageR[idx - 4]);
|
||||
diffa[5] += SQ(leftPixel1 - imageR[idx - 5]);
|
||||
diffa[6] += SQ(leftPixel1 - imageR[idx - 6]);
|
||||
diffa[7] += SQ(leftPixel1 - imageR[idx - 7]);
|
||||
|
||||
y_tex += 1;
|
||||
}
|
||||
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
|
||||
col_ssd[0 * (BLOCK_W + 2 * RADIUS)] = diffa[0];
|
||||
col_ssd[1 * (BLOCK_W + 2 * RADIUS)] = diffa[1];
|
||||
col_ssd[2 * (BLOCK_W + 2 * RADIUS)] = diffa[2];
|
||||
col_ssd[3 * (BLOCK_W + 2 * RADIUS)] = diffa[3];
|
||||
col_ssd[4 * (BLOCK_W + 2 * RADIUS)] = diffa[4];
|
||||
col_ssd[5 * (BLOCK_W + 2 * RADIUS)] = diffa[5];
|
||||
col_ssd[6 * (BLOCK_W + 2 * RADIUS)] = diffa[6];
|
||||
col_ssd[7 * (BLOCK_W + 2 * RADIUS)] = diffa[7];
|
||||
}
|
||||
|
||||
template<int RADIUS>
|
||||
__global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t img_step, PtrStepb disp, int maxdisp)
|
||||
{
|
||||
extern __shared__ unsigned int col_ssd_cache[];
|
||||
volatile unsigned int *col_ssd = col_ssd_cache + BLOCK_W + threadIdx.x;
|
||||
volatile unsigned int *col_ssd_extra = threadIdx.x < (2 * RADIUS) ? col_ssd + BLOCK_W : 0; //#define N_DIRTY_PIXELS (2 * RADIUS)
|
||||
|
||||
//#define X (blockIdx.x * BLOCK_W + threadIdx.x + STEREO_MAXD)
|
||||
int X = (blockIdx.x * BLOCK_W + threadIdx.x + maxdisp + RADIUS);
|
||||
//#define Y (__mul24(blockIdx.y, ROWSperTHREAD) + RADIUS)
|
||||
#define Y (blockIdx.y * ROWSperTHREAD + RADIUS)
|
||||
//int Y = blockIdx.y * ROWSperTHREAD + RADIUS;
|
||||
|
||||
unsigned int* minSSDImage = cminSSDImage + X + Y * cminSSD_step;
|
||||
unsigned char* disparImage = disp.data + X + Y * disp.step;
|
||||
/* if (X < cwidth)
|
||||
{
|
||||
unsigned int *minSSDImage_end = minSSDImage + min(ROWSperTHREAD, cheight - Y) * minssd_step;
|
||||
for(uint *ptr = minSSDImage; ptr != minSSDImage_end; ptr += minssd_step )
|
||||
*ptr = 0xFFFFFFFF;
|
||||
}*/
|
||||
int end_row = ::min(ROWSperTHREAD, cheight - Y - RADIUS);
|
||||
int y_tex;
|
||||
int x_tex = X - RADIUS;
|
||||
|
||||
if (x_tex >= cwidth)
|
||||
return;
|
||||
|
||||
for(int d = STEREO_MIND; d < maxdisp; d += STEREO_DISP_STEP)
|
||||
{
|
||||
y_tex = Y - RADIUS;
|
||||
|
||||
InitColSSD<RADIUS>(x_tex, y_tex, img_step, left, right, d, col_ssd);
|
||||
|
||||
if (col_ssd_extra > 0)
|
||||
if (x_tex + BLOCK_W < cwidth)
|
||||
InitColSSD<RADIUS>(x_tex + BLOCK_W, y_tex, img_step, left, right, d, col_ssd_extra);
|
||||
|
||||
__syncthreads(); //before MinSSD function
|
||||
|
||||
if (X < cwidth - RADIUS && Y < cheight - RADIUS)
|
||||
{
|
||||
uint2 minSSD = MinSSD<RADIUS>(col_ssd_cache + threadIdx.x, col_ssd);
|
||||
if (minSSD.x < minSSDImage[0])
|
||||
{
|
||||
disparImage[0] = (unsigned char)(d + minSSD.y);
|
||||
minSSDImage[0] = minSSD.x;
|
||||
}
|
||||
}
|
||||
|
||||
for(int row = 1; row < end_row; row++)
|
||||
{
|
||||
int idx1 = y_tex * img_step + x_tex;
|
||||
int idx2 = (y_tex + (2 * RADIUS + 1)) * img_step + x_tex;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
StepDown<RADIUS>(idx1, idx2, left, right, d, col_ssd);
|
||||
|
||||
if (col_ssd_extra)
|
||||
if (x_tex + BLOCK_W < cwidth)
|
||||
StepDown<RADIUS>(idx1, idx2, left + BLOCK_W, right + BLOCK_W, d, col_ssd_extra);
|
||||
|
||||
y_tex += 1;
|
||||
|
||||
__syncthreads(); //before MinSSD function
|
||||
|
||||
if (X < cwidth - RADIUS && row < cheight - RADIUS - Y)
|
||||
{
|
||||
int idx = row * cminSSD_step;
|
||||
uint2 minSSD = MinSSD<RADIUS>(col_ssd_cache + threadIdx.x, col_ssd);
|
||||
if (minSSD.x < minSSDImage[idx])
|
||||
{
|
||||
disparImage[disp.step * row] = (unsigned char)(d + minSSD.y);
|
||||
minSSDImage[idx] = minSSD.x;
|
||||
}
|
||||
}
|
||||
} // for row loop
|
||||
} // for d loop
|
||||
}
|
||||
|
||||
|
||||
template<int RADIUS> void kernel_caller(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, cudaStream_t & stream)
|
||||
{
|
||||
dim3 grid(1,1,1);
|
||||
dim3 threads(BLOCK_W, 1, 1);
|
||||
|
||||
grid.x = divUp(left.cols - maxdisp - 2 * RADIUS, BLOCK_W);
|
||||
grid.y = divUp(left.rows - 2 * RADIUS, ROWSperTHREAD);
|
||||
|
||||
//See above: #define COL_SSD_SIZE (BLOCK_W + 2 * RADIUS)
|
||||
size_t smem_size = (BLOCK_W + N_DISPARITIES * (BLOCK_W + 2 * RADIUS)) * sizeof(unsigned int);
|
||||
|
||||
stereoKernel<RADIUS><<<grid, threads, smem_size, stream>>>(left.data, right.data, left.step, disp, maxdisp);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
};
|
||||
|
||||
typedef void (*kernel_caller_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, cudaStream_t & stream);
|
||||
|
||||
const static kernel_caller_t callers[] =
|
||||
{
|
||||
0,
|
||||
kernel_caller< 1>, kernel_caller< 2>, kernel_caller< 3>, kernel_caller< 4>, kernel_caller< 5>,
|
||||
kernel_caller< 6>, kernel_caller< 7>, kernel_caller< 8>, kernel_caller< 9>, kernel_caller<10>,
|
||||
kernel_caller<11>, kernel_caller<12>, kernel_caller<13>, kernel_caller<15>, kernel_caller<15>,
|
||||
kernel_caller<16>, kernel_caller<17>, kernel_caller<18>, kernel_caller<19>, kernel_caller<20>,
|
||||
kernel_caller<21>, kernel_caller<22>, kernel_caller<23>, kernel_caller<24>, kernel_caller<25>
|
||||
|
||||
//0,0,0, 0,0,0, 0,0,kernel_caller<9>
|
||||
};
|
||||
const int calles_num = sizeof(callers)/sizeof(callers[0]);
|
||||
|
||||
void stereoBM_GPU(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int maxdisp, int winsz, const PtrStepSz<unsigned int>& minSSD_buf, cudaStream_t& stream)
|
||||
{
|
||||
int winsz2 = winsz >> 1;
|
||||
|
||||
if (winsz2 == 0 || winsz2 >= calles_num)
|
||||
CV_Error(cv::Error::StsBadArg, "Unsupported window size");
|
||||
|
||||
//cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferL1) );
|
||||
//cudaSafeCall( cudaFuncSetCacheConfig(&stereoKernel, cudaFuncCachePreferShared) );
|
||||
|
||||
cudaSafeCall( cudaMemset2D(disp.data, disp.step, 0, disp.cols, disp.rows) );
|
||||
cudaSafeCall( cudaMemset2D(minSSD_buf.data, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp.rows) );
|
||||
|
||||
cudaSafeCall( cudaMemcpyToSymbol( cwidth, &left.cols, sizeof(left.cols) ) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol( cheight, &left.rows, sizeof(left.rows) ) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol( cminSSDImage, &minSSD_buf.data, sizeof(minSSD_buf.data) ) );
|
||||
|
||||
size_t minssd_step = minSSD_buf.step/minSSD_buf.elemSize();
|
||||
cudaSafeCall( cudaMemcpyToSymbol( cminSSD_step, &minssd_step, sizeof(minssd_step) ) );
|
||||
|
||||
callers[winsz2](left, right, disp, maxdisp, stream);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////////// Sobel Prefiler ///////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
texture<unsigned char, 2, cudaReadModeElementType> texForSobel;
|
||||
|
||||
__global__ void prefilter_kernel(PtrStepSzb output, int prefilterCap)
|
||||
{
|
||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < output.cols && y < output.rows)
|
||||
{
|
||||
int conv = (int)tex2D(texForSobel, x - 1, y - 1) * (-1) + (int)tex2D(texForSobel, x + 1, y - 1) * (1) +
|
||||
(int)tex2D(texForSobel, x - 1, y ) * (-2) + (int)tex2D(texForSobel, x + 1, y ) * (2) +
|
||||
(int)tex2D(texForSobel, x - 1, y + 1) * (-1) + (int)tex2D(texForSobel, x + 1, y + 1) * (1);
|
||||
|
||||
|
||||
conv = ::min(::min(::max(-prefilterCap, conv), prefilterCap) + prefilterCap, 255);
|
||||
output.ptr(y)[x] = conv & 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
void prefilter_xsobel(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap, cudaStream_t & stream)
|
||||
{
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
||||
cudaSafeCall( cudaBindTexture2D( 0, texForSobel, input.data, desc, input.cols, input.rows, input.step ) );
|
||||
|
||||
dim3 threads(16, 16, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(input.cols, threads.x);
|
||||
grid.y = divUp(input.rows, threads.y);
|
||||
|
||||
prefilter_kernel<<<grid, threads, 0, stream>>>(output, prefilterCap);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
cudaSafeCall( cudaUnbindTexture (texForSobel ) );
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////// Textureness filtering ////////////////////////////////////////
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
texture<unsigned char, 2, cudaReadModeNormalizedFloat> texForTF;
|
||||
|
||||
__device__ __forceinline__ float sobel(int x, int y)
|
||||
{
|
||||
float conv = tex2D(texForTF, x - 1, y - 1) * (-1) + tex2D(texForTF, x + 1, y - 1) * (1) +
|
||||
tex2D(texForTF, x - 1, y ) * (-2) + tex2D(texForTF, x + 1, y ) * (2) +
|
||||
tex2D(texForTF, x - 1, y + 1) * (-1) + tex2D(texForTF, x + 1, y + 1) * (1);
|
||||
return fabs(conv);
|
||||
}
|
||||
|
||||
__device__ float CalcSums(float *cols, float *cols_cache, int winsz)
|
||||
{
|
||||
float cache = 0;
|
||||
float cache2 = 0;
|
||||
int winsz2 = winsz/2;
|
||||
|
||||
for(int i = 1; i <= winsz2; i++)
|
||||
cache += cols[i];
|
||||
|
||||
cols_cache[0] = cache;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (threadIdx.x < blockDim.x - winsz2)
|
||||
cache2 = cols_cache[winsz2];
|
||||
else
|
||||
for(int i = winsz2 + 1; i < winsz; i++)
|
||||
cache2 += cols[i];
|
||||
|
||||
return cols[0] + cache + cache2;
|
||||
}
|
||||
|
||||
#define RpT (2 * ROWSperTHREAD) // got experimentally
|
||||
|
||||
__global__ void textureness_kernel(PtrStepSzb disp, int winsz, float threshold)
|
||||
{
|
||||
int winsz2 = winsz/2;
|
||||
int n_dirty_pixels = (winsz2) * 2;
|
||||
|
||||
extern __shared__ float cols_cache[];
|
||||
float *cols = cols_cache + blockDim.x + threadIdx.x;
|
||||
float *cols_extra = threadIdx.x < n_dirty_pixels ? cols + blockDim.x : 0;
|
||||
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int beg_row = blockIdx.y * RpT;
|
||||
int end_row = ::min(beg_row + RpT, disp.rows);
|
||||
|
||||
if (x < disp.cols)
|
||||
{
|
||||
int y = beg_row;
|
||||
|
||||
float sum = 0;
|
||||
float sum_extra = 0;
|
||||
|
||||
for(int i = y - winsz2; i <= y + winsz2; ++i)
|
||||
{
|
||||
sum += sobel(x - winsz2, i);
|
||||
if (cols_extra)
|
||||
sum_extra += sobel(x + blockDim.x - winsz2, i);
|
||||
}
|
||||
*cols = sum;
|
||||
if (cols_extra)
|
||||
*cols_extra = sum_extra;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
float sum_win = CalcSums(cols, cols_cache + threadIdx.x, winsz) * 255;
|
||||
if (sum_win < threshold)
|
||||
disp.data[y * disp.step + x] = 0;
|
||||
|
||||
__syncthreads();
|
||||
|
||||
for(int y = beg_row + 1; y < end_row; ++y)
|
||||
{
|
||||
sum = sum - sobel(x - winsz2, y - winsz2 - 1) + sobel(x - winsz2, y + winsz2);
|
||||
*cols = sum;
|
||||
|
||||
if (cols_extra)
|
||||
{
|
||||
sum_extra = sum_extra - sobel(x + blockDim.x - winsz2, y - winsz2 - 1) + sobel(x + blockDim.x - winsz2, y + winsz2);
|
||||
*cols_extra = sum_extra;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
float sum_win = CalcSums(cols, cols_cache + threadIdx.x, winsz) * 255;
|
||||
if (sum_win < threshold)
|
||||
disp.data[y * disp.step + x] = 0;
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void postfilter_textureness(const PtrStepSzb& input, int winsz, float avgTexturenessThreshold, const PtrStepSzb& disp, cudaStream_t & stream)
|
||||
{
|
||||
avgTexturenessThreshold *= winsz * winsz;
|
||||
|
||||
texForTF.filterMode = cudaFilterModeLinear;
|
||||
texForTF.addressMode[0] = cudaAddressModeWrap;
|
||||
texForTF.addressMode[1] = cudaAddressModeWrap;
|
||||
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
||||
cudaSafeCall( cudaBindTexture2D( 0, texForTF, input.data, desc, input.cols, input.rows, input.step ) );
|
||||
|
||||
dim3 threads(128, 1, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(input.cols, threads.x);
|
||||
grid.y = divUp(input.rows, RpT);
|
||||
|
||||
size_t smem_size = (threads.x + threads.x + (winsz/2) * 2 ) * sizeof(float);
|
||||
textureness_kernel<<<grid, threads, smem_size, stream>>>(disp, winsz, avgTexturenessThreshold);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
cudaSafeCall( cudaUnbindTexture (texForTF) );
|
||||
}
|
||||
} // namespace stereobm
|
||||
}}} // namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
@@ -1,538 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/limits.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace stereobp
|
||||
{
|
||||
///////////////////////////////////////////////////////////////
|
||||
/////////////////////// load constants ////////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
__constant__ int cndisp;
|
||||
__constant__ float cmax_data_term;
|
||||
__constant__ float cdata_weight;
|
||||
__constant__ float cmax_disc_term;
|
||||
__constant__ float cdisc_single_jump;
|
||||
|
||||
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int )) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) );
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
////////////////////////// comp data //////////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
template <int cn> struct PixDiff;
|
||||
template <> struct PixDiff<1>
|
||||
{
|
||||
__device__ __forceinline__ PixDiff(const uchar* ls)
|
||||
{
|
||||
l = *ls;
|
||||
}
|
||||
__device__ __forceinline__ float operator()(const uchar* rs) const
|
||||
{
|
||||
return ::abs((int)l - *rs);
|
||||
}
|
||||
uchar l;
|
||||
};
|
||||
template <> struct PixDiff<3>
|
||||
{
|
||||
__device__ __forceinline__ PixDiff(const uchar* ls)
|
||||
{
|
||||
l = *((uchar3*)ls);
|
||||
}
|
||||
__device__ __forceinline__ float operator()(const uchar* rs) const
|
||||
{
|
||||
const float tr = 0.299f;
|
||||
const float tg = 0.587f;
|
||||
const float tb = 0.114f;
|
||||
|
||||
float val = tb * ::abs((int)l.x - rs[0]);
|
||||
val += tg * ::abs((int)l.y - rs[1]);
|
||||
val += tr * ::abs((int)l.z - rs[2]);
|
||||
|
||||
return val;
|
||||
}
|
||||
uchar3 l;
|
||||
};
|
||||
template <> struct PixDiff<4>
|
||||
{
|
||||
__device__ __forceinline__ PixDiff(const uchar* ls)
|
||||
{
|
||||
l = *((uchar4*)ls);
|
||||
}
|
||||
__device__ __forceinline__ float operator()(const uchar* rs) const
|
||||
{
|
||||
const float tr = 0.299f;
|
||||
const float tg = 0.587f;
|
||||
const float tb = 0.114f;
|
||||
|
||||
uchar4 r = *((uchar4*)rs);
|
||||
|
||||
float val = tb * ::abs((int)l.x - r.x);
|
||||
val += tg * ::abs((int)l.y - r.y);
|
||||
val += tr * ::abs((int)l.z - r.z);
|
||||
|
||||
return val;
|
||||
}
|
||||
uchar4 l;
|
||||
};
|
||||
|
||||
template <int cn, typename D>
|
||||
__global__ void comp_data(const PtrStepSzb left, const PtrStepb right, PtrStep<D> data)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (y > 0 && y < left.rows - 1 && x > 0 && x < left.cols - 1)
|
||||
{
|
||||
const uchar* ls = left.ptr(y) + x * cn;
|
||||
const PixDiff<cn> pixDiff(ls);
|
||||
const uchar* rs = right.ptr(y) + x * cn;
|
||||
|
||||
D* ds = data.ptr(y) + x;
|
||||
const size_t disp_step = data.step * left.rows / sizeof(D);
|
||||
|
||||
for (int disp = 0; disp < cndisp; disp++)
|
||||
{
|
||||
if (x - disp >= 1)
|
||||
{
|
||||
float val = pixDiff(rs - disp * cn);
|
||||
|
||||
ds[disp * disp_step] = saturate_cast<D>(fmin(cdata_weight * val, cdata_weight * cmax_data_term));
|
||||
}
|
||||
else
|
||||
{
|
||||
ds[disp * disp_step] = saturate_cast<D>(cdata_weight * cmax_data_term);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, typename D>
|
||||
void comp_data_gpu(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream);
|
||||
|
||||
template <> void comp_data_gpu<uchar, short>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(left.cols, threads.x);
|
||||
grid.y = divUp(left.rows, threads.y);
|
||||
|
||||
comp_data<1, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
template <> void comp_data_gpu<uchar, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(left.cols, threads.x);
|
||||
grid.y = divUp(left.rows, threads.y);
|
||||
|
||||
comp_data<1, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template <> void comp_data_gpu<uchar3, short>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(left.cols, threads.x);
|
||||
grid.y = divUp(left.rows, threads.y);
|
||||
|
||||
comp_data<3, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
template <> void comp_data_gpu<uchar3, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(left.cols, threads.x);
|
||||
grid.y = divUp(left.rows, threads.y);
|
||||
|
||||
comp_data<3, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template <> void comp_data_gpu<uchar4, short>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(left.cols, threads.x);
|
||||
grid.y = divUp(left.rows, threads.y);
|
||||
|
||||
comp_data<4, short><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<short>)data);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
template <> void comp_data_gpu<uchar4, float>(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(left.cols, threads.x);
|
||||
grid.y = divUp(left.rows, threads.y);
|
||||
|
||||
comp_data<4, float><<<grid, threads, 0, stream>>>(left, right, (PtrStepSz<float>)data);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
//////////////////////// data step down ///////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename T>
|
||||
__global__ void data_step_down(int dst_cols, int dst_rows, int src_rows, const PtrStep<T> src, PtrStep<T> dst)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (x < dst_cols && y < dst_rows)
|
||||
{
|
||||
for (int d = 0; d < cndisp; ++d)
|
||||
{
|
||||
float dst_reg = src.ptr(d * src_rows + (2*y+0))[(2*x+0)];
|
||||
dst_reg += src.ptr(d * src_rows + (2*y+1))[(2*x+0)];
|
||||
dst_reg += src.ptr(d * src_rows + (2*y+0))[(2*x+1)];
|
||||
dst_reg += src.ptr(d * src_rows + (2*y+1))[(2*x+1)];
|
||||
|
||||
dst.ptr(d * dst_rows + y)[x] = saturate_cast<T>(dst_reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(dst_cols, threads.x);
|
||||
grid.y = divUp(dst_rows, threads.y);
|
||||
|
||||
data_step_down<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)src, (PtrStepSz<T>)dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void data_step_down_gpu<short>(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
|
||||
template void data_step_down_gpu<float>(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
/////////////////// level up messages ////////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename T>
|
||||
__global__ void level_up_message(int dst_cols, int dst_rows, int src_rows, const PtrStep<T> src, PtrStep<T> dst)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (x < dst_cols && y < dst_rows)
|
||||
{
|
||||
const size_t dst_disp_step = dst.step * dst_rows / sizeof(T);
|
||||
const size_t src_disp_step = src.step * src_rows / sizeof(T);
|
||||
|
||||
T* dstr = dst.ptr(y ) + x;
|
||||
const T* srcr = src.ptr(y/2) + x/2;
|
||||
|
||||
for (int d = 0; d < cndisp; ++d)
|
||||
dstr[d * dst_disp_step] = srcr[d * src_disp_step];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(dst_cols, threads.x);
|
||||
grid.y = divUp(dst_rows, threads.y);
|
||||
|
||||
int src_idx = (dst_idx + 1) & 1;
|
||||
|
||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mus[src_idx], (PtrStepSz<T>)mus[dst_idx]);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mds[src_idx], (PtrStepSz<T>)mds[dst_idx]);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mls[src_idx], (PtrStepSz<T>)mls[dst_idx]);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (PtrStepSz<T>)mrs[src_idx], (PtrStepSz<T>)mrs[dst_idx]);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void level_up_messages_gpu<short>(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
|
||||
template void level_up_messages_gpu<float>(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
//////////////////// calc all iterations /////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename T>
|
||||
__device__ void calc_min_linear_penalty(T* dst, size_t step)
|
||||
{
|
||||
float prev = dst[0];
|
||||
float cur;
|
||||
for (int disp = 1; disp < cndisp; ++disp)
|
||||
{
|
||||
prev += cdisc_single_jump;
|
||||
cur = dst[step * disp];
|
||||
if (prev < cur)
|
||||
{
|
||||
cur = prev;
|
||||
dst[step * disp] = saturate_cast<T>(prev);
|
||||
}
|
||||
prev = cur;
|
||||
}
|
||||
|
||||
prev = dst[(cndisp - 1) * step];
|
||||
for (int disp = cndisp - 2; disp >= 0; disp--)
|
||||
{
|
||||
prev += cdisc_single_jump;
|
||||
cur = dst[step * disp];
|
||||
if (prev < cur)
|
||||
{
|
||||
cur = prev;
|
||||
dst[step * disp] = saturate_cast<T>(prev);
|
||||
}
|
||||
prev = cur;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ void message(const T* msg1, const T* msg2, const T* msg3, const T* data, T* dst, size_t msg_disp_step, size_t data_disp_step)
|
||||
{
|
||||
float minimum = cudev::numeric_limits<float>::max();
|
||||
|
||||
for(int i = 0; i < cndisp; ++i)
|
||||
{
|
||||
float dst_reg = msg1[msg_disp_step * i];
|
||||
dst_reg += msg2[msg_disp_step * i];
|
||||
dst_reg += msg3[msg_disp_step * i];
|
||||
dst_reg += data[data_disp_step * i];
|
||||
|
||||
if (dst_reg < minimum)
|
||||
minimum = dst_reg;
|
||||
|
||||
dst[msg_disp_step * i] = saturate_cast<T>(dst_reg);
|
||||
}
|
||||
|
||||
calc_min_linear_penalty(dst, msg_disp_step);
|
||||
|
||||
minimum += cmax_disc_term;
|
||||
|
||||
float sum = 0;
|
||||
for(int i = 0; i < cndisp; ++i)
|
||||
{
|
||||
float dst_reg = dst[msg_disp_step * i];
|
||||
if (dst_reg > minimum)
|
||||
{
|
||||
dst_reg = minimum;
|
||||
dst[msg_disp_step * i] = saturate_cast<T>(minimum);
|
||||
}
|
||||
sum += dst_reg;
|
||||
}
|
||||
sum /= cndisp;
|
||||
|
||||
for(int i = 0; i < cndisp; ++i)
|
||||
dst[msg_disp_step * i] -= sum;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void one_iteration(int t, int elem_step, T* u, T* d, T* l, T* r, const PtrStep<T> data, int cols, int rows)
|
||||
{
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
const int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + t) & 1);
|
||||
|
||||
if ((y > 0) && (y < rows - 1) && (x > 0) && (x < cols - 1))
|
||||
{
|
||||
T* us = u + y * elem_step + x;
|
||||
T* ds = d + y * elem_step + x;
|
||||
T* ls = l + y * elem_step + x;
|
||||
T* rs = r + y * elem_step + x;
|
||||
const T* dt = data.ptr(y) + x;
|
||||
|
||||
size_t msg_disp_step = elem_step * rows;
|
||||
size_t data_disp_step = data.step * rows / sizeof(T);
|
||||
|
||||
message(us + elem_step, ls + 1, rs - 1, dt, us, msg_disp_step, data_disp_step);
|
||||
message(ds - elem_step, ls + 1, rs - 1, dt, ds, msg_disp_step, data_disp_step);
|
||||
message(us + elem_step, ds - elem_step, rs - 1, dt, rs, msg_disp_step, data_disp_step);
|
||||
message(us + elem_step, ds - elem_step, ls + 1, dt, ls, msg_disp_step, data_disp_step);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void calc_all_iterations_gpu(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d,
|
||||
const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(cols, threads.x << 1);
|
||||
grid.y = divUp(rows, threads.y);
|
||||
|
||||
int elem_step = (int)(u.step / sizeof(T));
|
||||
|
||||
for(int t = 0; t < iters; ++t)
|
||||
{
|
||||
one_iteration<T><<<grid, threads, 0, stream>>>(t, elem_step, (T*)u.data, (T*)d.data, (T*)l.data, (T*)r.data, (PtrStepSz<T>)data, cols, rows);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
|
||||
template void calc_all_iterations_gpu<short>(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream);
|
||||
template void calc_all_iterations_gpu<float>(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
/////////////////////////// output ////////////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename T>
|
||||
__global__ void output(const int elem_step, const T* u, const T* d, const T* l, const T* r, const T* data,
|
||||
PtrStepSz<short> disp)
|
||||
{
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (y > 0 && y < disp.rows - 1 && x > 0 && x < disp.cols - 1)
|
||||
{
|
||||
const T* us = u + (y + 1) * elem_step + x;
|
||||
const T* ds = d + (y - 1) * elem_step + x;
|
||||
const T* ls = l + y * elem_step + (x + 1);
|
||||
const T* rs = r + y * elem_step+ (x - 1);
|
||||
const T* dt = data + y * elem_step + x;
|
||||
|
||||
size_t disp_step = disp.rows * elem_step;
|
||||
|
||||
int best = 0;
|
||||
float best_val = numeric_limits<float>::max();
|
||||
for (int d = 0; d < cndisp; ++d)
|
||||
{
|
||||
float val = us[d * disp_step];
|
||||
val += ds[d * disp_step];
|
||||
val += ls[d * disp_step];
|
||||
val += rs[d * disp_step];
|
||||
val += dt[d * disp_step];
|
||||
|
||||
if (val < best_val)
|
||||
{
|
||||
best_val = val;
|
||||
best = d;
|
||||
}
|
||||
}
|
||||
|
||||
disp.ptr(y)[x] = saturate_cast<short>(best);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void output_gpu(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data,
|
||||
const PtrStepSz<short>& disp, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(disp.cols, threads.x);
|
||||
grid.y = divUp(disp.rows, threads.y);
|
||||
|
||||
int elem_step = static_cast<int>(u.step/sizeof(T));
|
||||
|
||||
output<T><<<grid, threads, 0, stream>>>(elem_step, (const T*)u.data, (const T*)d.data, (const T*)l.data, (const T*)r.data, (const T*)data.data, disp);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void output_gpu<short>(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);
|
||||
template void output_gpu<float>(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);
|
||||
} // namespace stereobp
|
||||
}}} // namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
@@ -1,864 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/saturate_cast.hpp"
|
||||
#include "opencv2/core/cuda/limits.hpp"
|
||||
#include "opencv2/core/cuda/reduce.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace stereocsbp
|
||||
{
|
||||
///////////////////////////////////////////////////////////////
|
||||
/////////////////////// load constants ////////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
__constant__ int cndisp;
|
||||
|
||||
__constant__ float cmax_data_term;
|
||||
__constant__ float cdata_weight;
|
||||
__constant__ float cmax_disc_term;
|
||||
__constant__ float cdisc_single_jump;
|
||||
|
||||
__constant__ int cth;
|
||||
|
||||
__constant__ size_t cimg_step;
|
||||
__constant__ size_t cmsg_step;
|
||||
__constant__ size_t cdisp_step1;
|
||||
__constant__ size_t cdisp_step2;
|
||||
|
||||
__constant__ uchar* cleft;
|
||||
__constant__ uchar* cright;
|
||||
__constant__ uchar* ctemp;
|
||||
|
||||
|
||||
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
|
||||
const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& temp)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
|
||||
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cmax_data_term, &max_data_term, sizeof(float)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdata_weight, &data_weight, sizeof(float)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cmax_disc_term, &max_disc_term, sizeof(float)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdisc_single_jump, &disc_single_jump, sizeof(float)) );
|
||||
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cth, &min_disp_th, sizeof(int)) );
|
||||
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cimg_step, &left.step, sizeof(size_t)) );
|
||||
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cleft, &left.data, sizeof(left.data)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cright, &right.data, sizeof(right.data)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(ctemp, &temp.data, sizeof(temp.data)) );
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
/////////////////////// init data cost ////////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
template <int channels> struct DataCostPerPixel;
|
||||
template <> struct DataCostPerPixel<1>
|
||||
{
|
||||
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
|
||||
{
|
||||
return fmin(cdata_weight * ::abs((int)*left - *right), cdata_weight * cmax_data_term);
|
||||
}
|
||||
};
|
||||
template <> struct DataCostPerPixel<3>
|
||||
{
|
||||
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
|
||||
{
|
||||
float tb = 0.114f * ::abs((int)left[0] - right[0]);
|
||||
float tg = 0.587f * ::abs((int)left[1] - right[1]);
|
||||
float tr = 0.299f * ::abs((int)left[2] - right[2]);
|
||||
|
||||
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
|
||||
}
|
||||
};
|
||||
template <> struct DataCostPerPixel<4>
|
||||
{
|
||||
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
|
||||
{
|
||||
uchar4 l = *((const uchar4*)left);
|
||||
uchar4 r = *((const uchar4*)right);
|
||||
|
||||
float tb = 0.114f * ::abs((int)l.x - r.x);
|
||||
float tg = 0.587f * ::abs((int)l.y - r.y);
|
||||
float tr = 0.299f * ::abs((int)l.z - r.z);
|
||||
|
||||
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__global__ void get_first_k_initial_global(T* data_cost_selected_, T *selected_disp_pyr, int h, int w, int nr_plane)
|
||||
{
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (y < h && x < w)
|
||||
{
|
||||
T* selected_disparity = selected_disp_pyr + y * cmsg_step + x;
|
||||
T* data_cost_selected = data_cost_selected_ + y * cmsg_step + x;
|
||||
T* data_cost = (T*)ctemp + y * cmsg_step + x;
|
||||
|
||||
for(int i = 0; i < nr_plane; i++)
|
||||
{
|
||||
T minimum = cudev::numeric_limits<T>::max();
|
||||
int id = 0;
|
||||
for(int d = 0; d < cndisp; d++)
|
||||
{
|
||||
T cur = data_cost[d * cdisp_step1];
|
||||
if(cur < minimum)
|
||||
{
|
||||
minimum = cur;
|
||||
id = d;
|
||||
}
|
||||
}
|
||||
|
||||
data_cost_selected[i * cdisp_step1] = minimum;
|
||||
selected_disparity[i * cdisp_step1] = id;
|
||||
data_cost [id * cdisp_step1] = numeric_limits<T>::max();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void get_first_k_initial_local(T* data_cost_selected_, T* selected_disp_pyr, int h, int w, int nr_plane)
|
||||
{
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (y < h && x < w)
|
||||
{
|
||||
T* selected_disparity = selected_disp_pyr + y * cmsg_step + x;
|
||||
T* data_cost_selected = data_cost_selected_ + y * cmsg_step + x;
|
||||
T* data_cost = (T*)ctemp + y * cmsg_step + x;
|
||||
|
||||
int nr_local_minimum = 0;
|
||||
|
||||
T prev = data_cost[0 * cdisp_step1];
|
||||
T cur = data_cost[1 * cdisp_step1];
|
||||
T next = data_cost[2 * cdisp_step1];
|
||||
|
||||
for (int d = 1; d < cndisp - 1 && nr_local_minimum < nr_plane; d++)
|
||||
{
|
||||
if (cur < prev && cur < next)
|
||||
{
|
||||
data_cost_selected[nr_local_minimum * cdisp_step1] = cur;
|
||||
selected_disparity[nr_local_minimum * cdisp_step1] = d;
|
||||
|
||||
data_cost[d * cdisp_step1] = numeric_limits<T>::max();
|
||||
|
||||
nr_local_minimum++;
|
||||
}
|
||||
prev = cur;
|
||||
cur = next;
|
||||
next = data_cost[(d + 1) * cdisp_step1];
|
||||
}
|
||||
|
||||
for (int i = nr_local_minimum; i < nr_plane; i++)
|
||||
{
|
||||
T minimum = numeric_limits<T>::max();
|
||||
int id = 0;
|
||||
|
||||
for (int d = 0; d < cndisp; d++)
|
||||
{
|
||||
cur = data_cost[d * cdisp_step1];
|
||||
if (cur < minimum)
|
||||
{
|
||||
minimum = cur;
|
||||
id = d;
|
||||
}
|
||||
}
|
||||
data_cost_selected[i * cdisp_step1] = minimum;
|
||||
selected_disparity[i * cdisp_step1] = id;
|
||||
|
||||
data_cost[id * cdisp_step1] = numeric_limits<T>::max();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, int channels>
|
||||
__global__ void init_data_cost(int h, int w, int level)
|
||||
{
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (y < h && x < w)
|
||||
{
|
||||
int y0 = y << level;
|
||||
int yt = (y + 1) << level;
|
||||
|
||||
int x0 = x << level;
|
||||
int xt = (x + 1) << level;
|
||||
|
||||
T* data_cost = (T*)ctemp + y * cmsg_step + x;
|
||||
|
||||
for(int d = 0; d < cndisp; ++d)
|
||||
{
|
||||
float val = 0.0f;
|
||||
for(int yi = y0; yi < yt; yi++)
|
||||
{
|
||||
for(int xi = x0; xi < xt; xi++)
|
||||
{
|
||||
int xr = xi - d;
|
||||
if(d < cth || xr < 0)
|
||||
val += cdata_weight * cmax_data_term;
|
||||
else
|
||||
{
|
||||
const uchar* lle = cleft + yi * cimg_step + xi * channels;
|
||||
const uchar* lri = cright + yi * cimg_step + xr * channels;
|
||||
|
||||
val += DataCostPerPixel<channels>::compute(lle, lri);
|
||||
}
|
||||
}
|
||||
}
|
||||
data_cost[cdisp_step1 * d] = saturate_cast<T>(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, int winsz, int channels>
|
||||
__global__ void init_data_cost_reduce(int level, int rows, int cols, int h)
|
||||
{
|
||||
int x_out = blockIdx.x;
|
||||
int y_out = blockIdx.y % h;
|
||||
int d = (blockIdx.y / h) * blockDim.z + threadIdx.z;
|
||||
|
||||
int tid = threadIdx.x;
|
||||
|
||||
if (d < cndisp)
|
||||
{
|
||||
int x0 = x_out << level;
|
||||
int y0 = y_out << level;
|
||||
|
||||
int len = ::min(y0 + winsz, rows) - y0;
|
||||
|
||||
float val = 0.0f;
|
||||
if (x0 + tid < cols)
|
||||
{
|
||||
if (x0 + tid - d < 0 || d < cth)
|
||||
val = cdata_weight * cmax_data_term * len;
|
||||
else
|
||||
{
|
||||
const uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid );
|
||||
const uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - d);
|
||||
|
||||
for(int y = 0; y < len; ++y)
|
||||
{
|
||||
val += DataCostPerPixel<channels>::compute(lle, lri);
|
||||
|
||||
lle += cimg_step;
|
||||
lri += cimg_step;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern __shared__ float smem[];
|
||||
|
||||
reduce<winsz>(smem + winsz * threadIdx.z, val, tid, plus<float>());
|
||||
|
||||
T* data_cost = (T*)ctemp + y_out * cmsg_step + x_out;
|
||||
|
||||
if (tid == 0)
|
||||
data_cost[cdisp_step1 * d] = saturate_cast<T>(val);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <typename T>
|
||||
void init_data_cost_caller_(int /*rows*/, int /*cols*/, int h, int w, int level, int /*ndisp*/, int channels, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(w, threads.x);
|
||||
grid.y = divUp(h, threads.y);
|
||||
|
||||
switch (channels)
|
||||
{
|
||||
case 1: init_data_cost<T, 1><<<grid, threads, 0, stream>>>(h, w, level); break;
|
||||
case 3: init_data_cost<T, 3><<<grid, threads, 0, stream>>>(h, w, level); break;
|
||||
case 4: init_data_cost<T, 4><<<grid, threads, 0, stream>>>(h, w, level); break;
|
||||
default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count");
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, int winsz>
|
||||
void init_data_cost_reduce_caller_(int rows, int cols, int h, int w, int level, int ndisp, int channels, cudaStream_t stream)
|
||||
{
|
||||
const int threadsNum = 256;
|
||||
const size_t smem_size = threadsNum * sizeof(float);
|
||||
|
||||
dim3 threads(winsz, 1, threadsNum / winsz);
|
||||
dim3 grid(w, h, 1);
|
||||
grid.y *= divUp(ndisp, threads.z);
|
||||
|
||||
switch (channels)
|
||||
{
|
||||
case 1: init_data_cost_reduce<T, winsz, 1><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
|
||||
case 3: init_data_cost_reduce<T, winsz, 3><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
|
||||
case 4: init_data_cost_reduce<T, winsz, 4><<<grid, threads, smem_size, stream>>>(level, rows, cols, h); break;
|
||||
default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count");
|
||||
}
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void init_data_cost(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step,
|
||||
int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream)
|
||||
{
|
||||
|
||||
typedef void (*InitDataCostCaller)(int cols, int rows, int w, int h, int level, int ndisp, int channels, cudaStream_t stream);
|
||||
|
||||
static const InitDataCostCaller init_data_cost_callers[] =
|
||||
{
|
||||
init_data_cost_caller_<T>, init_data_cost_caller_<T>, init_data_cost_reduce_caller_<T, 4>,
|
||||
init_data_cost_reduce_caller_<T, 8>, init_data_cost_reduce_caller_<T, 16>, init_data_cost_reduce_caller_<T, 32>,
|
||||
init_data_cost_reduce_caller_<T, 64>, init_data_cost_reduce_caller_<T, 128>, init_data_cost_reduce_caller_<T, 256>
|
||||
};
|
||||
|
||||
size_t disp_step = msg_step * h;
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
|
||||
|
||||
init_data_cost_callers[level](rows, cols, h, w, level, ndisp, channels, stream);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(w, threads.x);
|
||||
grid.y = divUp(h, threads.y);
|
||||
|
||||
if (use_local_init_data_cost == true)
|
||||
get_first_k_initial_local<<<grid, threads, 0, stream>>> (data_cost_selected, disp_selected_pyr, h, w, nr_plane);
|
||||
else
|
||||
get_first_k_initial_global<<<grid, threads, 0, stream>>>(data_cost_selected, disp_selected_pyr, h, w, nr_plane);
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void init_data_cost(int rows, int cols, short* disp_selected_pyr, short* data_cost_selected, size_t msg_step,
|
||||
int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream);
|
||||
|
||||
template void init_data_cost(int rows, int cols, float* disp_selected_pyr, float* data_cost_selected, size_t msg_step,
|
||||
int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
////////////////////// compute data cost //////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename T, int channels>
|
||||
__global__ void compute_data_cost(const T* selected_disp_pyr, T* data_cost_, int h, int w, int level, int nr_plane)
|
||||
{
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (y < h && x < w)
|
||||
{
|
||||
int y0 = y << level;
|
||||
int yt = (y + 1) << level;
|
||||
|
||||
int x0 = x << level;
|
||||
int xt = (x + 1) << level;
|
||||
|
||||
const T* selected_disparity = selected_disp_pyr + y/2 * cmsg_step + x/2;
|
||||
T* data_cost = data_cost_ + y * cmsg_step + x;
|
||||
|
||||
for(int d = 0; d < nr_plane; d++)
|
||||
{
|
||||
float val = 0.0f;
|
||||
for(int yi = y0; yi < yt; yi++)
|
||||
{
|
||||
for(int xi = x0; xi < xt; xi++)
|
||||
{
|
||||
int sel_disp = selected_disparity[d * cdisp_step2];
|
||||
int xr = xi - sel_disp;
|
||||
|
||||
if (xr < 0 || sel_disp < cth)
|
||||
val += cdata_weight * cmax_data_term;
|
||||
else
|
||||
{
|
||||
const uchar* left_x = cleft + yi * cimg_step + xi * channels;
|
||||
const uchar* right_x = cright + yi * cimg_step + xr * channels;
|
||||
|
||||
val += DataCostPerPixel<channels>::compute(left_x, right_x);
|
||||
}
|
||||
}
|
||||
}
|
||||
data_cost[cdisp_step1 * d] = saturate_cast<T>(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, int winsz, int channels>
|
||||
__global__ void compute_data_cost_reduce(const T* selected_disp_pyr, T* data_cost_, int level, int rows, int cols, int h, int nr_plane)
|
||||
{
|
||||
int x_out = blockIdx.x;
|
||||
int y_out = blockIdx.y % h;
|
||||
int d = (blockIdx.y / h) * blockDim.z + threadIdx.z;
|
||||
|
||||
int tid = threadIdx.x;
|
||||
|
||||
const T* selected_disparity = selected_disp_pyr + y_out/2 * cmsg_step + x_out/2;
|
||||
T* data_cost = data_cost_ + y_out * cmsg_step + x_out;
|
||||
|
||||
if (d < nr_plane)
|
||||
{
|
||||
int sel_disp = selected_disparity[d * cdisp_step2];
|
||||
|
||||
int x0 = x_out << level;
|
||||
int y0 = y_out << level;
|
||||
|
||||
int len = ::min(y0 + winsz, rows) - y0;
|
||||
|
||||
float val = 0.0f;
|
||||
if (x0 + tid < cols)
|
||||
{
|
||||
if (x0 + tid - sel_disp < 0 || sel_disp < cth)
|
||||
val = cdata_weight * cmax_data_term * len;
|
||||
else
|
||||
{
|
||||
const uchar* lle = cleft + y0 * cimg_step + channels * (x0 + tid );
|
||||
const uchar* lri = cright + y0 * cimg_step + channels * (x0 + tid - sel_disp);
|
||||
|
||||
for(int y = 0; y < len; ++y)
|
||||
{
|
||||
val += DataCostPerPixel<channels>::compute(lle, lri);
|
||||
|
||||
lle += cimg_step;
|
||||
lri += cimg_step;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern __shared__ float smem[];
|
||||
|
||||
reduce<winsz>(smem + winsz * threadIdx.z, val, tid, plus<float>());
|
||||
|
||||
if (tid == 0)
|
||||
data_cost[cdisp_step1 * d] = saturate_cast<T>(val);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void compute_data_cost_caller_(const T* disp_selected_pyr, T* data_cost, int /*rows*/, int /*cols*/,
|
||||
int h, int w, int level, int nr_plane, int channels, cudaStream_t stream)
|
||||
{
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(w, threads.x);
|
||||
grid.y = divUp(h, threads.y);
|
||||
|
||||
switch(channels)
|
||||
{
|
||||
case 1: compute_data_cost<T, 1><<<grid, threads, 0, stream>>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break;
|
||||
case 3: compute_data_cost<T, 3><<<grid, threads, 0, stream>>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break;
|
||||
case 4: compute_data_cost<T, 4><<<grid, threads, 0, stream>>>(disp_selected_pyr, data_cost, h, w, level, nr_plane); break;
|
||||
default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count");
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, int winsz>
|
||||
void compute_data_cost_reduce_caller_(const T* disp_selected_pyr, T* data_cost, int rows, int cols,
|
||||
int h, int w, int level, int nr_plane, int channels, cudaStream_t stream)
|
||||
{
|
||||
const int threadsNum = 256;
|
||||
const size_t smem_size = threadsNum * sizeof(float);
|
||||
|
||||
dim3 threads(winsz, 1, threadsNum / winsz);
|
||||
dim3 grid(w, h, 1);
|
||||
grid.y *= divUp(nr_plane, threads.z);
|
||||
|
||||
switch (channels)
|
||||
{
|
||||
case 1: compute_data_cost_reduce<T, winsz, 1><<<grid, threads, smem_size, stream>>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break;
|
||||
case 3: compute_data_cost_reduce<T, winsz, 3><<<grid, threads, smem_size, stream>>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break;
|
||||
case 4: compute_data_cost_reduce<T, winsz, 4><<<grid, threads, smem_size, stream>>>(disp_selected_pyr, data_cost, level, rows, cols, h, nr_plane); break;
|
||||
default: CV_Error(cv::Error::BadNumChannels, "Unsupported channels count");
|
||||
}
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void compute_data_cost(const T* disp_selected_pyr, T* data_cost, size_t msg_step,
|
||||
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*ComputeDataCostCaller)(const T* disp_selected_pyr, T* data_cost, int rows, int cols,
|
||||
int h, int w, int level, int nr_plane, int channels, cudaStream_t stream);
|
||||
|
||||
static const ComputeDataCostCaller callers[] =
|
||||
{
|
||||
compute_data_cost_caller_<T>, compute_data_cost_caller_<T>, compute_data_cost_reduce_caller_<T, 4>,
|
||||
compute_data_cost_reduce_caller_<T, 8>, compute_data_cost_reduce_caller_<T, 16>, compute_data_cost_reduce_caller_<T, 32>,
|
||||
compute_data_cost_reduce_caller_<T, 64>, compute_data_cost_reduce_caller_<T, 128>, compute_data_cost_reduce_caller_<T, 256>
|
||||
};
|
||||
|
||||
size_t disp_step1 = msg_step * h;
|
||||
size_t disp_step2 = msg_step * h2;
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
|
||||
|
||||
callers[level](disp_selected_pyr, data_cost, rows, cols, h, w, level, nr_plane, channels, stream);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void compute_data_cost(const short* disp_selected_pyr, short* data_cost, size_t msg_step,
|
||||
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream);
|
||||
|
||||
template void compute_data_cost(const float* disp_selected_pyr, float* data_cost, size_t msg_step,
|
||||
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
//////////////////////// init message /////////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
template <typename T>
|
||||
__device__ void get_first_k_element_increase(T* u_new, T* d_new, T* l_new, T* r_new,
|
||||
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
|
||||
T* data_cost_selected, T* disparity_selected_new, T* data_cost_new,
|
||||
const T* data_cost_cur, const T* disparity_selected_cur,
|
||||
int nr_plane, int nr_plane2)
|
||||
{
|
||||
for(int i = 0; i < nr_plane; i++)
|
||||
{
|
||||
T minimum = numeric_limits<T>::max();
|
||||
int id = 0;
|
||||
for(int j = 0; j < nr_plane2; j++)
|
||||
{
|
||||
T cur = data_cost_new[j * cdisp_step1];
|
||||
if(cur < minimum)
|
||||
{
|
||||
minimum = cur;
|
||||
id = j;
|
||||
}
|
||||
}
|
||||
|
||||
data_cost_selected[i * cdisp_step1] = data_cost_cur[id * cdisp_step1];
|
||||
disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step2];
|
||||
|
||||
u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
|
||||
d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
|
||||
l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
|
||||
r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
|
||||
|
||||
data_cost_new[id * cdisp_step1] = numeric_limits<T>::max();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void init_message(T* u_new_, T* d_new_, T* l_new_, T* r_new_,
|
||||
const T* u_cur_, const T* d_cur_, const T* l_cur_, const T* r_cur_,
|
||||
T* selected_disp_pyr_new, const T* selected_disp_pyr_cur,
|
||||
T* data_cost_selected_, const T* data_cost_,
|
||||
int h, int w, int nr_plane, int h2, int w2, int nr_plane2)
|
||||
{
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (y < h && x < w)
|
||||
{
|
||||
const T* u_cur = u_cur_ + ::min(h2-1, y/2 + 1) * cmsg_step + x/2;
|
||||
const T* d_cur = d_cur_ + ::max(0, y/2 - 1) * cmsg_step + x/2;
|
||||
const T* l_cur = l_cur_ + (y/2) * cmsg_step + ::min(w2-1, x/2 + 1);
|
||||
const T* r_cur = r_cur_ + (y/2) * cmsg_step + ::max(0, x/2 - 1);
|
||||
|
||||
T* data_cost_new = (T*)ctemp + y * cmsg_step + x;
|
||||
|
||||
const T* disparity_selected_cur = selected_disp_pyr_cur + y/2 * cmsg_step + x/2;
|
||||
const T* data_cost = data_cost_ + y * cmsg_step + x;
|
||||
|
||||
for(int d = 0; d < nr_plane2; d++)
|
||||
{
|
||||
int idx2 = d * cdisp_step2;
|
||||
|
||||
T val = data_cost[d * cdisp_step1] + u_cur[idx2] + d_cur[idx2] + l_cur[idx2] + r_cur[idx2];
|
||||
data_cost_new[d * cdisp_step1] = val;
|
||||
}
|
||||
|
||||
T* data_cost_selected = data_cost_selected_ + y * cmsg_step + x;
|
||||
T* disparity_selected_new = selected_disp_pyr_new + y * cmsg_step + x;
|
||||
|
||||
T* u_new = u_new_ + y * cmsg_step + x;
|
||||
T* d_new = d_new_ + y * cmsg_step + x;
|
||||
T* l_new = l_new_ + y * cmsg_step + x;
|
||||
T* r_new = r_new_ + y * cmsg_step + x;
|
||||
|
||||
u_cur = u_cur_ + y/2 * cmsg_step + x/2;
|
||||
d_cur = d_cur_ + y/2 * cmsg_step + x/2;
|
||||
l_cur = l_cur_ + y/2 * cmsg_step + x/2;
|
||||
r_cur = r_cur_ + y/2 * cmsg_step + x/2;
|
||||
|
||||
get_first_k_element_increase(u_new, d_new, l_new, r_new, u_cur, d_cur, l_cur, r_cur,
|
||||
data_cost_selected, disparity_selected_new, data_cost_new,
|
||||
data_cost, disparity_selected_cur, nr_plane, nr_plane2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class T>
|
||||
void init_message(T* u_new, T* d_new, T* l_new, T* r_new,
|
||||
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
|
||||
T* selected_disp_pyr_new, const T* selected_disp_pyr_cur,
|
||||
T* data_cost_selected, const T* data_cost, size_t msg_step,
|
||||
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream)
|
||||
{
|
||||
|
||||
size_t disp_step1 = msg_step * h;
|
||||
size_t disp_step2 = msg_step * h2;
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step1, sizeof(size_t)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step2, &disp_step2, sizeof(size_t)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
|
||||
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(w, threads.x);
|
||||
grid.y = divUp(h, threads.y);
|
||||
|
||||
init_message<<<grid, threads, 0, stream>>>(u_new, d_new, l_new, r_new,
|
||||
u_cur, d_cur, l_cur, r_cur,
|
||||
selected_disp_pyr_new, selected_disp_pyr_cur,
|
||||
data_cost_selected, data_cost,
|
||||
h, w, nr_plane, h2, w2, nr_plane2);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
|
||||
template void init_message(short* u_new, short* d_new, short* l_new, short* r_new,
|
||||
const short* u_cur, const short* d_cur, const short* l_cur, const short* r_cur,
|
||||
short* selected_disp_pyr_new, const short* selected_disp_pyr_cur,
|
||||
short* data_cost_selected, const short* data_cost, size_t msg_step,
|
||||
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream);
|
||||
|
||||
template void init_message(float* u_new, float* d_new, float* l_new, float* r_new,
|
||||
const float* u_cur, const float* d_cur, const float* l_cur, const float* r_cur,
|
||||
float* selected_disp_pyr_new, const float* selected_disp_pyr_cur,
|
||||
float* data_cost_selected, const float* data_cost, size_t msg_step,
|
||||
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream);
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
//////////////////// calc all iterations /////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename T>
|
||||
__device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3,
|
||||
const T* dst_disp, const T* src_disp, int nr_plane, volatile T* temp)
|
||||
{
|
||||
T minimum = numeric_limits<T>::max();
|
||||
|
||||
for(int d = 0; d < nr_plane; d++)
|
||||
{
|
||||
int idx = d * cdisp_step1;
|
||||
T val = data[idx] + msg1[idx] + msg2[idx] + msg3[idx];
|
||||
|
||||
if(val < minimum)
|
||||
minimum = val;
|
||||
|
||||
msg_dst[idx] = val;
|
||||
}
|
||||
|
||||
float sum = 0;
|
||||
for(int d = 0; d < nr_plane; d++)
|
||||
{
|
||||
float cost_min = minimum + cmax_disc_term;
|
||||
T src_disp_reg = src_disp[d * cdisp_step1];
|
||||
|
||||
for(int d2 = 0; d2 < nr_plane; d2++)
|
||||
cost_min = fmin(cost_min, msg_dst[d2 * cdisp_step1] + cdisc_single_jump * ::abs(dst_disp[d2 * cdisp_step1] - src_disp_reg));
|
||||
|
||||
temp[d * cdisp_step1] = saturate_cast<T>(cost_min);
|
||||
sum += cost_min;
|
||||
}
|
||||
sum /= nr_plane;
|
||||
|
||||
for(int d = 0; d < nr_plane; d++)
|
||||
msg_dst[d * cdisp_step1] = saturate_cast<T>(temp[d * cdisp_step1] - sum);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void compute_message(T* u_, T* d_, T* l_, T* r_, const T* data_cost_selected, const T* selected_disp_pyr_cur, int h, int w, int nr_plane, int i)
|
||||
{
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
int x = ((blockIdx.x * blockDim.x + threadIdx.x) << 1) + ((y + i) & 1);
|
||||
|
||||
if (y > 0 && y < h - 1 && x > 0 && x < w - 1)
|
||||
{
|
||||
const T* data = data_cost_selected + y * cmsg_step + x;
|
||||
|
||||
T* u = u_ + y * cmsg_step + x;
|
||||
T* d = d_ + y * cmsg_step + x;
|
||||
T* l = l_ + y * cmsg_step + x;
|
||||
T* r = r_ + y * cmsg_step + x;
|
||||
|
||||
const T* disp = selected_disp_pyr_cur + y * cmsg_step + x;
|
||||
|
||||
T* temp = (T*)ctemp + y * cmsg_step + x;
|
||||
|
||||
message_per_pixel(data, u, r - 1, u + cmsg_step, l + 1, disp, disp - cmsg_step, nr_plane, temp);
|
||||
message_per_pixel(data, d, d - cmsg_step, r - 1, l + 1, disp, disp + cmsg_step, nr_plane, temp);
|
||||
message_per_pixel(data, l, u + cmsg_step, d - cmsg_step, l + 1, disp, disp - 1, nr_plane, temp);
|
||||
message_per_pixel(data, r, u + cmsg_step, d - cmsg_step, r - 1, disp, disp + 1, nr_plane, temp);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class T>
|
||||
void calc_all_iterations(T* u, T* d, T* l, T* r, const T* data_cost_selected,
|
||||
const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream)
|
||||
{
|
||||
size_t disp_step = msg_step * h;
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
|
||||
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(w, threads.x << 1);
|
||||
grid.y = divUp(h, threads.y);
|
||||
|
||||
for(int t = 0; t < iters; ++t)
|
||||
{
|
||||
compute_message<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, selected_disp_pyr_cur, h, w, nr_plane, t & 1);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
};
|
||||
|
||||
template void calc_all_iterations(short* u, short* d, short* l, short* r, const short* data_cost_selected, const short* selected_disp_pyr_cur, size_t msg_step,
|
||||
int h, int w, int nr_plane, int iters, cudaStream_t stream);
|
||||
|
||||
template void calc_all_iterations(float* u, float* d, float* l, float* r, const float* data_cost_selected, const float* selected_disp_pyr_cur, size_t msg_step,
|
||||
int h, int w, int nr_plane, int iters, cudaStream_t stream);
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
/////////////////////////// output ////////////////////////////
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
template <typename T>
|
||||
__global__ void compute_disp(const T* u_, const T* d_, const T* l_, const T* r_,
|
||||
const T* data_cost_selected, const T* disp_selected_pyr,
|
||||
PtrStepSz<short> disp, int nr_plane)
|
||||
{
|
||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (y > 0 && y < disp.rows - 1 && x > 0 && x < disp.cols - 1)
|
||||
{
|
||||
const T* data = data_cost_selected + y * cmsg_step + x;
|
||||
const T* disp_selected = disp_selected_pyr + y * cmsg_step + x;
|
||||
|
||||
const T* u = u_ + (y+1) * cmsg_step + (x+0);
|
||||
const T* d = d_ + (y-1) * cmsg_step + (x+0);
|
||||
const T* l = l_ + (y+0) * cmsg_step + (x+1);
|
||||
const T* r = r_ + (y+0) * cmsg_step + (x-1);
|
||||
|
||||
int best = 0;
|
||||
T best_val = numeric_limits<T>::max();
|
||||
for (int i = 0; i < nr_plane; ++i)
|
||||
{
|
||||
int idx = i * cdisp_step1;
|
||||
T val = data[idx]+ u[idx] + d[idx] + l[idx] + r[idx];
|
||||
|
||||
if (val < best_val)
|
||||
{
|
||||
best_val = val;
|
||||
best = saturate_cast<short>(disp_selected[idx]);
|
||||
}
|
||||
}
|
||||
disp(y, x) = best;
|
||||
}
|
||||
}
|
||||
|
||||
template<class T>
|
||||
void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
|
||||
const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream)
|
||||
{
|
||||
size_t disp_step = disp.rows * msg_step;
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cdisp_step1, &disp_step, sizeof(size_t)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cmsg_step, &msg_step, sizeof(size_t)) );
|
||||
|
||||
dim3 threads(32, 8, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
|
||||
grid.x = divUp(disp.cols, threads.x);
|
||||
grid.y = divUp(disp.rows, threads.y);
|
||||
|
||||
compute_disp<<<grid, threads, 0, stream>>>(u, d, l, r, data_cost_selected, disp_selected, disp, nr_plane);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void compute_disp(const short* u, const short* d, const short* l, const short* r, const short* data_cost_selected, const short* disp_selected, size_t msg_step,
|
||||
const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream);
|
||||
|
||||
template void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step,
|
||||
const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream);
|
||||
} // namespace stereocsbp
|
||||
}}} // namespace cv { namespace gpu { namespace cudev {
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
@@ -1,157 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
||||
|
||||
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int) { throw_no_cuda(); }
|
||||
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int, float, float, float) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace disp_bilateral_filter
|
||||
{
|
||||
void disp_load_constants(float* table_color, PtrStepSzf table_space, int ndisp, int radius, short edge_disc, short max_disc);
|
||||
|
||||
template<typename T>
|
||||
void disp_bilateral_filter(PtrStepSz<T> disp, PtrStepSzb img, int channels, int iters, cudaStream_t stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
using namespace ::cv::gpu::cudev::disp_bilateral_filter;
|
||||
|
||||
namespace
|
||||
{
|
||||
const float DEFAULT_EDGE_THRESHOLD = 0.1f;
|
||||
const float DEFAULT_MAX_DISC_THRESHOLD = 0.2f;
|
||||
const float DEFAULT_SIGMA_RANGE = 10.0f;
|
||||
|
||||
inline void calc_color_weighted_table(GpuMat& table_color, float sigma_range, int len)
|
||||
{
|
||||
Mat cpu_table_color(1, len, CV_32F);
|
||||
|
||||
float* line = cpu_table_color.ptr<float>();
|
||||
|
||||
for(int i = 0; i < len; i++)
|
||||
line[i] = static_cast<float>(std::exp(-double(i * i) / (2 * sigma_range * sigma_range)));
|
||||
|
||||
table_color.upload(cpu_table_color);
|
||||
}
|
||||
|
||||
inline void calc_space_weighted_filter(GpuMat& table_space, int win_size, float dist_space)
|
||||
{
|
||||
int half = (win_size >> 1);
|
||||
|
||||
Mat cpu_table_space(half + 1, half + 1, CV_32F);
|
||||
|
||||
for (int y = 0; y <= half; ++y)
|
||||
{
|
||||
float* row = cpu_table_space.ptr<float>(y);
|
||||
for (int x = 0; x <= half; ++x)
|
||||
row[x] = exp(-sqrt(float(y * y) + float(x * x)) / dist_space);
|
||||
}
|
||||
|
||||
table_space.upload(cpu_table_space);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void disp_bilateral_filter_operator(int ndisp, int radius, int iters, float edge_threshold,float max_disc_threshold,
|
||||
GpuMat& table_color, GpuMat& table_space,
|
||||
const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
short edge_disc = std::max<short>(short(1), short(ndisp * edge_threshold + 0.5));
|
||||
short max_disc = short(ndisp * max_disc_threshold + 0.5);
|
||||
|
||||
disp_load_constants(table_color.ptr<float>(), table_space, ndisp, radius, edge_disc, max_disc);
|
||||
|
||||
if (&dst != &disp)
|
||||
{
|
||||
if (stream)
|
||||
stream.enqueueCopy(disp, dst);
|
||||
else
|
||||
disp.copyTo(dst);
|
||||
}
|
||||
|
||||
disp_bilateral_filter<T>(dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
|
||||
GpuMat& table_color, GpuMat& table_space,
|
||||
const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream);
|
||||
|
||||
const bilateral_filter_operator_t operators[] =
|
||||
{disp_bilateral_filter_operator<unsigned char>, 0, 0, disp_bilateral_filter_operator<short>, 0, 0, 0, 0};
|
||||
}
|
||||
|
||||
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_)
|
||||
: ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(DEFAULT_EDGE_THRESHOLD), max_disc_threshold(DEFAULT_MAX_DISC_THRESHOLD),
|
||||
sigma_range(DEFAULT_SIGMA_RANGE)
|
||||
{
|
||||
calc_color_weighted_table(table_color, sigma_range, 255);
|
||||
calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f);
|
||||
}
|
||||
|
||||
cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_, float edge_threshold_,
|
||||
float max_disc_threshold_, float sigma_range_)
|
||||
: ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(edge_threshold_), max_disc_threshold(max_disc_threshold_),
|
||||
sigma_range(sigma_range_)
|
||||
{
|
||||
calc_color_weighted_table(table_color, sigma_range, 255);
|
||||
calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f);
|
||||
}
|
||||
|
||||
void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
CV_DbgAssert(0 < ndisp && 0 < radius && 0 < iters);
|
||||
CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3));
|
||||
operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, stream);
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
@@ -1,140 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
||||
|
||||
cv::gpu::StereoBM_GPU::StereoBM_GPU() { throw_no_cuda(); }
|
||||
cv::gpu::StereoBM_GPU::StereoBM_GPU(int, int, int) { throw_no_cuda(); }
|
||||
|
||||
bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable() { throw_no_cuda(); return false; }
|
||||
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace stereobm
|
||||
{
|
||||
void stereoBM_GPU(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& disp, int ndisp, int winsz, const PtrStepSz<unsigned int>& minSSD_buf, cudaStream_t & stream);
|
||||
void prefilter_xsobel(const PtrStepSzb& input, const PtrStepSzb& output, int prefilterCap /*= 31*/, cudaStream_t & stream);
|
||||
void postfilter_textureness(const PtrStepSzb& input, int winsz, float avgTexturenessThreshold, const PtrStepSzb& disp, cudaStream_t & stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
namespace
|
||||
{
|
||||
const float defaultAvgTexThreshold = 3;
|
||||
}
|
||||
|
||||
cv::gpu::StereoBM_GPU::StereoBM_GPU()
|
||||
: preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ), avergeTexThreshold(defaultAvgTexThreshold)
|
||||
{
|
||||
}
|
||||
|
||||
cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_, int winSize_)
|
||||
: preset(preset_), ndisp(ndisparities_), winSize(winSize_), avergeTexThreshold(defaultAvgTexThreshold)
|
||||
{
|
||||
const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8);
|
||||
CV_Assert(0 < ndisp && ndisp <= max_supported_ndisp);
|
||||
CV_Assert(ndisp % 8 == 0);
|
||||
CV_Assert(winSize % 2 == 1);
|
||||
}
|
||||
|
||||
bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
|
||||
{
|
||||
if (0 == getCudaEnabledDeviceCount())
|
||||
return false;
|
||||
|
||||
DeviceInfo device_info;
|
||||
|
||||
if (device_info.majorVersion() > 1 || device_info.multiProcessorCount() > 16)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void stereo_bm_gpu_operator( GpuMat& minSSD, GpuMat& leBuf, GpuMat& riBuf, int preset, int ndisp, int winSize, float avergeTexThreshold, const GpuMat& left, const GpuMat& right, GpuMat& disparity, cudaStream_t stream)
|
||||
{
|
||||
using namespace ::cv::gpu::cudev::stereobm;
|
||||
|
||||
CV_Assert(left.rows == right.rows && left.cols == right.cols);
|
||||
CV_Assert(left.type() == CV_8UC1);
|
||||
CV_Assert(right.type() == CV_8UC1);
|
||||
|
||||
disparity.create(left.size(), CV_8U);
|
||||
minSSD.create(left.size(), CV_32S);
|
||||
|
||||
GpuMat le_for_bm = left;
|
||||
GpuMat ri_for_bm = right;
|
||||
|
||||
if (preset == StereoBM_GPU::PREFILTER_XSOBEL)
|
||||
{
|
||||
leBuf.create( left.size(), left.type());
|
||||
riBuf.create(right.size(), right.type());
|
||||
|
||||
prefilter_xsobel( left, leBuf, 31, stream);
|
||||
prefilter_xsobel(right, riBuf, 31, stream);
|
||||
|
||||
le_for_bm = leBuf;
|
||||
ri_for_bm = riBuf;
|
||||
}
|
||||
|
||||
stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD, stream);
|
||||
|
||||
if (avergeTexThreshold)
|
||||
postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity, stream);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream)
|
||||
{
|
||||
stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
@@ -1,368 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
||||
|
||||
void cv::gpu::StereoBeliefPropagation::estimateRecommendedParams(int, int, int&, int&, int&) { throw_no_cuda(); }
|
||||
|
||||
cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation(int, int, int, int) { throw_no_cuda(); }
|
||||
cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation(int, int, int, float, float, float, float, int) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace stereobp
|
||||
{
|
||||
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump);
|
||||
template<typename T, typename D>
|
||||
void comp_data_gpu(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream);
|
||||
template<typename T>
|
||||
void data_step_down_gpu(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
|
||||
template <typename T>
|
||||
void level_up_messages_gpu(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
|
||||
template <typename T>
|
||||
void calc_all_iterations_gpu(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d,
|
||||
const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream);
|
||||
template <typename T>
|
||||
void output_gpu(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data,
|
||||
const PtrStepSz<short>& disp, cudaStream_t stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
using namespace ::cv::gpu::cudev::stereobp;
|
||||
|
||||
namespace
|
||||
{
|
||||
const float DEFAULT_MAX_DATA_TERM = 10.0f;
|
||||
const float DEFAULT_DATA_WEIGHT = 0.07f;
|
||||
const float DEFAULT_MAX_DISC_TERM = 1.7f;
|
||||
const float DEFAULT_DISC_SINGLE_JUMP = 1.0f;
|
||||
}
|
||||
|
||||
void cv::gpu::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels)
|
||||
{
|
||||
ndisp = width / 4;
|
||||
if ((ndisp & 1) != 0)
|
||||
ndisp++;
|
||||
|
||||
int mm = std::max(width, height);
|
||||
iters = mm / 100 + 2;
|
||||
|
||||
levels = (int)(::log(static_cast<double>(mm)) + 1) * 4 / 5;
|
||||
if (levels == 0) levels++;
|
||||
}
|
||||
|
||||
cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp_, int iters_, int levels_, int msg_type_)
|
||||
: ndisp(ndisp_), iters(iters_), levels(levels_),
|
||||
max_data_term(DEFAULT_MAX_DATA_TERM), data_weight(DEFAULT_DATA_WEIGHT),
|
||||
max_disc_term(DEFAULT_MAX_DISC_TERM), disc_single_jump(DEFAULT_DISC_SINGLE_JUMP),
|
||||
msg_type(msg_type_), datas(levels_)
|
||||
{
|
||||
}
|
||||
|
||||
cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp_, int iters_, int levels_, float max_data_term_, float data_weight_, float max_disc_term_, float disc_single_jump_, int msg_type_)
|
||||
: ndisp(ndisp_), iters(iters_), levels(levels_),
|
||||
max_data_term(max_data_term_), data_weight(data_weight_),
|
||||
max_disc_term(max_disc_term_), disc_single_jump(disc_single_jump_),
|
||||
msg_type(msg_type_), datas(levels_)
|
||||
{
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
class StereoBeliefPropagationImpl
|
||||
{
|
||||
public:
|
||||
StereoBeliefPropagationImpl(StereoBeliefPropagation& rthis_,
|
||||
GpuMat& u_, GpuMat& d_, GpuMat& l_, GpuMat& r_,
|
||||
GpuMat& u2_, GpuMat& d2_, GpuMat& l2_, GpuMat& r2_,
|
||||
std::vector<GpuMat>& datas_, GpuMat& out_)
|
||||
: rthis(rthis_), u(u_), d(d_), l(l_), r(r_), u2(u2_), d2(d2_), l2(l2_), r2(r2_), datas(datas_), out(out_),
|
||||
zero(Scalar::all(0)), scale(rthis_.msg_type == CV_32F ? 1.0f : 10.0f)
|
||||
{
|
||||
CV_Assert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels);
|
||||
CV_Assert(rthis.msg_type == CV_32F || rthis.msg_type == CV_16S);
|
||||
CV_Assert(rthis.msg_type == CV_32F || (1 << (rthis.levels - 1)) * scale * rthis.max_data_term < std::numeric_limits<short>::max());
|
||||
}
|
||||
|
||||
void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
|
||||
{
|
||||
typedef void (*comp_data_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream);
|
||||
static const comp_data_t comp_data_callers[2][5] =
|
||||
{
|
||||
{0, comp_data_gpu<unsigned char, short>, 0, comp_data_gpu<uchar3, short>, comp_data_gpu<uchar4, short>},
|
||||
{0, comp_data_gpu<unsigned char, float>, 0, comp_data_gpu<uchar3, float>, comp_data_gpu<uchar4, float>}
|
||||
};
|
||||
|
||||
CV_Assert(left.size() == right.size() && left.type() == right.type());
|
||||
CV_Assert(left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4);
|
||||
|
||||
rows = left.rows;
|
||||
cols = left.cols;
|
||||
|
||||
int divisor = (int)pow(2.f, rthis.levels - 1.0f);
|
||||
int lowest_cols = cols / divisor;
|
||||
int lowest_rows = rows / divisor;
|
||||
const int min_image_dim_size = 2;
|
||||
CV_Assert(std::min(lowest_cols, lowest_rows) > min_image_dim_size);
|
||||
|
||||
init(stream);
|
||||
|
||||
datas[0].create(rows * rthis.ndisp, cols, rthis.msg_type);
|
||||
|
||||
comp_data_callers[rthis.msg_type == CV_32F][left.channels()](left, right, datas[0], StreamAccessor::getStream(stream));
|
||||
|
||||
calcBP(disp, stream);
|
||||
}
|
||||
|
||||
void operator()(const GpuMat& data, GpuMat& disp, Stream& stream)
|
||||
{
|
||||
CV_Assert((data.type() == rthis.msg_type) && (data.rows % rthis.ndisp == 0));
|
||||
|
||||
rows = data.rows / rthis.ndisp;
|
||||
cols = data.cols;
|
||||
|
||||
int divisor = (int)pow(2.f, rthis.levels - 1.0f);
|
||||
int lowest_cols = cols / divisor;
|
||||
int lowest_rows = rows / divisor;
|
||||
const int min_image_dim_size = 2;
|
||||
CV_Assert(std::min(lowest_cols, lowest_rows) > min_image_dim_size);
|
||||
|
||||
init(stream);
|
||||
|
||||
datas[0] = data;
|
||||
|
||||
calcBP(disp, stream);
|
||||
}
|
||||
private:
|
||||
void init(Stream& stream)
|
||||
{
|
||||
u.create(rows * rthis.ndisp, cols, rthis.msg_type);
|
||||
d.create(rows * rthis.ndisp, cols, rthis.msg_type);
|
||||
l.create(rows * rthis.ndisp, cols, rthis.msg_type);
|
||||
r.create(rows * rthis.ndisp, cols, rthis.msg_type);
|
||||
|
||||
if (rthis.levels & 1)
|
||||
{
|
||||
//can clear less area
|
||||
if (stream)
|
||||
{
|
||||
stream.enqueueMemSet(u, zero);
|
||||
stream.enqueueMemSet(d, zero);
|
||||
stream.enqueueMemSet(l, zero);
|
||||
stream.enqueueMemSet(r, zero);
|
||||
}
|
||||
else
|
||||
{
|
||||
u.setTo(zero);
|
||||
d.setTo(zero);
|
||||
l.setTo(zero);
|
||||
r.setTo(zero);
|
||||
}
|
||||
}
|
||||
|
||||
if (rthis.levels > 1)
|
||||
{
|
||||
int less_rows = (rows + 1) / 2;
|
||||
int less_cols = (cols + 1) / 2;
|
||||
|
||||
u2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type);
|
||||
d2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type);
|
||||
l2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type);
|
||||
r2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type);
|
||||
|
||||
if ((rthis.levels & 1) == 0)
|
||||
{
|
||||
if (stream)
|
||||
{
|
||||
stream.enqueueMemSet(u2, zero);
|
||||
stream.enqueueMemSet(d2, zero);
|
||||
stream.enqueueMemSet(l2, zero);
|
||||
stream.enqueueMemSet(r2, zero);
|
||||
}
|
||||
else
|
||||
{
|
||||
u2.setTo(zero);
|
||||
d2.setTo(zero);
|
||||
l2.setTo(zero);
|
||||
r2.setTo(zero);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
load_constants(rthis.ndisp, rthis.max_data_term, scale * rthis.data_weight, scale * rthis.max_disc_term, scale * rthis.disc_single_jump);
|
||||
|
||||
datas.resize(rthis.levels);
|
||||
|
||||
cols_all.resize(rthis.levels);
|
||||
rows_all.resize(rthis.levels);
|
||||
|
||||
cols_all[0] = cols;
|
||||
rows_all[0] = rows;
|
||||
}
|
||||
|
||||
void calcBP(GpuMat& disp, Stream& stream)
|
||||
{
|
||||
typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream);
|
||||
static const data_step_down_t data_step_down_callers[2] =
|
||||
{
|
||||
data_step_down_gpu<short>, data_step_down_gpu<float>
|
||||
};
|
||||
|
||||
typedef void (*level_up_messages_t)(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream);
|
||||
static const level_up_messages_t level_up_messages_callers[2] =
|
||||
{
|
||||
level_up_messages_gpu<short>, level_up_messages_gpu<float>
|
||||
};
|
||||
|
||||
typedef void (*calc_all_iterations_t)(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream);
|
||||
static const calc_all_iterations_t calc_all_iterations_callers[2] =
|
||||
{
|
||||
calc_all_iterations_gpu<short>, calc_all_iterations_gpu<float>
|
||||
};
|
||||
|
||||
typedef void (*output_t)(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz<short>& disp, cudaStream_t stream);
|
||||
static const output_t output_callers[2] =
|
||||
{
|
||||
output_gpu<short>, output_gpu<float>
|
||||
};
|
||||
|
||||
const int funcIdx = rthis.msg_type == CV_32F;
|
||||
|
||||
cudaStream_t cudaStream = StreamAccessor::getStream(stream);
|
||||
|
||||
for (int i = 1; i < rthis.levels; ++i)
|
||||
{
|
||||
cols_all[i] = (cols_all[i-1] + 1) / 2;
|
||||
rows_all[i] = (rows_all[i-1] + 1) / 2;
|
||||
|
||||
datas[i].create(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type);
|
||||
|
||||
data_step_down_callers[funcIdx](cols_all[i], rows_all[i], rows_all[i-1], datas[i-1], datas[i], cudaStream);
|
||||
}
|
||||
|
||||
PtrStepSzb mus[] = {u, u2};
|
||||
PtrStepSzb mds[] = {d, d2};
|
||||
PtrStepSzb mrs[] = {r, r2};
|
||||
PtrStepSzb mls[] = {l, l2};
|
||||
|
||||
int mem_idx = (rthis.levels & 1) ? 0 : 1;
|
||||
|
||||
for (int i = rthis.levels - 1; i >= 0; --i)
|
||||
{
|
||||
// for lower level we have already computed messages by setting to zero
|
||||
if (i != rthis.levels - 1)
|
||||
level_up_messages_callers[funcIdx](mem_idx, cols_all[i], rows_all[i], rows_all[i+1], mus, mds, mls, mrs, cudaStream);
|
||||
|
||||
calc_all_iterations_callers[funcIdx](cols_all[i], rows_all[i], rthis.iters, mus[mem_idx], mds[mem_idx], mls[mem_idx], mrs[mem_idx], datas[i], cudaStream);
|
||||
|
||||
mem_idx = (mem_idx + 1) & 1;
|
||||
}
|
||||
|
||||
if (disp.empty())
|
||||
disp.create(rows, cols, CV_16S);
|
||||
|
||||
out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
|
||||
|
||||
if (stream)
|
||||
stream.enqueueMemSet(out, zero);
|
||||
else
|
||||
out.setTo(zero);
|
||||
|
||||
output_callers[funcIdx](u, d, l, r, datas.front(), out, cudaStream);
|
||||
|
||||
if (disp.type() != CV_16S)
|
||||
{
|
||||
if (stream)
|
||||
stream.enqueueConvert(out, disp, disp.type());
|
||||
else
|
||||
out.convertTo(disp, disp.type());
|
||||
}
|
||||
}
|
||||
|
||||
StereoBeliefPropagation& rthis;
|
||||
|
||||
GpuMat& u;
|
||||
GpuMat& d;
|
||||
GpuMat& l;
|
||||
GpuMat& r;
|
||||
|
||||
GpuMat& u2;
|
||||
GpuMat& d2;
|
||||
GpuMat& l2;
|
||||
GpuMat& r2;
|
||||
|
||||
std::vector<GpuMat>& datas;
|
||||
GpuMat& out;
|
||||
|
||||
const Scalar zero;
|
||||
const float scale;
|
||||
|
||||
int rows, cols;
|
||||
|
||||
std::vector<int> cols_all, rows_all;
|
||||
};
|
||||
}
|
||||
|
||||
void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
|
||||
{
|
||||
StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out);
|
||||
impl(left, right, disp, stream);
|
||||
}
|
||||
|
||||
void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat& data, GpuMat& disp, Stream& stream)
|
||||
{
|
||||
StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out);
|
||||
impl(data, disp, stream);
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
@@ -1,311 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
||||
|
||||
void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int, int, int&, int&, int&, int&) { throw_no_cuda(); }
|
||||
|
||||
cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, int) { throw_no_cuda(); }
|
||||
cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, float, float, float, float, int, int) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
#include "opencv2/core/utility.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace stereocsbp
|
||||
{
|
||||
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
|
||||
const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& temp);
|
||||
|
||||
template<class T>
|
||||
void init_data_cost(int rows, int cols, T* disp_selected_pyr, T* data_cost_selected, size_t msg_step,
|
||||
int h, int w, int level, int nr_plane, int ndisp, int channels, bool use_local_init_data_cost, cudaStream_t stream);
|
||||
|
||||
template<class T>
|
||||
void compute_data_cost(const T* disp_selected_pyr, T* data_cost, size_t msg_step,
|
||||
int rows, int cols, int h, int w, int h2, int level, int nr_plane, int channels, cudaStream_t stream);
|
||||
|
||||
template<class T>
|
||||
void init_message(T* u_new, T* d_new, T* l_new, T* r_new,
|
||||
const T* u_cur, const T* d_cur, const T* l_cur, const T* r_cur,
|
||||
T* selected_disp_pyr_new, const T* selected_disp_pyr_cur,
|
||||
T* data_cost_selected, const T* data_cost, size_t msg_step,
|
||||
int h, int w, int nr_plane, int h2, int w2, int nr_plane2, cudaStream_t stream);
|
||||
|
||||
template<class T>
|
||||
void calc_all_iterations(T* u, T* d, T* l, T* r, const T* data_cost_selected,
|
||||
const T* selected_disp_pyr_cur, size_t msg_step, int h, int w, int nr_plane, int iters, cudaStream_t stream);
|
||||
|
||||
template<class T>
|
||||
void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
|
||||
const PtrStepSz<short>& disp, int nr_plane, cudaStream_t stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
using namespace ::cv::gpu::cudev::stereocsbp;
|
||||
|
||||
namespace
|
||||
{
|
||||
const float DEFAULT_MAX_DATA_TERM = 30.0f;
|
||||
const float DEFAULT_DATA_WEIGHT = 1.0f;
|
||||
const float DEFAULT_MAX_DISC_TERM = 160.0f;
|
||||
const float DEFAULT_DISC_SINGLE_JUMP = 10.0f;
|
||||
}
|
||||
|
||||
void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane)
|
||||
{
|
||||
ndisp = (int) ((float) width / 3.14f);
|
||||
if ((ndisp & 1) != 0)
|
||||
ndisp++;
|
||||
|
||||
int mm = std::max(width, height);
|
||||
iters = mm / 100 + ((mm > 1200)? - 4 : 4);
|
||||
|
||||
levels = (int)::log(static_cast<double>(mm)) * 2 / 3;
|
||||
if (levels == 0) levels++;
|
||||
|
||||
nr_plane = (int) ((float) ndisp / std::pow(2.0, levels + 1));
|
||||
}
|
||||
|
||||
cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_,
|
||||
int msg_type_)
|
||||
|
||||
: ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_),
|
||||
max_data_term(DEFAULT_MAX_DATA_TERM), data_weight(DEFAULT_DATA_WEIGHT),
|
||||
max_disc_term(DEFAULT_MAX_DISC_TERM), disc_single_jump(DEFAULT_DISC_SINGLE_JUMP), min_disp_th(0),
|
||||
msg_type(msg_type_), use_local_init_data_cost(true)
|
||||
{
|
||||
CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S);
|
||||
}
|
||||
|
||||
cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_,
|
||||
float max_data_term_, float data_weight_, float max_disc_term_, float disc_single_jump_,
|
||||
int min_disp_th_, int msg_type_)
|
||||
: ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_),
|
||||
max_data_term(max_data_term_), data_weight(data_weight_),
|
||||
max_disc_term(max_disc_term_), disc_single_jump(disc_single_jump_), min_disp_th(min_disp_th_),
|
||||
msg_type(msg_type_), use_local_init_data_cost(true)
|
||||
{
|
||||
CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S);
|
||||
}
|
||||
|
||||
template<class T>
|
||||
static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat& mbuf, GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
|
||||
{
|
||||
CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane
|
||||
&& left.rows == right.rows && left.cols == right.cols && left.type() == right.type());
|
||||
|
||||
CV_Assert(rthis.levels <= 8 && (left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4));
|
||||
|
||||
const Scalar zero = Scalar::all(0);
|
||||
|
||||
cudaStream_t cudaStream = StreamAccessor::getStream(stream);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Init
|
||||
|
||||
int rows = left.rows;
|
||||
int cols = left.cols;
|
||||
|
||||
rthis.levels = std::min(rthis.levels, int(log((double)rthis.ndisp) / log(2.0)));
|
||||
int levels = rthis.levels;
|
||||
|
||||
// compute sizes
|
||||
AutoBuffer<int> buf(levels * 3);
|
||||
int* cols_pyr = buf;
|
||||
int* rows_pyr = cols_pyr + levels;
|
||||
int* nr_plane_pyr = rows_pyr + levels;
|
||||
|
||||
cols_pyr[0] = cols;
|
||||
rows_pyr[0] = rows;
|
||||
nr_plane_pyr[0] = rthis.nr_plane;
|
||||
|
||||
for (int i = 1; i < levels; i++)
|
||||
{
|
||||
cols_pyr[i] = cols_pyr[i-1] / 2;
|
||||
rows_pyr[i] = rows_pyr[i-1] / 2;
|
||||
nr_plane_pyr[i] = nr_plane_pyr[i-1] * 2;
|
||||
}
|
||||
|
||||
|
||||
GpuMat u[2], d[2], l[2], r[2], disp_selected_pyr[2], data_cost, data_cost_selected;
|
||||
|
||||
|
||||
//allocate buffers
|
||||
int buffers_count = 10; // (up + down + left + right + disp_selected_pyr) * 2
|
||||
buffers_count += 2; // data_cost has twice more rows than other buffers, what's why +2, not +1;
|
||||
buffers_count += 1; // data_cost_selected
|
||||
mbuf.create(rows * rthis.nr_plane * buffers_count, cols, DataType<T>::type);
|
||||
|
||||
data_cost = mbuf.rowRange(0, rows * rthis.nr_plane * 2);
|
||||
data_cost_selected = mbuf.rowRange(data_cost.rows, data_cost.rows + rows * rthis.nr_plane);
|
||||
|
||||
for(int k = 0; k < 2; ++k) // in/out
|
||||
{
|
||||
GpuMat sub1 = mbuf.rowRange(data_cost.rows + data_cost_selected.rows, mbuf.rows);
|
||||
GpuMat sub2 = sub1.rowRange((k+0)*sub1.rows/2, (k+1)*sub1.rows/2);
|
||||
|
||||
GpuMat *buf_ptrs[] = { &u[k], &d[k], &l[k], &r[k], &disp_selected_pyr[k] };
|
||||
for(int _r = 0; _r < 5; ++_r)
|
||||
{
|
||||
*buf_ptrs[_r] = sub2.rowRange(_r * sub2.rows/5, (_r+1) * sub2.rows/5);
|
||||
assert(buf_ptrs[_r]->cols == cols && buf_ptrs[_r]->rows == rows * rthis.nr_plane);
|
||||
}
|
||||
};
|
||||
|
||||
size_t elem_step = mbuf.step / sizeof(T);
|
||||
|
||||
Size temp_size = data_cost.size();
|
||||
if ((size_t)temp_size.area() < elem_step * rows_pyr[levels - 1] * rthis.ndisp)
|
||||
temp_size = Size(static_cast<int>(elem_step), rows_pyr[levels - 1] * rthis.ndisp);
|
||||
|
||||
temp.create(temp_size, DataType<T>::type);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Compute
|
||||
|
||||
load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight, rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);
|
||||
|
||||
if (stream)
|
||||
{
|
||||
stream.enqueueMemSet(l[0], zero);
|
||||
stream.enqueueMemSet(d[0], zero);
|
||||
stream.enqueueMemSet(r[0], zero);
|
||||
stream.enqueueMemSet(u[0], zero);
|
||||
|
||||
stream.enqueueMemSet(l[1], zero);
|
||||
stream.enqueueMemSet(d[1], zero);
|
||||
stream.enqueueMemSet(r[1], zero);
|
||||
stream.enqueueMemSet(u[1], zero);
|
||||
|
||||
stream.enqueueMemSet(data_cost, zero);
|
||||
stream.enqueueMemSet(data_cost_selected, zero);
|
||||
}
|
||||
else
|
||||
{
|
||||
l[0].setTo(zero);
|
||||
d[0].setTo(zero);
|
||||
r[0].setTo(zero);
|
||||
u[0].setTo(zero);
|
||||
|
||||
l[1].setTo(zero);
|
||||
d[1].setTo(zero);
|
||||
r[1].setTo(zero);
|
||||
u[1].setTo(zero);
|
||||
|
||||
data_cost.setTo(zero);
|
||||
data_cost_selected.setTo(zero);
|
||||
}
|
||||
|
||||
int cur_idx = 0;
|
||||
|
||||
for (int i = levels - 1; i >= 0; i--)
|
||||
{
|
||||
if (i == levels - 1)
|
||||
{
|
||||
init_data_cost(left.rows, left.cols, disp_selected_pyr[cur_idx].ptr<T>(), data_cost_selected.ptr<T>(),
|
||||
elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], rthis.ndisp, left.channels(), rthis.use_local_init_data_cost, cudaStream);
|
||||
}
|
||||
else
|
||||
{
|
||||
compute_data_cost(disp_selected_pyr[cur_idx].ptr<T>(), data_cost.ptr<T>(), elem_step,
|
||||
left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), cudaStream);
|
||||
|
||||
int new_idx = (cur_idx + 1) & 1;
|
||||
|
||||
init_message(u[new_idx].ptr<T>(), d[new_idx].ptr<T>(), l[new_idx].ptr<T>(), r[new_idx].ptr<T>(),
|
||||
u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
|
||||
disp_selected_pyr[new_idx].ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(),
|
||||
data_cost_selected.ptr<T>(), data_cost.ptr<T>(), elem_step, rows_pyr[i],
|
||||
cols_pyr[i], nr_plane_pyr[i], rows_pyr[i+1], cols_pyr[i+1], nr_plane_pyr[i+1], cudaStream);
|
||||
|
||||
cur_idx = new_idx;
|
||||
}
|
||||
|
||||
calc_all_iterations(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
|
||||
data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), elem_step,
|
||||
rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], rthis.iters, cudaStream);
|
||||
}
|
||||
|
||||
if (disp.empty())
|
||||
disp.create(rows, cols, CV_16S);
|
||||
|
||||
out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out));
|
||||
|
||||
if (stream)
|
||||
stream.enqueueMemSet(out, zero);
|
||||
else
|
||||
out.setTo(zero);
|
||||
|
||||
compute_disp(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
|
||||
data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), elem_step, out, nr_plane_pyr[0], cudaStream);
|
||||
|
||||
if (disp.type() != CV_16S)
|
||||
{
|
||||
if (stream)
|
||||
stream.enqueueConvert(out, disp, disp.type());
|
||||
else
|
||||
out.convertTo(disp, disp.type());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
typedef void (*csbp_operator_t)(StereoConstantSpaceBP& rthis, GpuMat& mbuf,
|
||||
GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream);
|
||||
|
||||
const static csbp_operator_t operators[] = {0, 0, 0, csbp_operator<short>, 0, csbp_operator<float>, 0, 0};
|
||||
|
||||
void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
|
||||
{
|
||||
CV_Assert(msg_type == CV_32F || msg_type == CV_16S);
|
||||
operators[msg_type](*this, messages_buffers, temp, out, left, right, disp, stream);
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
@@ -1,348 +0,0 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
|
||||
#ifdef HAVE_CUDA
|
||||
|
||||
using namespace cvtest;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// StereoBM
|
||||
|
||||
struct StereoBM : testing::TestWithParam<cv::gpu::DeviceInfo>
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
devInfo = GetParam();
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
}
|
||||
};
|
||||
|
||||
GPU_TEST_P(StereoBM, Regression)
|
||||
{
|
||||
cv::Mat left_image = readImage("stereobm/aloe-L.png", cv::IMREAD_GRAYSCALE);
|
||||
cv::Mat right_image = readImage("stereobm/aloe-R.png", cv::IMREAD_GRAYSCALE);
|
||||
cv::Mat disp_gold = readImage("stereobm/aloe-disp.png", cv::IMREAD_GRAYSCALE);
|
||||
|
||||
ASSERT_FALSE(left_image.empty());
|
||||
ASSERT_FALSE(right_image.empty());
|
||||
ASSERT_FALSE(disp_gold.empty());
|
||||
|
||||
cv::gpu::StereoBM_GPU bm(0, 128, 19);
|
||||
cv::gpu::GpuMat disp;
|
||||
|
||||
bm(loadMat(left_image), loadMat(right_image), disp);
|
||||
|
||||
EXPECT_MAT_NEAR(disp_gold, disp, 0.0);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoBM, ALL_DEVICES);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// StereoBeliefPropagation
|
||||
|
||||
struct StereoBeliefPropagation : testing::TestWithParam<cv::gpu::DeviceInfo>
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
devInfo = GetParam();
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
}
|
||||
};
|
||||
|
||||
GPU_TEST_P(StereoBeliefPropagation, Regression)
|
||||
{
|
||||
cv::Mat left_image = readImage("stereobp/aloe-L.png");
|
||||
cv::Mat right_image = readImage("stereobp/aloe-R.png");
|
||||
cv::Mat disp_gold = readImage("stereobp/aloe-disp.png", cv::IMREAD_GRAYSCALE);
|
||||
|
||||
ASSERT_FALSE(left_image.empty());
|
||||
ASSERT_FALSE(right_image.empty());
|
||||
ASSERT_FALSE(disp_gold.empty());
|
||||
|
||||
cv::gpu::StereoBeliefPropagation bp(64, 8, 2, 25, 0.1f, 15, 1, CV_16S);
|
||||
cv::gpu::GpuMat disp;
|
||||
|
||||
bp(loadMat(left_image), loadMat(right_image), disp);
|
||||
|
||||
cv::Mat h_disp(disp);
|
||||
h_disp.convertTo(h_disp, disp_gold.depth());
|
||||
|
||||
EXPECT_MAT_NEAR(disp_gold, h_disp, 0.0);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoBeliefPropagation, ALL_DEVICES);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// StereoConstantSpaceBP
|
||||
|
||||
struct StereoConstantSpaceBP : testing::TestWithParam<cv::gpu::DeviceInfo>
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
devInfo = GetParam();
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
}
|
||||
};
|
||||
|
||||
GPU_TEST_P(StereoConstantSpaceBP, Regression)
|
||||
{
|
||||
cv::Mat left_image = readImage("csstereobp/aloe-L.png");
|
||||
cv::Mat right_image = readImage("csstereobp/aloe-R.png");
|
||||
|
||||
cv::Mat disp_gold;
|
||||
|
||||
if (supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20))
|
||||
disp_gold = readImage("csstereobp/aloe-disp.png", cv::IMREAD_GRAYSCALE);
|
||||
else
|
||||
disp_gold = readImage("csstereobp/aloe-disp_CC1X.png", cv::IMREAD_GRAYSCALE);
|
||||
|
||||
ASSERT_FALSE(left_image.empty());
|
||||
ASSERT_FALSE(right_image.empty());
|
||||
ASSERT_FALSE(disp_gold.empty());
|
||||
|
||||
cv::gpu::StereoConstantSpaceBP csbp(128, 16, 4, 4);
|
||||
cv::gpu::GpuMat disp;
|
||||
|
||||
csbp(loadMat(left_image), loadMat(right_image), disp);
|
||||
|
||||
cv::Mat h_disp(disp);
|
||||
h_disp.convertTo(h_disp, disp_gold.depth());
|
||||
|
||||
EXPECT_MAT_NEAR(disp_gold, h_disp, 1.0);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoConstantSpaceBP, ALL_DEVICES);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// transformPoints
|
||||
|
||||
struct TransformPoints : testing::TestWithParam<cv::gpu::DeviceInfo>
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
devInfo = GetParam();
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
}
|
||||
};
|
||||
|
||||
GPU_TEST_P(TransformPoints, Accuracy)
|
||||
{
|
||||
cv::Mat src = randomMat(cv::Size(1000, 1), CV_32FC3, 0, 10);
|
||||
cv::Mat rvec = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
|
||||
cv::Mat tvec = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
|
||||
|
||||
cv::gpu::GpuMat dst;
|
||||
cv::gpu::transformPoints(loadMat(src), rvec, tvec, dst);
|
||||
|
||||
ASSERT_EQ(src.size(), dst.size());
|
||||
ASSERT_EQ(src.type(), dst.type());
|
||||
|
||||
cv::Mat h_dst(dst);
|
||||
|
||||
cv::Mat rot;
|
||||
cv::Rodrigues(rvec, rot);
|
||||
|
||||
for (int i = 0; i < h_dst.cols; ++i)
|
||||
{
|
||||
cv::Point3f res = h_dst.at<cv::Point3f>(0, i);
|
||||
|
||||
cv::Point3f p = src.at<cv::Point3f>(0, i);
|
||||
cv::Point3f res_gold(
|
||||
rot.at<float>(0, 0) * p.x + rot.at<float>(0, 1) * p.y + rot.at<float>(0, 2) * p.z + tvec.at<float>(0, 0),
|
||||
rot.at<float>(1, 0) * p.x + rot.at<float>(1, 1) * p.y + rot.at<float>(1, 2) * p.z + tvec.at<float>(0, 1),
|
||||
rot.at<float>(2, 0) * p.x + rot.at<float>(2, 1) * p.y + rot.at<float>(2, 2) * p.z + tvec.at<float>(0, 2));
|
||||
|
||||
ASSERT_POINT3_NEAR(res_gold, res, 1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_Calib3D, TransformPoints, ALL_DEVICES);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// ProjectPoints
|
||||
|
||||
struct ProjectPoints : testing::TestWithParam<cv::gpu::DeviceInfo>
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
devInfo = GetParam();
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
}
|
||||
};
|
||||
|
||||
GPU_TEST_P(ProjectPoints, Accuracy)
|
||||
{
|
||||
cv::Mat src = randomMat(cv::Size(1000, 1), CV_32FC3, 0, 10);
|
||||
cv::Mat rvec = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
|
||||
cv::Mat tvec = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
|
||||
cv::Mat camera_mat = randomMat(cv::Size(3, 3), CV_32F, 0.5, 1);
|
||||
camera_mat.at<float>(0, 1) = 0.f;
|
||||
camera_mat.at<float>(1, 0) = 0.f;
|
||||
camera_mat.at<float>(2, 0) = 0.f;
|
||||
camera_mat.at<float>(2, 1) = 0.f;
|
||||
|
||||
cv::gpu::GpuMat dst;
|
||||
cv::gpu::projectPoints(loadMat(src), rvec, tvec, camera_mat, cv::Mat(), dst);
|
||||
|
||||
ASSERT_EQ(1, dst.rows);
|
||||
ASSERT_EQ(MatType(CV_32FC2), MatType(dst.type()));
|
||||
|
||||
std::vector<cv::Point2f> dst_gold;
|
||||
cv::projectPoints(src, rvec, tvec, camera_mat, cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)), dst_gold);
|
||||
|
||||
ASSERT_EQ(dst_gold.size(), static_cast<size_t>(dst.cols));
|
||||
|
||||
cv::Mat h_dst(dst);
|
||||
|
||||
for (size_t i = 0; i < dst_gold.size(); ++i)
|
||||
{
|
||||
cv::Point2f res = h_dst.at<cv::Point2f>(0, (int)i);
|
||||
cv::Point2f res_gold = dst_gold[i];
|
||||
|
||||
ASSERT_LE(cv::norm(res_gold - res) / cv::norm(res_gold), 1e-3f);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_Calib3D, ProjectPoints, ALL_DEVICES);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SolvePnPRansac
|
||||
|
||||
struct SolvePnPRansac : testing::TestWithParam<cv::gpu::DeviceInfo>
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
devInfo = GetParam();
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
}
|
||||
};
|
||||
|
||||
GPU_TEST_P(SolvePnPRansac, Accuracy)
|
||||
{
|
||||
cv::Mat object = randomMat(cv::Size(5000, 1), CV_32FC3, 0, 100);
|
||||
cv::Mat camera_mat = randomMat(cv::Size(3, 3), CV_32F, 0.5, 1);
|
||||
camera_mat.at<float>(0, 1) = 0.f;
|
||||
camera_mat.at<float>(1, 0) = 0.f;
|
||||
camera_mat.at<float>(2, 0) = 0.f;
|
||||
camera_mat.at<float>(2, 1) = 0.f;
|
||||
|
||||
std::vector<cv::Point2f> image_vec;
|
||||
cv::Mat rvec_gold;
|
||||
cv::Mat tvec_gold;
|
||||
rvec_gold = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
|
||||
tvec_gold = randomMat(cv::Size(3, 1), CV_32F, 0, 1);
|
||||
cv::projectPoints(object, rvec_gold, tvec_gold, camera_mat, cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)), image_vec);
|
||||
|
||||
cv::Mat rvec, tvec;
|
||||
std::vector<int> inliers;
|
||||
cv::gpu::solvePnPRansac(object, cv::Mat(1, (int)image_vec.size(), CV_32FC2, &image_vec[0]),
|
||||
camera_mat, cv::Mat(1, 8, CV_32F, cv::Scalar::all(0)),
|
||||
rvec, tvec, false, 200, 2.f, 100, &inliers);
|
||||
|
||||
ASSERT_LE(cv::norm(rvec - rvec_gold), 1e-3);
|
||||
ASSERT_LE(cv::norm(tvec - tvec_gold), 1e-3);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_Calib3D, SolvePnPRansac, ALL_DEVICES);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// reprojectImageTo3D
|
||||
|
||||
PARAM_TEST_CASE(ReprojectImageTo3D, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo;
|
||||
cv::Size size;
|
||||
int depth;
|
||||
bool useRoi;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
devInfo = GET_PARAM(0);
|
||||
size = GET_PARAM(1);
|
||||
depth = GET_PARAM(2);
|
||||
useRoi = GET_PARAM(3);
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
}
|
||||
};
|
||||
|
||||
GPU_TEST_P(ReprojectImageTo3D, Accuracy)
|
||||
{
|
||||
cv::Mat disp = randomMat(size, depth, 5.0, 30.0);
|
||||
cv::Mat Q = randomMat(cv::Size(4, 4), CV_32FC1, 0.1, 1.0);
|
||||
|
||||
cv::gpu::GpuMat dst;
|
||||
cv::gpu::reprojectImageTo3D(loadMat(disp, useRoi), dst, Q, 3);
|
||||
|
||||
cv::Mat dst_gold;
|
||||
cv::reprojectImageTo3D(disp, dst_gold, Q, false);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(GPU_Calib3D, ReprojectImageTo3D, testing::Combine(
|
||||
ALL_DEVICES,
|
||||
DIFFERENT_SIZES,
|
||||
testing::Values(MatDepth(CV_8U), MatDepth(CV_16S)),
|
||||
WHOLE_SUBMAT));
|
||||
|
||||
#endif // HAVE_CUDA
|
Reference in New Issue
Block a user