Merge pull request #2243 from ilya-lavrenov:ocl2trash
This commit is contained in:
@ -4,4 +4,4 @@ endif()
set(the_description "Functionality with possible limitations on the use")
set(the_description "Functionality with possible limitations on the use")
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
ocv_define_module(nonfree opencv_imgproc opencv_features2d opencv_calib3d OPTIONAL opencv_cudaarithm opencv_ocl)
ocv_define_module(nonfree opencv_imgproc opencv_features2d opencv_calib3d OPTIONAL opencv_cudaarithm)
@ -1,11 +0,0 @@
set(the_description "OpenCL-accelerated Computer Vision")
ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_calib3d opencv_ml "${OPENCL_LIBRARIES}")
if(TARGET opencv_test_ocl)
target_link_libraries(opencv_test_ocl "${OPENCL_LIBRARIES}")
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)
@ -1,328 +0,0 @@
Camera Calibration and 3D Reconstruction
.. highlight:: cpp
.. ocv:class:: ocl::StereoBM_OCL
Class computing stereo correspondence (disparity map) using the block matching algorithm. ::
enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };
//! the default constructor
//! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8.
StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);
//! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
//! Output disparity has CV_8U type.
void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);
//! A heuristic that tries to estimate
// if the current GPU will be faster than the CPU in this algorithm.
// It queries current active device.
static bool checkIfGpuCallReasonable();
int preset;
int ndisp;
int winSize;
// If avergeTexThreshold == 0 => post-processing is disabled
// If avergeTexThreshold != 0 then disparity is set to 0 in each point (x,y) where for the left image
// SumOfHorizontalGradientsInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
// i.e. input left image is low textured.
float avergeTexThreshold;
/* hidden */
The class also performs pre- and post-filtering steps: Sobel pre-filtering (if the ``PREFILTER_XSOBEL`` flag is set) and low textureness filtering (if ``avergeTexThreshold > 0`` ). If ``avergeTexThreshold = 0`` , low textureness filtering is disabled. Otherwise, the disparity is set to 0 in each point ``(x, y)`` , where for the left image
.. math::
\sum HorizontalGradientsInWindow(x, y, winSize) < (winSize \cdot winSize) \cdot avergeTexThreshold
This means that the input left image is low textured.
Enables :ocv:class:`ocl::StereoBM_OCL` constructors.
.. ocv:function:: ocl::StereoBM_OCL::StereoBM_OCL()
.. ocv:function:: ocl::StereoBM_OCL::StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ)
:param preset: Parameter presetting:
* **BASIC_PRESET** Basic mode without pre-processing.
* **PREFILTER_XSOBEL** Sobel pre-filtering mode.
:param ndisparities: Number of disparities. It must be a multiple of 8 and less than or equal to 256.
:param winSize: Block size.
ocl::StereoBM_OCL::operator ()
Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair.
.. ocv:function:: void ocl::StereoBM_OCL::operator ()(const oclMat& left, const oclMat& right, oclMat& disparity)
:param left: Left image. Only ``CV_8UC1`` type is supported.
:param right: Right image with the same size and the same type as the left one.
:param disparity: Output disparity map. It is a ``CV_8UC1`` image with the same size as the input images.
Uses a heuristic method to estimate whether the current GPU is faster than the CPU in this algorithm. It queries the currently active device.
.. ocv:function:: bool ocl::StereoBM_OCL::checkIfGpuCallReasonable()
.. ocv:class:: ocl::StereoBeliefPropagation
Class computing stereo correspondence using the belief propagation algorithm. ::
class CV_EXPORTS StereoBeliefPropagation
enum { DEFAULT_NDISP = 64 };
enum { DEFAULT_ITERS = 5 };
enum { DEFAULT_LEVELS = 5 };
static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels);
explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP,
int iters = DEFAULT_ITERS,
int levels = DEFAULT_LEVELS,
int msg_type = CV_16S);
StereoBeliefPropagation(int ndisp, int iters, int levels,
float max_data_term, float data_weight,
float max_disc_term, float disc_single_jump,
int msg_type = CV_32F);
void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
void operator()(const oclMat &data, oclMat &disparity);
int ndisp;
int iters;
int levels;
float max_data_term;
float data_weight;
float max_disc_term;
float disc_single_jump;
int msg_type;
/* hidden */
The class implements the algorithm described in [Felzenszwalb2006]_ . It can compute its own data cost (using a truncated linear model) or use a user-provided data cost.
.. note::
``StereoBeliefPropagation`` requires a lot of memory for message storage:
.. math::
width \_ step \cdot height \cdot ndisp \cdot 4 \cdot (1 + 0.25)
and for data cost storage:
.. math::
width\_step \cdot height \cdot ndisp \cdot (1 + 0.25 + 0.0625 + \dotsm + \frac{1}{4^{levels}})
``width_step`` is the number of bytes in a line including padding.
Enables the :ocv:class:`ocl::StereoBeliefPropagation` constructors.
.. ocv:function:: ocl::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int msg_type = CV_16S)
.. ocv:function:: ocl::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp, int iters, int levels, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int msg_type = CV_32F)
:param ndisp: Number of disparities.
:param iters: Number of BP iterations on each level.
:param levels: Number of levels.
:param max_data_term: Threshold for data cost truncation.
:param data_weight: Data weight.
:param max_disc_term: Threshold for discontinuity truncation.
:param disc_single_jump: Discontinuity single jump.
:param msg_type: Type for messages. ``CV_16SC1`` and ``CV_32FC1`` types are supported.
``StereoBeliefPropagation`` uses a truncated linear model for the data cost and discontinuity terms:
.. math::
DataCost = data \_ weight \cdot \min ( \lvert Img_{Left}(x,y)-Img_{Right}(x-d,y) \rvert , max \_ data \_ term)
.. math::
DiscTerm = \min (disc \_ single \_ jump \cdot \lvert f_1-f_2 \rvert , max \_ disc \_ term)
For more details, see [Felzenszwalb2006]_.
By default, :ocv:class:`ocl::StereoBeliefPropagation` uses floating-point arithmetics and the ``CV_32FC1`` type for messages. But it can also use fixed-point arithmetics and the ``CV_16SC1`` message type for better performance. To avoid an overflow in this case, the parameters must satisfy the following requirement:
.. math::
10 \cdot 2^{levels-1} \cdot max \_ data \_ term < SHRT \_ MAX
Uses a heuristic method to compute the recommended parameters ( ``ndisp``, ``iters`` and ``levels`` ) for the specified image size ( ``width`` and ``height`` ).
.. ocv:function:: void ocl::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels)
ocl::StereoBeliefPropagation::operator ()
Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair or data cost.
.. ocv:function:: void ocl::StereoBeliefPropagation::operator ()(const oclMat& left, const oclMat& right, oclMat& disparity)
.. ocv:function:: void ocl::StereoBeliefPropagation::operator ()(const oclMat& data, oclMat& disparity)
:param left: Left image. ``CV_8UC1`` , ``CV_8UC3`` and ``CV_8UC4`` types are supported.
:param right: Right image with the same size and the same type as the left one.
:param data: User-specified data cost, a matrix of ``msg_type`` type and ``Size(<image columns>*ndisp, <image rows>)`` size.
:param disparity: Output disparity map. If ``disparity`` is empty, the output type is ``CV_16SC1`` . Otherwise, the type is retained.
.. ocv:class:: ocl::StereoConstantSpaceBP
Class computing stereo correspondence using the constant space belief propagation algorithm. ::
class CV_EXPORTS StereoConstantSpaceBP
enum { DEFAULT_NDISP = 128 };
enum { DEFAULT_ITERS = 8 };
enum { DEFAULT_LEVELS = 4 };
enum { DEFAULT_NR_PLANE = 4 };
static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane);
explicit StereoConstantSpaceBP(
int ndisp = DEFAULT_NDISP,
int iters = DEFAULT_ITERS,
int levels = DEFAULT_LEVELS,
int nr_plane = DEFAULT_NR_PLANE,
int msg_type = CV_32F);
StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane,
float max_data_term, float data_weight, float max_disc_term, float disc_single_jump,
int min_disp_th = 0,
int msg_type = CV_32F);
void operator()(const oclMat &left, const oclMat &right, oclMat &disparity);
int ndisp;
int iters;
int levels;
int nr_plane;
float max_data_term;
float data_weight;
float max_disc_term;
float disc_single_jump;
int min_disp_th;
int msg_type;
bool use_local_init_data_cost;
/* hidden */
The class implements the algorithm described in [Yang2010]_. ``StereoConstantSpaceBP`` supports both local minimum and global minimum data cost initialization algorithms. For more details, see the paper mentioned above. By default, a local algorithm is used. To enable a global algorithm, set ``use_local_init_data_cost`` to ``false`` .
Enables the :ocv:class:`ocl::StereoConstantSpaceBP` constructors.
.. ocv:function:: ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int nr_plane = DEFAULT_NR_PLANE, int msg_type = CV_32F)
.. ocv:function:: ocl::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th = 0, int msg_type = CV_32F)
:param ndisp: Number of disparities.
:param iters: Number of BP iterations on each level.
:param levels: Number of levels.
:param nr_plane: Number of disparity levels on the first level.
:param max_data_term: Truncation of data cost.
:param data_weight: Data weight.
:param max_disc_term: Truncation of discontinuity.
:param disc_single_jump: Discontinuity single jump.
:param min_disp_th: Minimal disparity threshold.
:param msg_type: Type for messages. ``CV_16SC1`` and ``CV_32FC1`` types are supported.
``StereoConstantSpaceBP`` uses a truncated linear model for the data cost and discontinuity terms:
.. math::
DataCost = data \_ weight \cdot \min ( \lvert I_2-I_1 \rvert , max \_ data \_ term)
.. math::
DiscTerm = \min (disc \_ single \_ jump \cdot \lvert f_1-f_2 \rvert , max \_ disc \_ term)
For more details, see [Yang2010]_.
By default, ``StereoConstantSpaceBP`` uses floating-point arithmetics and the ``CV_32FC1`` type for messages. But it can also use fixed-point arithmetics and the ``CV_16SC1`` message type for better performance. To avoid an overflow in this case, the parameters must satisfy the following requirement:
.. math::
10 \cdot 2^{levels-1} \cdot max \_ data \_ term < SHRT \_ MAX
Uses a heuristic method to compute the recommended parameters ( ``ndisp``, ``iters``, ``levels`` and ``nr_plane`` ) for the specified image size ( ``width`` and ``height`` ).
.. ocv:function:: void ocl::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane)
ocl::StereoConstantSpaceBP::operator ()
Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair.
.. ocv:function:: void ocl::StereoConstantSpaceBP::operator ()(const oclMat& left, const oclMat& right, oclMat& disparity)
:param left: Left image. ``CV_8UC1`` , ``CV_8UC3`` and ``CV_8UC4`` types are supported.
:param right: Right image with the same size and the same type as the left one.
:param disparity: Output disparity map. If ``disparity`` is empty, the output type is ``CV_16SC1`` . Otherwise, the output type is ``disparity.type()`` .
@ -1,189 +0,0 @@
Data Structures
.. ocv:class:: ocl::oclMat
OpenCV C++ 1-D or 2-D dense array class ::
class CV_EXPORTS oclMat
//! default constructor
//! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
oclMat(int rows, int cols, int type);
oclMat(Size size, int type);
//! constructs oclMatrix and fills it with the specified value _s.
oclMat(int rows, int cols, int type, const Scalar &s);
oclMat(Size size, int type, const Scalar &s);
//! copy constructor
oclMat(const oclMat &m);
//! constructor for oclMatrix headers pointing to user-allocated data
oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP);
oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP);
//! creates a matrix header for a part of the bigger matrix
oclMat(const oclMat &m, const Range &rowRange, const Range &colRange);
oclMat(const oclMat &m, const Rect &roi);
//! builds oclMat from Mat. Performs a blocking upload to the device.
explicit oclMat (const Mat &m);
//! destructor - calls release()
//! assignment operators
oclMat &operator = (const oclMat &m);
//! assignment operator. Performs a blocking upload to the device.
oclMat &operator = (const Mat &m);
oclMat &operator = (const oclMatExpr& expr);
//! performs a blocking upload of data to oclMat.
void upload(const cv::Mat &m);
//! downloads data from device to host memory. Blocking calls.
operator Mat() const;
void download(cv::Mat &m) const;
//! convert to _InputArray
operator _InputArray();
//! convert to _OutputArray
operator _OutputArray();
//! returns a new oclMatrix header for the specified row
oclMat row(int y) const;
//! returns a new oclMatrix header for the specified column
oclMat col(int x) const;
//! ... for the specified row span
oclMat rowRange(int startrow, int endrow) const;
oclMat rowRange(const Range &r) const;
//! ... for the specified column span
oclMat colRange(int startcol, int endcol) const;
oclMat colRange(const Range &r) const;
//! returns deep copy of the oclMatrix, i.e. the data is copied
oclMat clone() const;
//! copies those oclMatrix elements to "m" that are marked with non-zero mask elements.
// It calls m.create(this->size(), this->type()).
// It supports any data type
void copyTo( oclMat &m, const oclMat &mask = oclMat()) const;
//! converts oclMatrix to another datatype with optional scaling. See cvConvertScale.
void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const;
void assignTo( oclMat &m, int type = -1 ) const;
//! sets every oclMatrix element to s
oclMat& operator = (const Scalar &s);
//! sets some of the oclMatrix elements to s, according to the mask
oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat());
//! creates alternative oclMatrix header for the same data, with different
// number of channels and/or different number of rows. see cvReshape.
oclMat reshape(int cn, int rows = 0) const;
//! allocates new oclMatrix data unless the oclMatrix already has specified size and type.
// previous data is unreferenced if needed.
void create(int rows, int cols, int type);
void create(Size size, int type);
//! allocates new oclMatrix with specified device memory type.
void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type);
void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type);
//! decreases reference counter;
// deallocate the data when reference counter reaches 0.
void release();
//! swaps with other smart pointer
void swap(oclMat &mat);
//! locates oclMatrix header within a parent oclMatrix. See below
void locateROI( Size &wholeSize, Point &ofs ) const;
//! moves/resizes the current oclMatrix ROI inside the parent oclMatrix.
oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
//! extracts a rectangular sub-oclMatrix
// (this is a generalized form of row, rowRange etc.)
oclMat operator()( Range rowRange, Range colRange ) const;
oclMat operator()( const Rect &roi ) const;
oclMat& operator+=( const oclMat& m );
oclMat& operator-=( const oclMat& m );
oclMat& operator*=( const oclMat& m );
oclMat& operator/=( const oclMat& m );
//! returns true if the oclMatrix data is continuous
// (i.e. when there are no gaps between successive rows).
// similar to CV_IS_oclMat_CONT(cvoclMat->type)
bool isContinuous() const;
//! returns element size in bytes,
// similar to CV_ELEM_SIZE(cvMat->type)
size_t elemSize() const;
//! returns the size of element channel in bytes.
size_t elemSize1() const;
//! returns element type, similar to CV_MAT_TYPE(cvMat->type)
int type() const;
//! returns element type, i.e. 8UC3 returns 8UC4 because in ocl
//! 3 channels element actually use 4 channel space
int ocltype() const;
//! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
int depth() const;
//! returns the number of channels, similar to CV_MAT_CN(cvMat->type)
int channels() const;
//! returns the number of channels as stored in ocl; returns 4 for 3-channel elements,
//! because 3-channel elements actually use 4-channel space
int oclchannels() const;
//! returns step/elemSize1()
size_t step1() const;
//! returns oclMatrix size:
// width == number of columns, height == number of rows
Size size() const;
//! returns true if oclMatrix data is NULL
bool empty() const;
//! matrix transposition
oclMat t() const;
/*! includes several bit-fields:
- the magic signature
- continuity flag
- depth
- number of channels
int flags;
//! the number of rows and columns
int rows, cols;
//! a distance between successive rows in bytes; includes the gap if any
size_t step;
//! pointer to the data(OCL memory object)
uchar *data;
//! pointer to the reference counter;
// when oclMatrix points to user-allocated data, the pointer is NULL
int *refcount;
//! helper fields used in locateROI and adjustROI
//datastart and dataend are not used in current version
uchar *datastart;
uchar *dataend;
//! OpenCL context associated with the oclMat object.
Context *clCxt;
//add offset for handle ROI, calculated in byte
int offset;
//add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
int wholerows;
int wholecols;
Basically speaking, ``oclMat`` mirrors ``Mat`` with OCL-specific extensions; its members have the same meaning and usage as those of ``Mat``, except for the following:
* ``datastart`` and ``dataend`` are replaced with ``wholerows`` and ``wholecols``
* Only basic flags are supported in ``oclMat`` (i.e. depth and number of channels)
* All 3-channel matrices (i.e. RGB images) are represented by 4-channel matrices in ``oclMat``. This means that a 3-channel image occupies 4-channel space with the last channel unused. We provide a transparent interface to handle the difference between OpenCV ``Mat`` and ``oclMat``.
For example: If an ``oclMat`` has 3 channels, ``channels()`` returns 3 and ``oclchannels()`` returns 4
@ -1,836 +0,0 @@
Feature Detection And Description
.. highlight:: cpp
Finds edges in an image using the [Canny86]_ algorithm.
.. ocv:function:: void ocl::Canny(const oclMat& image, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
.. ocv:function:: void ocl::Canny(const oclMat& image, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false)
.. ocv:function:: void ocl::Canny(const oclMat& dx, const oclMat& dy, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false)
.. ocv:function:: void ocl::Canny(const oclMat& dx, const oclMat& dy, CannyBuf& buf, oclMat& edges, double low_thresh, double high_thresh, bool L2gradient = false)
:param image: Single-channel 8-bit input image.
:param dx: First derivative of image in the vertical direction. Support only ``CV_32S`` type.
:param dy: First derivative of image in the horizontal direction. Support only ``CV_32S`` type.
:param edges: Output edge map. It has the same size and type as ``image`` .
:param low_thresh: First threshold for the hysteresis procedure.
:param high_thresh: Second threshold for the hysteresis procedure.
:param apperture_size: Aperture size for the :ocv:func:`Sobel` operator.
:param L2gradient: Flag indicating whether a more accurate :math:`L_2` norm :math:`=\sqrt{(dI/dx)^2 + (dI/dy)^2}` should be used to compute the image gradient magnitude ( ``L2gradient=true`` ), or a faster default :math:`L_1` norm :math:`=|dI/dx|+|dI/dy|` is enough ( ``L2gradient=false`` ).
:param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
.. seealso:: :ocv:func:`Canny`
.. ocv:class:: ocl::BruteForceMatcher_OCL_base
Brute-force descriptor matcher. For each descriptor in the first set, this matcher finds the closest descriptor in the second set by trying each one. This descriptor matcher supports masking permissible matches between descriptor sets. ::
class BruteForceMatcher_OCL_base
enum DistType {L1Dist = 0, L2Dist, HammingDist};
// Add descriptors to train descriptor collection.
void add(const std::vector<oclMat>& descCollection);
// Get train descriptors collection.
const std::vector<oclMat>& getTrainDescriptors() const;
// Clear train descriptors collection.
void clear();
// Return true if there are no train descriptors in collection.
bool empty() const;
// Return true if the matcher supports mask in match methods.
bool isMaskSupported() const;
void matchSingle(const oclMat& query, const oclMat& train,
oclMat& trainIdx, oclMat& distance,
const oclMat& mask = oclMat());
static void matchDownload(const oclMat& trainIdx,
const oclMat& distance, std::vector<DMatch>& matches);
static void matchConvert(const Mat& trainIdx,
const Mat& distance, std::vector<DMatch>& matches);
void match(const oclMat& query, const oclMat& train,
std::vector<DMatch>& matches, const oclMat& mask = oclMat());
void makeGpuCollection(oclMat& trainCollection, oclMat& maskCollection,
const vector<oclMat>& masks = std::vector<oclMat>());
void matchCollection(const oclMat& query, const oclMat& trainCollection,
oclMat& trainIdx, oclMat& imgIdx, oclMat& distance,
const oclMat& maskCollection);
static void matchDownload(const oclMat& trainIdx, oclMat& imgIdx,
const oclMat& distance, std::vector<DMatch>& matches);
static void matchConvert(const Mat& trainIdx, const Mat& imgIdx,
const Mat& distance, std::vector<DMatch>& matches);
void match(const oclMat& query, std::vector<DMatch>& matches,
const std::vector<oclMat>& masks = std::vector<oclMat>());
void knnMatchSingle(const oclMat& query, const oclMat& train,
oclMat& trainIdx, oclMat& distance, oclMat& allDist, int k,
const oclMat& mask = oclMat());
static void knnMatchDownload(const oclMat& trainIdx, const oclMat& distance,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
static void knnMatchConvert(const Mat& trainIdx, const Mat& distance,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
void knnMatch(const oclMat& query, const oclMat& train,
std::vector< std::vector<DMatch> >& matches, int k,
const oclMat& mask = oclMat(), bool compactResult = false);
void knnMatch2Collection(const oclMat& query, const oclMat& trainCollection,
oclMat& trainIdx, oclMat& imgIdx, oclMat& distance,
const oclMat& maskCollection = oclMat());
static void knnMatch2Download(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
static void knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
void knnMatch(const oclMat& query, std::vector< std::vector<DMatch> >& matches, int k,
const std::vector<oclMat>& masks = std::vector<oclMat>(),
bool compactResult = false);
void radiusMatchSingle(const oclMat& query, const oclMat& train,
oclMat& trainIdx, oclMat& distance, oclMat& nMatches, float maxDistance,
const oclMat& mask = oclMat());
static void radiusMatchDownload(const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
static void radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
void radiusMatch(const oclMat& query, const oclMat& train,
std::vector< std::vector<DMatch> >& matches, float maxDistance,
const oclMat& mask = oclMat(), bool compactResult = false);
void radiusMatchCollection(const oclMat& query, oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, oclMat& nMatches, float maxDistance,
const std::vector<oclMat>& masks = std::vector<oclMat>());
static void radiusMatchDownload(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, const oclMat& nMatches,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
static void radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches,
std::vector< std::vector<DMatch> >& matches, bool compactResult = false);
void radiusMatch(const oclMat& query, std::vector< std::vector<DMatch> >& matches, float maxDistance,
const std::vector<oclMat>& masks = std::vector<oclMat>(), bool compactResult = false);
DistType distType;
std::vector<oclMat> trainDescCollection;
The class ``BruteForceMatcher_OCL_base`` has an interface similar to the class :ocv:class:`DescriptorMatcher`. It has two groups of ``match`` methods: for matching descriptors of one image with another image or with an image set. Also, all functions have an alternative to save results either to the GPU memory or to the CPU memory. ``BruteForceMatcher_OCL_base`` supports only the ``L1<float>``, ``L2<float>``, and ``Hamming`` distance types.
.. seealso:: :ocv:class:`DescriptorMatcher`, :ocv:class:`BFMatcher`
Finds the best match for each descriptor from a query set with train descriptors.
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::match(const oclMat& query, const oclMat& train, std::vector<DMatch>& matches, const oclMat& mask = oclMat())
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat& query, const oclMat& train, oclMat& trainIdx, oclMat& distance, const oclMat& mask = oclMat())
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::match(const oclMat& query, std::vector<DMatch>& matches, const std::vector<oclMat>& masks = std::vector<oclMat>())
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::matchCollection( const oclMat& query, const oclMat& trainCollection, oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, const oclMat& masks=oclMat() )
.. seealso:: :ocv:func:`DescriptorMatcher::match`
Performs a GPU collection of train descriptors and masks in a suitable format for the :ocv:func:`ocl::BruteForceMatcher_OCL_base::matchCollection` function.
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat& trainCollection, oclMat& maskCollection, const vector<oclMat>& masks = std::vector<oclMat>())
Downloads matrices obtained via :ocv:func:`ocl::BruteForceMatcher_OCL_base::matchSingle` or :ocv:func:`ocl::BruteForceMatcher_OCL_base::matchCollection` to vector with :ocv:class:`DMatch`.
.. ocv:function:: static void ocl::BruteForceMatcher_OCL_base::matchDownload( const oclMat& trainIdx, const oclMat& distance, std::vector<DMatch>& matches )
.. ocv:function:: static void ocl::BruteForceMatcher_OCL_base::matchDownload( const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, std::vector<DMatch>& matches )
Converts matrices obtained via :ocv:func:`ocl::BruteForceMatcher_OCL_base::matchSingle` or :ocv:func:`ocl::BruteForceMatcher_OCL_base::matchCollection` to vector with :ocv:class:`DMatch`.
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat& trainIdx, const Mat& distance, std::vector<DMatch>&matches)
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector<DMatch>&matches)
Finds the ``k`` best matches for each descriptor from a query set with train descriptors.
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat& query, const oclMat& train, std::vector< std::vector<DMatch> >&matches, int k, const oclMat& mask = oclMat(), bool compactResult = false)
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat& query, const oclMat& train, oclMat& trainIdx, oclMat& distance, oclMat& allDist, int k, const oclMat& mask = oclMat())
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat& query, std::vector< std::vector<DMatch> >&matches, int k, const std::vector<oclMat>&masks = std::vector<oclMat>(), bool compactResult = false )
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat& query, const oclMat& trainCollection, oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, const oclMat& maskCollection = oclMat())
:param query: Query set of descriptors.
:param train: Training set of descriptors. It is not added to the train descriptors collection stored in the class object.
:param k: Number of the best matches per each query descriptor (or less if it is not possible).
:param mask: Mask specifying permissible matches between the input query and train matrices of descriptors.
:param compactResult: If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
The function returns detected ``k`` (or less if not possible) matches in the increasing order by distance.
The third variant of the method stores the results in GPU memory.
.. seealso:: :ocv:func:`DescriptorMatcher::knnMatch`
Downloads matrices obtained via :ocv:func:`ocl::BruteForceMatcher_OCL_base::knnMatchSingle` or :ocv:func:`ocl::BruteForceMatcher_OCL_base::knnMatch2Collection` to vector with :ocv:class:`DMatch`.
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat& trainIdx, const oclMat& distance, std::vector< std::vector<DMatch> >&matches, bool compactResult = false)
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)
If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
Converts matrices obtained via :ocv:func:`ocl::BruteForceMatcher_OCL_base::knnMatchSingle` or :ocv:func:`ocl::BruteForceMatcher_OCL_base::knnMatch2Collection` to CPU vector with :ocv:class:`DMatch`.
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat& trainIdx, const Mat& distance, std::vector< std::vector<DMatch> >&matches, bool compactResult = false)
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)
If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
For each query descriptor, finds the best matches with a distance less than a given threshold.
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat& query, const oclMat& train, std::vector< std::vector<DMatch> >&matches, float maxDistance, const oclMat& mask = oclMat(), bool compactResult = false)
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat& query, const oclMat& train, oclMat& trainIdx, oclMat& distance, oclMat& nMatches, float maxDistance, const oclMat& mask = oclMat())
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat& query, std::vector< std::vector<DMatch> >&matches, float maxDistance, const std::vector<oclMat>& masks = std::vector<oclMat>(), bool compactResult = false)
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat& query, oclMat& trainIdx, oclMat& imgIdx, oclMat& distance, oclMat& nMatches, float maxDistance, const std::vector<oclMat>& masks = std::vector<oclMat>())
:param query: Query set of descriptors.
:param train: Training set of descriptors. It is not added to train descriptors collection stored in the class object.
:param maxDistance: Distance threshold.
:param mask: Mask specifying permissible matches between the input query and train matrices of descriptors.
:param compactResult: If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
The function returns detected matches in the increasing order by distance.
The methods work only on devices with the compute capability :math:`>=` 1.1.
The third variant of the method stores the results in GPU memory and does not sort the points by the distance.
.. seealso:: :ocv:func:`DescriptorMatcher::radiusMatch`
Downloads matrices obtained via :ocv:func:`ocl::BruteForceMatcher_OCL_base::radiusMatchSingle` or :ocv:func:`ocl::BruteForceMatcher_OCL_base::radiusMatchCollection` to vector with :ocv:class:`DMatch`.
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat& trainIdx, const oclMat& distance, const oclMat& nMatches, std::vector< std::vector<DMatch> >&matches, bool compactResult = false)
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat& trainIdx, const oclMat& imgIdx, const oclMat& distance, const oclMat& nMatches, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)
If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
Converts matrices obtained via :ocv:func:`ocl::BruteForceMatcher_OCL_base::radiusMatchSingle` or :ocv:func:`ocl::BruteForceMatcher_OCL_base::radiusMatchCollection` to vector with :ocv:class:`DMatch`.
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat& trainIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >&matches, bool compactResult = false)
.. ocv:function:: void ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat& trainIdx, const Mat& imgIdx, const Mat& distance, const Mat& nMatches, std::vector< std::vector<DMatch> >& matches, bool compactResult = false)
If ``compactResult`` is ``true`` , the ``matches`` vector does not contain matches for fully masked-out query descriptors.
.. ocv:class:: ocl::FAST_OCL
Class used for corner detection using the FAST algorithm. ::
X_ROW = 0,
// all features have same size
static const int FEATURE_SIZE = 7;
explicit FAST_OCL(int threshold, bool nonmaxSupression = true, double keypointsRatio = 0.05);
//! finds the keypoints using FAST detector
//! supports only CV_8UC1 images
void operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints);
void operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints);
//! download keypoints from device to host memory
static void downloadKeypoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints);
//! convert keypoints to KeyPoint vector
static void convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints);
//! release temporary buffer's memory
void release();
bool nonmaxSupression;
int threshold;
//! max keypoints = keypointsRatio * img.size().area()
double keypointsRatio;
//! find keypoints and compute its response if nonmaxSupression is true
//! return count of detected keypoints
int calcKeyPointsLocation(const oclMat& image, const oclMat& mask);
//! get final array of keypoints
//! performs nonmax suppression if needed
//! return final count of keypoints
int getKeyPoints(oclMat& keypoints);
// Hidden
The class ``FAST_OCL`` implements FAST corner detection algorithm.
.. seealso:: :ocv:func:`FAST`
.. ocv:function:: ocl::FAST_OCL::FAST_OCL(int threshold, bool nonmaxSupression = true, double keypointsRatio = 0.05)
:param threshold: Threshold on difference between intensity of the central pixel and pixels on a circle around this pixel.
:param nonmaxSupression: If it is true, non-maximum suppression is applied to detected corners (keypoints).
:param keypointsRatio: Inner buffer size for keypoints store is determined as (keypointsRatio * image_width * image_height).
ocl::FAST_OCL::operator ()
Finds the keypoints using FAST detector.
.. ocv:function:: void ocl::FAST_OCL::operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints)
.. ocv:function:: void ocl::FAST_OCL::operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints)
:param image: Image where keypoints (corners) are detected. Only 8-bit grayscale images are supported.
:param mask: Optional input mask that marks the regions where we should detect features.
:param keypoints: The output vector of keypoints. Can be stored both in host or device memory. For device memory:
* X_ROW of keypoints will contain the horizontal coordinate of the i'th point
* Y_ROW of keypoints will contain the vertical coordinate of the i'th point
* RESPONSE_ROW will contain response of i'th point (if non-maximum suppression is applied)
Download keypoints from device to host memory.
.. ocv:function:: void ocl::FAST_OCL::downloadKeypoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints)
Converts keypoints from OpenCL representation to vector of ``KeyPoint``.
.. ocv:function:: void ocl::FAST_OCL::convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints)
Releases inner buffer memory.
.. ocv:function:: void ocl::FAST_OCL::release()
Finds keypoints. If ``nonmaxSupression`` is true, responses are computed and keypoints with smaller responses within 9-neighborhood regions are eliminated.
.. ocv:function:: int ocl::FAST_OCL::calcKeyPointsLocation(const oclMat& image, const oclMat& mask)
:param image: Image where keypoints (corners) are detected. Only 8-bit grayscale images are supported.
:param mask: Optional input mask that marks the regions where we should detect features.
The function returns the amount of detected keypoints.
Gets final array of keypoints.
.. ocv:function:: int ocl::FAST_OCL::getKeyPoints(oclMat& keypoints)
:param keypoints: The output vector of keypoints.
The function performs non-max suppression if needed and returns the final amount of keypoints.
.. ocv:class:: ocl::BRIEF_OCL
Class for computing BRIEF descriptors described in a paper of Calonder M., Lepetit V.,
Strecha C., Fua P. *BRIEF: Binary Robust Independent Elementary Features* ,
11th European Conference on Computer Vision (ECCV), Heraklion, Crete. LNCS Springer, September 2010. ::
static const int PATCH_SIZE = 48;
static const int KERNEL_SIZE = 9;
explicit BRIEF_OCL(int _bytes = 32);
//!computes the brief descriptor for a set of given keypoints
//! supports only CV_8UC1 images
void compute(const oclMat& image, const oclMat& keypoints, oclMat& mask, oclMat& descriptors) const;
static int getBorderSize();
.. ocv:function:: ocl::BRIEF_OCL::BRIEF_OCL(int _bytes = 32)
:param bytes: The length of the descriptor in bytes. Supported values are 16, 32 or 64 bytes.
Computes BRIEF descriptors.
.. ocv:function:: void ocl::BRIEF_OCL::compute(const oclMat& image, const oclMat& keypoints, oclMat& mask, oclMat& descriptors) const
:param image: The input 8-bit grayscale image.
:param keypoints: The keypoints.
:param mask: Input/output mask. If mask has same cols as keypoints, descriptors are computed for keypoints with non-zero mask element.
On return, it indicates for which keypoints a descriptor was computed or not (if a keypoint is near the image border).
:param descriptors: The computed descriptors. It has size keypoints.cols x bytes.
Returns the size of the image border where descriptors cannot be computed
.. ocv:function:: static int ocl::BRIEF_OCL::getBorderSize()
.. ocv:struct:: ocl::HOGDescriptor
The class implements Histogram of Oriented Gradients ([Dalal2005]_) object detector. ::
struct CV_EXPORTS HOGDescriptor
enum { DEFAULT_WIN_SIGMA = -1 };
enum { DEFAULT_NLEVELS = 64 };
HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
double threshold_L2hys=0.2, bool gamma_correction=true,
int nlevels=DEFAULT_NLEVELS);
size_t getDescriptorSize() const;
size_t getBlockHistogramSize() const;
void setSVMDetector(const vector<float>& detector);
static vector<float> getDefaultPeopleDetector();
static vector<float> getPeopleDetector48x96();
static vector<float> getPeopleDetector64x128();
void detect(const oclMat& img, vector<Point>& found_locations,
double hit_threshold=0, Size win_stride=Size(),
Size padding=Size());
void detectMultiScale(const oclMat& img, vector<Rect>& found_locations,
double hit_threshold=0, Size win_stride=Size(),
Size padding=Size(), double scale0=1.05,
int group_threshold=2);
void getDescriptors(const oclMat& img, Size win_stride,
oclMat& descriptors,
int descr_format=DESCR_FORMAT_COL_BY_COL);
Size win_size;
Size block_size;
Size block_stride;
Size cell_size;
int nbins;
double win_sigma;
double threshold_L2hys;
bool gamma_correction;
int nlevels;
// Hidden
Interfaces of all methods are kept similar to the ``CPU HOG`` descriptor and detector analogues as much as possible.
.. note::
(Ocl) An example using the HOG descriptor can be found at opencv_source_code/samples/ocl/hog.cpp
Creates the ``HOG`` descriptor and detector.
.. ocv:function:: ocl::HOGDescriptor::HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16), Size block_stride=Size(8, 8), Size cell_size=Size(8, 8), int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA, double threshold_L2hys=0.2, bool gamma_correction=true, int nlevels=DEFAULT_NLEVELS)
:param win_size: Detection window size. Align to block size and block stride.
:param block_size: Block size in pixels. Align to cell size. Only (16,16) is supported for now.
:param block_stride: Block stride. It must be a multiple of cell size.
:param cell_size: Cell size. Only (8, 8) is supported for now.
:param nbins: Number of bins. Only 9 bins per cell are supported for now.
:param win_sigma: Gaussian smoothing window parameter.
:param threshold_L2hys: L2-Hys normalization method shrinkage.
:param gamma_correction: Flag to specify whether the gamma correction preprocessing is required or not.
:param nlevels: Maximum number of detection window increases.
Returns the number of coefficients required for the classification.
.. ocv:function:: size_t ocl::HOGDescriptor::getDescriptorSize() const
Returns the block histogram size.
.. ocv:function:: size_t ocl::HOGDescriptor::getBlockHistogramSize() const
Sets coefficients for the linear SVM classifier.
.. ocv:function:: void ocl::HOGDescriptor::setSVMDetector(const vector<float>& detector)
Returns coefficients of the classifier trained for people detection (for default window size).
.. ocv:function:: static vector<float> ocl::HOGDescriptor::getDefaultPeopleDetector()
Returns coefficients of the classifier trained for people detection (for 48x96 windows).
.. ocv:function:: static vector<float> ocl::HOGDescriptor::getPeopleDetector48x96()
Returns coefficients of the classifier trained for people detection (for 64x128 windows).
.. ocv:function:: static vector<float> ocl::HOGDescriptor::getPeopleDetector64x128()
Performs object detection without a multi-scale window.
.. ocv:function:: void ocl::HOGDescriptor::detect(const oclMat& img, vector<Point>& found_locations, double hit_threshold=0, Size win_stride=Size(), Size padding=Size())
:param img: Source image. ``CV_8UC1`` and ``CV_8UC4`` types are supported for now.
:param found_locations: Left-top corner points of detected objects boundaries.
:param hit_threshold: Threshold for the distance between features and SVM classifying plane. Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient). But if the free coefficient is omitted (which is allowed), you can specify it manually here.
:param win_stride: Window stride. It must be a multiple of block stride.
:param padding: Mock parameter to keep the CPU interface compatibility. It must be (0,0).
Performs object detection with a multi-scale window.
.. ocv:function:: void ocl::HOGDescriptor::detectMultiScale(const oclMat& img, vector<Rect>& found_locations, double hit_threshold=0, Size win_stride=Size(), Size padding=Size(), double scale0=1.05, int group_threshold=2)
:param img: Source image. See :ocv:func:`ocl::HOGDescriptor::detect` for type limitations.
:param found_locations: Detected objects boundaries.
:param hit_threshold: Threshold for the distance between features and SVM classifying plane. See :ocv:func:`ocl::HOGDescriptor::detect` for details.
:param win_stride: Window stride. It must be a multiple of block stride.
:param padding: Mock parameter to keep the CPU interface compatibility. It must be (0,0).
:param scale0: Coefficient of the detection window increase.
:param group_threshold: Coefficient to regulate the similarity threshold. When detected, some objects can be covered by many rectangles. 0 means not to perform grouping. See :ocv:func:`groupRectangles` .
Returns block descriptors computed for the whole image.
.. ocv:function:: void ocl::HOGDescriptor::getDescriptors(const oclMat& img, Size win_stride, oclMat& descriptors, int descr_format=DESCR_FORMAT_COL_BY_COL)
:param img: Source image. See :ocv:func:`ocl::HOGDescriptor::detect` for type limitations.
:param win_stride: Window stride. It must be a multiple of block stride.
:param descriptors: 2D array of descriptors.
:param descr_format: Descriptor storage format:
* **DESCR_FORMAT_ROW_BY_ROW** - Row-major order.
* **DESCR_FORMAT_COL_BY_COL** - Column-major order.
The function is mainly used to learn the classifier.
.. ocv:class:: ocl::ORB_OCL
Class for extracting ORB features and descriptors from an image. ::
class ORB_OCL
X_ROW = 0,
explicit ORB_OCL(int nFeatures = 500, float scaleFactor = 1.2f,
int nLevels = 8, int edgeThreshold = 31,
int firstLevel = 0, int WTA_K = 2,
int scoreType = 0, int patchSize = 31);
void operator()(const oclMat& image, const oclMat& mask,
std::vector<KeyPoint>& keypoints);
void operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints);
void operator()(const oclMat& image, const oclMat& mask,
std::vector<KeyPoint>& keypoints, oclMat& descriptors);
void operator()(const oclMat& image, const oclMat& mask,
oclMat& keypoints, oclMat& descriptors);
void downloadKeyPoints(oclMat& d_keypoints, std::vector<KeyPoint>& keypoints);
void convertKeyPoints(Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
int descriptorSize() const;
int descriptorType() const;
int defaultNorm() const;
void setFastParams(int threshold, bool nonmaxSupression = true);
void release();
bool blurForDescriptor;
The class implements ORB feature detection and description algorithm.
.. ocv:function:: ocl::ORB_OCL::ORB_OCL(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31, int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31)
:param nfeatures: The maximum number of features to retain.
:param scaleFactor: Pyramid decimation ratio, greater than 1. ``scaleFactor==2`` means the classical pyramid, where each next level has 4x less pixels than the previous, but such a big scale factor will degrade feature matching scores dramatically. On the other hand, too close to 1 scale factor will mean that to cover certain scale range you will need more pyramid levels and so the speed will suffer.
:param nlevels: The number of pyramid levels. The smallest level will have linear size equal to ``input_image_linear_size/pow(scaleFactor, nlevels)``.
:param edgeThreshold: This is size of the border where the features are not detected. It should roughly match the ``patchSize`` parameter.
:param firstLevel: It should be 0 in the current implementation.
:param WTA_K: The number of points that produce each element of the oriented BRIEF descriptor. The default value 2 means the BRIEF where we take a random point pair and compare their brightnesses, so we get 0/1 response. Other possible values are 3 and 4. For example, 3 means that we take 3 random points (of course, those point coordinates are random, but they are generated from the pre-defined seed, so each element of BRIEF descriptor is computed deterministically from the pixel rectangle), find point of maximum brightness and output index of the winner (0, 1 or 2). Such output will occupy 2 bits, and therefore it will need a special variant of Hamming distance, denoted as ``NORM_HAMMING2`` (2 bits per bin). When ``WTA_K=4``, we take 4 random points to compute each bin (that will also occupy 2 bits with possible values 0, 1, 2 or 3).
:param scoreType: The default HARRIS_SCORE means that Harris algorithm is used to rank features (the score is written to ``KeyPoint::response`` and is used to retain best ``nfeatures`` features); FAST_SCORE is alternative value of the parameter that produces slightly less stable keypoints, but it is a little faster to compute.
:param patchSize: size of the patch used by the oriented BRIEF descriptor. Of course, on smaller pyramid layers the perceived image area covered by a feature will be larger.
Detects keypoints and computes descriptors for them.
.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints)
.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints)
.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints, oclMat& descriptors)
.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints, oclMat& descriptors)
:param image: Input 8-bit grayscale image.
:param mask: Optional input mask that marks the regions where we should detect features.
:param keypoints: The input/output vector of keypoints. Can be stored both in host and device memory. For device memory:
* ``X_ROW`` contains the horizontal coordinate of the i'th feature.
* ``Y_ROW`` contains the vertical coordinate of the i'th feature.
* ``RESPONSE_ROW`` contains the response of the i'th feature.
* ``ANGLE_ROW`` contains the orientation of the i'th feature.
* ``OCTAVE_ROW`` contains the octave of the i'th feature.
* ``SIZE_ROW`` contains the size of the i'th feature.
:param descriptors: Computed descriptors. If ``blurForDescriptor`` is true, the image will be blurred before descriptors calculation.
Download keypoints from device to host memory.
.. ocv:function:: static void ocl::ORB_OCL::downloadKeyPoints( const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints )
Converts keypoints from OCL representation to vector of ``KeyPoint``.
.. ocv:function:: static void ocl::ORB_OCL::convertKeyPoints( const Mat& d_keypoints, std::vector<KeyPoint>& keypoints )
Releases inner buffer memory.
.. ocv:function:: void ocl::ORB_OCL::release()
@ -1,719 +0,0 @@
Image Filtering
.. highlight:: cpp
.. ocv:class:: ocl::BaseRowFilter_GPU
Base class for linear or non-linear filters that processes rows of 2D arrays. Such filters are used for the "horizontal" filtering passes in separable filters. ::
class CV_EXPORTS BaseRowFilter_GPU
BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
virtual ~BaseRowFilter_GPU() {}
virtual void operator()(const oclMat &src, oclMat &dst) = 0;
int ksize, anchor, bordertype;
.. note:: This class does not allocate memory for a destination image. Usually this class is used inside :ocv:class:`ocl::FilterEngine_GPU`.
.. ocv:class:: ocl::BaseColumnFilter_GPU
Base class for linear or non-linear filters that processes columns of 2D arrays. Such filters are used for the "vertical" filtering passes in separable filters. ::
class CV_EXPORTS BaseColumnFilter_GPU
BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {}
virtual ~BaseColumnFilter_GPU() {}
virtual void operator()(const oclMat &src, oclMat &dst) = 0;
int ksize, anchor, bordertype;
.. note:: This class does not allocate memory for a destination image. Usually this class is used inside :ocv:class:`ocl::FilterEngine_GPU`.
.. ocv:class:: ocl::BaseFilter_GPU
Base class for non-separable 2D filters. ::
class CV_EXPORTS BaseFilter_GPU
BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_)
: ksize(ksize_), anchor(anchor_), borderType(borderType_) {}
virtual ~BaseFilter_GPU() {}
virtual void operator()(const oclMat &src, oclMat &dst) = 0;
Size ksize;
Point anchor;
int borderType;
.. note:: This class does not allocate memory for a destination image. Usually this class is used inside :ocv:class:`ocl::FilterEngine_GPU`.
.. ocv:class:: ocl::FilterEngine_GPU
Base class for the Filter Engine. ::
class CV_EXPORTS FilterEngine_GPU
virtual ~FilterEngine_GPU() {}
virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0;
The class can be used to apply an arbitrary filtering operation to an image. It contains all the necessary intermediate buffers. Pointers to the initialized ``FilterEngine_GPU`` instances are returned by various ``create*Filter_GPU`` functions (see below), and they are used inside high-level functions such as :ocv:func:`ocl::filter2D`, :ocv:func:`ocl::erode`, :ocv:func:`ocl::Sobel` , and others.
By using ``FilterEngine_GPU`` instead of functions you can avoid unnecessary memory allocation for intermediate buffers and get better performance: ::
while (...)
ocl::oclMat src = getImg();
ocl::oclMat dst;
// Allocate and release buffers at each iteration
ocl::GaussianBlur(src, dst, ksize, sigma1);
// Allocate buffers only once
cv::Ptr<ocl::FilterEngine_GPU> filter =
ocl::createGaussianFilter_GPU(CV_8UC4, ksize, sigma1);
while (...)
ocl::oclMat src = getImg();
ocl::oclMat dst;
filter->apply(src, dst, cv::Rect(0, 0, src.cols, src.rows));
// Release buffers only once
``FilterEngine_GPU`` can process a rectangular sub-region of an image. By default, if ``roi == Rect(0,0,-1,-1)`` , ``FilterEngine_GPU`` processes the inner region of an image ( ``Rect(anchor.x, anchor.y, src_size.width - ksize.width, src_size.height - ksize.height)`` ) because some filters do not check whether indices are outside the image for better performance. See below to understand which filters support processing the whole image and which do not and identify image type limitations.
.. note:: The GPU filters do not support the in-place mode.
.. seealso:: :ocv:class:`ocl::BaseRowFilter_GPU`, :ocv:class:`ocl::BaseColumnFilter_GPU`, :ocv:class:`ocl::BaseFilter_GPU`, :ocv:func:`ocl::createFilter2D_GPU`, :ocv:func:`ocl::createSeparableFilter_GPU`, :ocv:func:`ocl::createBoxFilter_GPU`, :ocv:func:`ocl::createMorphologyFilter_GPU`, :ocv:func:`ocl::createLinearFilter_GPU`, :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`ocl::createDerivFilter_GPU`, :ocv:func:`ocl::createGaussianFilter_GPU`
Creates a non-separable filter engine with the specified filter.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createFilter2D_GPU( const Ptr<BaseFilter_GPU> filter2D)
:param filter2D: Non-separable 2D filter.
Usually this function is used inside such high-level functions as :ocv:func:`ocl::createLinearFilter_GPU`, :ocv:func:`ocl::createBoxFilter_GPU`.
Creates a separable filter engine with the specified filters.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter, const Ptr<BaseColumnFilter_GPU> &columnFilter)
:param rowFilter: "Horizontal" 1D filter.
:param columnFilter: "Vertical" 1D filter.
Usually this function is used inside such high-level functions as :ocv:func:`ocl::createSeparableLinearFilter_GPU`.
Creates a normalized 2D box filter.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createBoxFilter_GPU(int srcType, int dstType, const Size &ksize, const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT)
.. ocv:function:: Ptr<BaseFilter_GPU> ocl::getBoxFilter_GPU(int srcType, int dstType, const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT)
:param srcType: Input image type.
:param dstType: Output image type. It supports only the same values as the source type.
:param ksize: Kernel size.
:param anchor: Anchor point. The default value ``Point(-1, -1)`` means that the anchor is at the kernel center.
:param borderType: Border type.
.. seealso:: :ocv:func:`boxFilter`
Smooths the image using the normalized box filter.
.. ocv:function:: void ocl::boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT)
:param src: Input image.
:param dst: Output image. The size and type are the same as ``src`` .
:param ddepth: Desired depth of the destination image. If it is negative, it is the same as ``src.depth()`` . It supports only the same depth as the source image depth.
:param ksize: Kernel size.
:param anchor: Anchor point. The default value ``Point(-1, -1)`` means that the anchor is at the kernel center.
:param borderType: Border type.
Smooths an image using the box filter.
Acts as a synonym for the normalized box filter.
.. ocv:function:: void ocl::blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1), int borderType = BORDER_CONSTANT)
:param src: Input image.
:param dst: Output image with the same size and type as ``src`` .
:param ksize: Kernel size.
:param anchor: Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel center.
:param borderType: Border type.
.. seealso:: :ocv:func:`blur`, :ocv:func:`ocl::boxFilter`
Creates a 2D morphological filter.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Point &anchor = Point(-1, -1), int iterations = 1)
.. ocv:function:: Ptr<BaseFilter_GPU> ocl::getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize, Point anchor = Point(-1, -1))
:param op: Morphology operation id. Only ``MORPH_ERODE`` and ``MORPH_DILATE`` are supported.
:param type: Input/output image type. Only ``CV_8UC1`` and ``CV_8UC4`` are supported.
:param kernel: 2D 8-bit structuring element for the morphological operation.
:param ksize: Size of a horizontal or vertical structuring element used for separable morphological operations.
:param anchor: Anchor position within the structuring element. Negative values mean that the anchor is at the center.
.. note:: This filter does not check out-of-border accesses, so only a proper sub-matrix of a bigger matrix has to be passed to it.
.. seealso:: :ocv:func:`createMorphologyFilter`
Creates a non-separable linear filter.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT)
:param srcType: Input image type.
:param dstType: Output image type. The same type as ``src`` is supported.
:param kernel: 2D array of filter coefficients.
:param anchor: Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel center.
:param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` .
.. seealso:: :ocv:func:`createLinearFilter`
Applies the non-separable 2D linear filter to an image.
.. ocv:function:: void ocl::filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel, Point anchor = Point(-1, -1), double delta = 0.0, int borderType = BORDER_DEFAULT)
:param src: Source image.
:param dst: Destination image. The size and the number of channels is the same as ``src`` .
:param ddepth: Desired depth of the destination image. If it is negative, it is the same as ``src.depth()`` . It supports only the same depth as the source image depth.
:param kernel: 2D array of filter coefficients.
:param anchor: Anchor of the kernel that indicates the relative position of a filtered point within the kernel. The anchor resides within the kernel. The special default value (-1,-1) means that the anchor is at the kernel center.
:param delta: optional value added to the filtered pixels before storing them in ``dst``. Value '0' is supported only.
:param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` .
Creates a primitive row filter with the specified kernel.
.. ocv:function:: Ptr<BaseRowFilter_GPU> ocl::getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel, int anchor = -1, int bordertype = BORDER_DEFAULT)
:param srcType: Source array type. Only ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported.
:param bufType: Intermediate buffer type with as many channels as ``srcType`` .
:param rowKernel: Filter coefficients. Support kernels with ``size <= 16`` .
:param anchor: Anchor position within the kernel. Negative values mean that the anchor is positioned at the aperture center.
:param bordertype: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.
.. seealso:: :ocv:func:`createSeparableLinearFilter` .
Creates a primitive column filter with the specified kernel.
.. ocv:function:: Ptr<BaseColumnFilter_GPU> ocl::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel, int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0)
:param bufType: Intermediate buffer type with as many channels as ``dstType`` .
:param dstType: Destination array type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` destination types are supported.
:param columnKernel: Filter coefficients. Support kernels with ``size <= 16`` .
:param anchor: Anchor position within the kernel. Negative values mean that the anchor is positioned at the aperture center.
:param bordertype: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` .
:param delta: default value is 0.0.
.. seealso:: :ocv:func:`ocl::getLinearRowFilter_GPU`, :ocv:func:`createSeparableLinearFilter`
Creates a separable linear filter engine.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
:param srcType: Source array type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported.
:param dstType: Destination array type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` destination types are supported.
:param rowKernel: Horizontal filter coefficients. Support kernels with ``size <= 16`` .
:param columnKernel: Vertical filter coefficients. Support kernels with ``size <= 16`` .
:param anchor: Anchor position within the kernel. Negative values mean that anchor is positioned at the aperture center.
:param delta: default value is 0.0.
:param bordertype: Pixel extrapolation method.
:param imgSize: Source image size to choose optimal method for processing.
.. seealso:: :ocv:func:`ocl::getLinearRowFilter_GPU`, :ocv:func:`ocl::getLinearColumnFilter_GPU`, :ocv:func:`createSeparableLinearFilter`
Applies a separable 2D linear filter to an image.
.. ocv:function:: void ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT)
:param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported.
:param dst: Destination image with the same size and number of channels as ``src`` .
:param ddepth: Destination image depth. ``CV_8U`` , ``CV_16S`` , ``CV_32S`` , and ``CV_32F`` are supported.
:param kernelX: Horizontal filter coefficients.
:param kernelY: Vertical filter coefficients.
:param anchor: Anchor position within the kernel. The default value ``(-1, -1)`` means that the anchor is at the kernel center.
:param delta: default value is 0.0.
:param bordertype: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.
.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`sepFilter2D`
Creates a filter engine for the generalized Sobel operator.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
:param srcType: Source image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported.
:param dstType: Destination image type with as many channels as ``srcType`` , ``CV_8U`` , ``CV_16S`` , ``CV_32S`` , and ``CV_32F`` depths are supported.
:param dx: Derivative order in respect of x.
:param dy: Derivative order in respect of y.
:param ksize: Aperture size. See :ocv:func:`getDerivKernels` for details.
:param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.
:param imgSize: Source image size to choose optimal method for processing.
.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createDerivFilter`
Returns void
.. ocv:function:: void ocl::Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT)
:param src: The source image
:param dst: The destination image; It will have the same size as src
:param ddepth: The destination image depth
:param dx: Order of the derivative x
:param dy: Order of the derivative y
:param ksize: Size of the extended Sobel kernel
:param scale: The optional scale factor for the computed derivative values(by default, no scaling is applied)
:param delta: The optional delta value, added to the results prior to storing them in dst
:param bordertype: Pixel extrapolation method.
The function computes the first x- or y- spatial image derivative using Sobel operator. Supports 8UC1, 8UC4, 32SC1, 32SC4, 32FC1 and 32FC4 data types.
Returns void
.. ocv:function:: void ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT)
:param src: The source image
:param dst: The destination image; It will have the same size as src
:param ddepth: The destination image depth
:param dx: Order of the derivative x
:param dy: Order of the derivative y
:param scale: The optional scale factor for the computed derivative values(by default, no scaling is applied)
:param delta: The optional delta value, added to the results prior to storing them in dst
:param bordertype: Pixel extrapolation method.
The function computes the first x- or y- spatial image derivative using Scharr operator. Supports 8UC1, 8UC4, 32SC1, 32SC4, 32FC1 and 32FC4 data types.
Creates a Gaussian filter engine.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
:param type: Source and destination image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` are supported.
:param ksize: Aperture size. See :ocv:func:`getGaussianKernel` for details.
:param sigma1: Gaussian sigma in the horizontal direction. See :ocv:func:`getGaussianKernel` for details.
:param sigma2: Gaussian sigma in the vertical direction. If 0, then :math:`\texttt{sigma2}\leftarrow\texttt{sigma1}` .
:param bordertype: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.
:param imgSize: Source image size to choose optimal method for processing.
.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createGaussianFilter`
Returns void
.. ocv:function:: void ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT)
:param src: The source image
:param dst: The destination image; It will have the same size and the same type as src
:param ksize: The Gaussian kernel size; ksize.width and ksize.height can differ, but they both must be positive and odd. Alternatively, they can be zeros, in which case they are computed from sigma
:param sigma1: The Gaussian kernel standard deviation in X direction.
:param sigma2: The Gaussian kernel standard deviation in Y direction. If sigma2 is zero, it is set to be equal to sigma1. If they are both zeros, they are computed from ksize.width and ksize.height. To fully control the result regardless of possible future modification of all this semantics, it is recommended to specify all of ksize, sigma1 and sigma2
:param bordertype: Pixel extrapolation method.
The function convolves the source image with the specified Gaussian kernel. In-place filtering is supported. Supports 8UC1, 8UC4, 32SC1, 32SC4, 32FC1 and 32FC4 data types.
Returns void
.. ocv:function:: void ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1, double delta = 0, int borderType = BORDER_DEFAULT)
:param src: The source image
:param dst: The destination image; It will have the same size and the same type as src
:param ddepth: The desired depth of the destination image
:param ksize: The aperture size used to compute the second-derivative filters. It must be positive and odd
:param scale: The optional scale factor for the computed Laplacian values (by default, no scaling is applied)
:param delta: Optional delta value that is added to the results prior to storing them in ``dst`` . Supported value is 0 only.
:param borderType: Pixel extrapolation method.
The function calculates the Laplacian of the source image by adding up the second x and y derivatives calculated using the Sobel operator.
.. ocv:struct:: ocl::ConvolveBuf
Class providing a memory buffer for :ocv:func:`ocl::convolve` function, plus it allows to adjust some specific parameters. ::
struct CV_EXPORTS ConvolveBuf
Size result_size;
Size block_size;
Size user_block_size;
Size dft_size;
int spect_len;
oclMat image_spect, templ_spect, result_spect;
oclMat image_block, templ_block, result_data;
void create(Size image_size, Size templ_size);
static Size estimateBlockSize(Size result_size, Size templ_size);
You can use field `user_block_size` to set specific block size for :ocv:func:`ocl::convolve` function. If you leave its default value `Size(0,0)` then automatic estimation of block size will be used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed.
.. ocv:function:: ocl::ConvolveBuf::create(Size image_size, Size templ_size)
Constructs a buffer for :ocv:func:`ocl::convolve` function with respective arguments.
Returns void
.. ocv:function:: void ocl::convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr=false)
.. ocv:function:: void ocl::convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf)
:param image: The source image. Only ``CV_32FC1`` images are supported for now.
:param temp1: Convolution kernel, a single-channel floating point matrix. The size is not greater than the ``image`` size. The type is the same as ``image``.
:param result: The destination image
:param ccorr: Flags to evaluate cross-correlation instead of convolution.
:param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`ocl::ConvolveBuf`.
Convolves an image with the kernel. Supports only the CV_32FC1 data type and does not support ROI.
Returns void
.. ocv:function:: void ocl::bilateralFilter(const oclMat &src, oclMat &dst, int d, double sigmaColor, double sigmaSpace, int borderType=BORDER_DEFAULT)
:param src: The source image
:param dst: The destination image; will have the same size and the same type as src
:param d: The diameter of each pixel neighborhood, that is used during filtering. If it is non-positive, it's computed from sigmaSpace
:param sigmaColor: Filter sigma in the color space. Larger value of the parameter means that farther colors within the pixel neighborhood (see sigmaSpace) will be mixed together, resulting in larger areas of semi-equal color
:param sigmaSpace: Filter sigma in the coordinate space. Larger value of the parameter means that farther pixels will influence each other (as long as their colors are close enough; see sigmaColor). When d>0, it specifies the neighborhood size regardless of sigmaSpace, otherwise d is proportional to sigmaSpace.
:param borderType: Pixel extrapolation method.
Applies bilateral filter to the image. Supports 8UC1 8UC4 data types.
Returns void
.. ocv:function:: void ocl::adaptiveBilateralFilter(const oclMat& src, oclMat& dst, Size ksize, double sigmaSpace, double maxSigmaColor = 20.0, Point anchor = Point(-1, -1), int borderType=BORDER_DEFAULT)
:param src: The source image
:param dst: The destination image; will have the same size and the same type as src
:param ksize: The kernel size. This is the neighborhood where the local variance will be calculated, and where pixels will contribute (in a weighted manner).
:param sigmaSpace: Filter sigma in the coordinate space. Larger value of the parameter means that farther pixels will influence each other (as long as their colors are close enough; see sigmaColor). When d>0, it specifies the neighborhood size regardless of sigmaSpace, otherwise d is proportional to sigmaSpace.
:param maxSigmaColor: Maximum allowed sigma color (will clamp the value calculated in the ksize neighborhood). Larger value of the parameter means that more dissimilar pixels will influence each other (as long as their colors are close enough; see sigmaColor). When d>0, it specifies the neighborhood size regardless of sigmaSpace, otherwise d is proportional to sigmaSpace.
:param borderType: Pixel extrapolation method.
A main part of our strategy will be to load each raw pixel once, and reuse it to calculate all pixels in the output (filtered) image that need this pixel value. The math of the filter is that of the usual bilateral filter, except that the sigma color is calculated in the neighborhood, and clamped by the optional input value.
Local memory organization
.. image:: images/adaptiveBilateralFilter.jpg
:height: 250pt
:width: 350pt
:alt: Introduction Icon
.. note:: We partition the image into non-overlapping blocks of size (Ux, Uy). Each such block will correspond to the pixel locations where we will calculate the filter result in one workgroup. Considering neighbourhoods of sizes (kx, ky), where kx = 2 dx + 1, and ky = 2 dy + 1 (in image ML, dx = dy = 1, and kx = ky = 3), it is clear that we need to load data of size Wx = Ux + 2 dx, Wy = Uy + 2 dy. Furthermore, if (Sx, Sy) is the top left pixel coordinate for a particular block, and (Sx + Ux - 1, Sy + Uy - 1) is the bottom right coordinate of the block, we need to load data starting at top left coordinate (PSx, PSy) = (Sx - dx, Sy - dy), and ending at bottom right coordinate (Sx + Ux - 1 + dx, Sy + Uy - 1 + dy). The workgroup layout is (Wx,1). However, to take advantage of the natural hardware properties (preferred wavefront sizes), we restrict Wx to be a multiple of that preferred wavefront size (for current AMD hardware this is typically 64). Each thread in the workgroup will load Wy elements (under the constraint that Wx*Wy*pixel width <= max local memory).
Applies bilateral filter to the image. Supports 8UC1 8UC3 data types.
Returns void
.. ocv:function:: void ocl::copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar())
:param src: The source image
:param dst: The destination image; will have the same type as src and the size size(src.cols+left+right, src.rows+top+bottom)
:param top, bottom, left, right: Specify how many pixels in each direction from the source image rectangle one needs to extrapolate, e.g. top=1, bottom=1, left=1, right=1 mean that a 1 pixel-wide border needs to be built
:param bordertype: Pixel extrapolation method.
:param value: The border value if borderType==BORDER_CONSTANT
Forms a border around the image. Supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4 data types.
Returns void
.. ocv:function:: void ocl::dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue())
:param src: The source image
:param dst: The destination image; It will have the same size and the same type as src
:param kernel: The structuring element used for dilation. If kernel=Mat(), a 3x3 rectangular structuring element is used
:param anchor: Position of the anchor within the element. The default value (-1, -1) means that the anchor is at the element center; only the default value is supported
:param iterations: The number of times dilation is applied
:param borderType: Pixel extrapolation method.
:param borderValue: The border value if borderType==BORDER_CONSTANT
The function dilates the source image using the specified structuring element that determines the shape of a pixel neighborhood over which the maximum is taken. Supports 8UC1 8UC4 data types.
Returns void
.. ocv:function:: void ocl::erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue())
:param src: The source image
:param dst: The destination image; It will have the same size and the same type as src
:param kernel: The structuring element used for erosion. If kernel=Mat(), a 3x3 rectangular structuring element is used
:param anchor: Position of the anchor within the element. The default value (-1, -1) means that the anchor is at the element center; only the default value is supported
:param iterations: The number of times erosion is applied
:param borderType: Pixel extrapolation method.
:param borderValue: The border value if borderType==BORDER_CONSTANT
The function erodes the source image using the specified structuring element that determines the shape of a pixel neighborhood over which the minimum is taken. Supports 8UC1 8UC4 data types.
Returns void
.. ocv:function:: void ocl::morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue())
:param src: The source image
:param dst: The destination image; It will have the same size and the same type as src
:param op: Type of morphological operation, one of the following: ERODE DILATE OPEN CLOSE GRADIENT TOPHAT BLACKHAT
:param kernel: The structuring element used for the morphological operation. If kernel=Mat(), a 3x3 rectangular structuring element is used
:param anchor: Position of the anchor within the element. The default value (-1, -1) means that the anchor is at the element center; only the default value is supported
:param iterations: The number of times erosion and dilation are applied
:param borderType: Pixel extrapolation method.
:param borderValue: The border value if borderType==BORDER_CONSTANT
A wrapper for erode and dilate. Supports 8UC1 8UC4 data types.
Smoothes an image and downsamples it.
.. ocv:function:: void ocl::pyrDown(const oclMat& src, oclMat& dst)
:param src: Source image.
:param dst: Destination image. Will have ``Size((src.cols+1)/2, (src.rows+1)/2)`` size and the same type as ``src`` .
.. seealso:: :ocv:func:`pyrDown`
Upsamples an image and then smoothes it.
.. ocv:function:: void ocl::pyrUp(const oclMat& src, oclMat& dst)
:param src: Source image.
:param dst: Destination image. Will have ``Size(src.cols*2, src.rows*2)`` size and the same type as ``src`` .
.. seealso:: :ocv:func:`pyrUp`
Computes a vertical (column) sum.
.. ocv:function:: void ocl::columnSum(const oclMat& src, oclMat& sum)
:param src: Source image. Only ``CV_32FC1`` images are supported for now.
:param sum: Destination image of the ``CV_32FC1`` type.
Performs linear blending of two images.
.. ocv:function:: void ocl::blendLinear(const oclMat& img1, const oclMat& img2, const oclMat& weights1, const oclMat& weights2, oclMat& result)
:param img1: First image. Supports only ``CV_8U`` and ``CV_32F`` depth.
:param img2: Second image. Must have the same size and the same type as ``img1`` .
:param weights1: Weights for first image. Must have the same size as ``img1`` . Supports only ``CV_32F`` type.
:param weights2: Weights for second image. Must have the same size as ``img2`` . Supports only ``CV_32F`` type.
:param result: Destination image.
Blurs an image using the median filter.
.. ocv:function:: void ocl::medianFilter(const oclMat &src, oclMat &dst, int m)
:param src: input ``1``- or ``4``-channel image; the image depth should be ``CV_8U`` or ``CV_32F``.
:param dst: destination array of the same size and type as ``src``.
:param m: aperture linear size; it must be odd and greater than ``1``. Currently only ``3`` and ``5`` are supported.
The function smoothes an image using the median filter with the :math:`m \times m` aperture. Each channel of a multi-channel image is processed independently. In-place operation is supported.
@ -1,347 +0,0 @@
Image Processing
.. highlight:: cpp
Performs mean-shift filtering for each point of the source image.
.. ocv:function:: void ocl::meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1))
:param src: Source image. Only ``CV_8UC4`` images are supported for now.
:param dst: Destination image containing the color of mapped points. It has the same size and type as ``src`` .
:param sp: Spatial window radius.
:param sr: Color window radius.
:param criteria: Termination criteria. See :ocv:class:`TermCriteria`.
It maps each point of the source image into another point. As a result, you have a new color and new position of each point.
Performs a mean-shift procedure and stores information about processed points (their colors and positions) in two images.
.. ocv:function:: void ocl::meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1))
:param src: Source image. Only ``CV_8UC4`` images are supported for now.
:param dstr: Destination image containing the color of mapped points. The size and type is the same as ``src`` .
:param dstsp: Destination image containing the position of mapped points. The size is the same as ``src`` size. The type is ``CV_16SC2`` .
:param sp: Spatial window radius.
:param sr: Color window radius.
:param criteria: Termination criteria. See :ocv:class:`TermCriteria`.
.. seealso:: :ocv:func:`ocl::meanShiftFiltering`
Performs a mean-shift segmentation of the source image and eliminates small segments.
.. ocv:function:: void ocl::meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1))
:param src: Source image. Only ``CV_8UC4`` images are supported for now.
:param dst: Segmented image with the same size and type as ``src`` .
:param sp: Spatial window radius.
:param sr: Color window radius.
:param minsize: Minimum segment size. Smaller segments are merged.
:param criteria: Termination criteria. See :ocv:class:`TermCriteria`.
Computes an integral image.
.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum, oclMat &sqsum, int sdepth=-1)
.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum, int sdepth=-1)
:param src: Source image. Only ``CV_8UC1`` images are supported for now.
:param sum: Integral image containing 32-bit unsigned integer or 32-bit floating-point values.
:param sqsum: Integral image of squared pixel values, of ``CV_32FC1`` or ``CV_64FC1`` type.
.. seealso:: :ocv:func:`integral`
Returns void
.. ocv:function:: void ocl::cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT)
:param src: Source image. Only CV_8UC1 and CV_32FC1 images are supported now.
:param dst: Destination image containing cornerness values. It has the same size as src and CV_32FC1 type.
:param blockSize: Neighborhood size
:param ksize: Aperture parameter for the Sobel operator
:param k: Harris detector free parameter
:param bordertype: Pixel extrapolation method. Only BORDER_REFLECT101, BORDER_REFLECT, BORDER_CONSTANT and BORDER_REPLICATE are supported now.
Calculate Harris corner.
Returns void
.. ocv:function:: void ocl::cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT)
:param src: Source image. Only CV_8UC1 and CV_32FC1 images are supported now.
:param dst: Destination image containing cornerness values. It has the same size as src and CV_32FC1 type.
:param blockSize: Neighborhood size
:param ksize: Aperture parameter for the Sobel operator
:param bordertype: Pixel extrapolation method. Only BORDER_REFLECT101, BORDER_REFLECT, BORDER_CONSTANT and BORDER_REPLICATE are supported now.
Calculate MinEigenVal.
Returns void
.. ocv:function:: void ocl::calcHist(const oclMat &mat_src, oclMat &mat_hist)
:param mat_src: Source arrays. They all should have the same depth, CV_8U, and the same size. Each of them can have an arbitrary number of channels.
:param mat_hist: The output histogram, a dense or sparse dims-dimensional array
Calculates histogram of one or more arrays. Supports only 8UC1 data type.
Equalizes the histogram of a grayscale image.
.. ocv:function:: void ocl::equalizeHist(const oclMat &mat_src, oclMat &mat_dst)
:param mat_src: Source image.
:param mat_dst: Destination image.
.. seealso:: :ocv:func:`equalizeHist`
Returns void
.. ocv:function:: void ocl::remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar())
:param src: Source image.
:param dst: Destination image. It has the same size as map1 and the same type as src.
:param map1: The first map of either (x,y) points or just x values having the type CV_16SC2 , CV_32FC1 , or CV_32FC2 . See convertMaps() for details on converting a floating point representation to fixed-point for speed.
:param map2: The second map of y values having the type CV_32FC1 , or none (empty map if map1 is (x,y) points), respectively.
:param interpolation: The interpolation method
:param bordertype: Pixel extrapolation method.
:param value: The border value if borderType==BORDER_CONSTANT
The function remap transforms the source image using the specified map: dst (x ,y) = src (map1(x , y) , map2(x , y)) where values of pixels with non-integer coordinates are computed using one of available interpolation methods. map1 and map2 can be encoded as separate floating-point maps in map1 and map2 respectively, or interleaved floating-point maps of (x,y) in map1.
Returns void
.. ocv:function:: void ocl::resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR)
:param src: Source image.
:param dst: Destination image.
:param dsize: The destination image size. If it is zero, then it is computed as: dsize = Size(round(fx*src.cols), round(fy*src.rows)). Either dsize or both fx and fy must be non-zero.
:param fx: The scale factor along the horizontal axis. When 0, it is computed as (double)dsize.width/src.cols
:param fy: The scale factor along the vertical axis. When 0, it is computed as (double)dsize.height/src.rows
:param interpolation: The interpolation method: INTER_NEAREST or INTER_LINEAR
Resizes an image. Supports CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1 , CV_32FC3 and CV_32FC4 data types.
Returns void
.. ocv:function:: void ocl::warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR)
:param src: Source image.
:param dst: Destination image.
:param M: 2x3 transformation matrix
:param dsize: Size of the destination image
:param flags: A combination of interpolation methods, see cv::resize, and the optional flag WARP_INVERSE_MAP that means that M is the inverse transformation (dst to src)
The function warpAffine transforms the source image using the specified matrix. Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC types.
Returns void
.. ocv:function:: void ocl::warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR)
:param src: Source image.
:param dst: Destination image.
:param M: 3x3 transformation matrix
:param dsize: Size of the destination image
:param flags: A combination of interpolation methods, see cv::resize, and the optional flag WARP_INVERSE_MAP that means that M is the inverse transformation (dst to src)
Applies a perspective transformation to an image. Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC types.
Returns void
.. ocv:function:: void ocl::cvtColor(const oclMat &src, oclMat &dst, int code, int dcn = 0)
:param src: Source image.
:param dst: Destination image.
:param code: The color space conversion code
:param dcn: The number of channels in the destination image; if the parameter is 0, the number of the channels will be derived automatically from src and the code
Converts image from one color space to another. For now, only RGB2GRAY is supported. Supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32SC4, CV_32FC1 and CV_32FC4 data types.
Returns Threshold value
.. ocv:function:: double ocl::threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC)
:param src: The source array
:param dst: Destination array; will have the same size and the same type as src
:param thresh: Threshold value
:param maxVal: Maximum value to use with THRESH_BINARY and THRESH_BINARY_INV thresholding types
:param type: Thresholding type
The function applies fixed-level thresholding to a single-channel array. The function is typically used to get a bi-level (binary) image out of a grayscale image or for removing a noise, i.e. filtering out pixels with too small or too large values. There are several types of thresholding that the function supports that are determined by thresholdType.
Builds plane warping maps.
.. ocv:function:: void ocl::buildWarpPlaneMaps( Size src_size, Rect dst_roi, const Mat& K, const Mat& R, const Mat& T, float scale, oclMat& map_x, oclMat& map_y )
Builds cylindrical warping maps.
.. ocv:function:: void ocl::buildWarpCylindricalMaps( Size src_size, Rect dst_roi, const Mat& K, const Mat& R, float scale, oclMat& map_x, oclMat& map_y )
Builds spherical warping maps.
.. ocv:function:: void ocl::buildWarpSphericalMaps( Size src_size, Rect dst_roi, const Mat& K, const Mat& R, float scale, oclMat& map_x, oclMat& map_y )
Builds transformation maps for perspective transformation.
.. ocv:function:: void ocl::buildWarpPerspectiveMaps(const Mat& M, bool inverse, Size dsize, oclMat& xmap, oclMat& ymap)
:param M: *3x3* transformation matrix.
:param inverse: Flag specifying that ``M`` is an inverse transformation ( ``dst=>src`` ).
:param dsize: Size of the destination image.
:param xmap: X values with ``CV_32FC1`` type.
:param ymap: Y values with ``CV_32FC1`` type.
.. seealso:: :ocv:func:`ocl::warpPerspective` , :ocv:func:`ocl::remap`
Builds transformation maps for affine transformation.
.. ocv:function:: void ocl::buildWarpAffineMaps(const Mat& M, bool inverse, Size dsize, oclMat& xmap, oclMat& ymap)
:param M: *2x3* transformation matrix.
:param inverse: Flag specifying that ``M`` is an inverse transformation ( ``dst=>src`` ).
:param dsize: Size of the destination image.
:param xmap: X values with ``CV_32FC1`` type.
:param ymap: Y values with ``CV_32FC1`` type.
.. seealso:: :ocv:func:`ocl::warpAffine` , :ocv:func:`ocl::remap`
Finds circles in a grayscale image using the Hough transform.
.. ocv:function:: void ocl::HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096)
.. ocv:function:: void ocl::HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096)
:param src: 8-bit, single-channel grayscale input image.
:param circles: Output vector of found circles. Each vector is encoded as a 3-element floating-point vector :math:`(x, y, radius)` .
:param method: Detection method to use. Currently, the only implemented method is ``CV_HOUGH_GRADIENT`` , which is basically *21HT* , described in [Yuen90]_.
:param dp: Inverse ratio of the accumulator resolution to the image resolution. For example, if ``dp=1`` , the accumulator has the same resolution as the input image. If ``dp=2`` , the accumulator has half as big width and height.
:param minDist: Minimum distance between the centers of the detected circles. If the parameter is too small, multiple neighbor circles may be falsely detected in addition to a true one. If it is too large, some circles may be missed.
:param cannyThreshold: The higher threshold of the two passed to the :ocv:func:`ocl::Canny` edge detector (the lower one is twice smaller).
:param votesThreshold: The accumulator threshold for the circle centers at the detection stage. The smaller it is, the more false circles may be detected.
:param minRadius: Minimum circle radius.
:param maxRadius: Maximum circle radius.
:param maxCircles: Maximum number of output circles.
:param buf: Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
.. note:: Currently only non-ROI oclMat is supported for src.
.. seealso:: :ocv:func:`HoughCircles`
Binary file not shown.
Before Width: | Height: | Size: 64 KiB |
@ -1,73 +0,0 @@
OpenCL Module Introduction
.. highlight:: cpp
General Information
The OpenCV OCL module contains a set of classes and functions that implement and accelerate OpenCV functionality on OpenCL compatible devices. OpenCL is a Khronos standard, implemented by a variety of devices (CPUs, GPUs, FPGAs, ARM), abstracting the exact hardware details, while enabling vendors to provide native implementation for maximal acceleration on their hardware. The standard enjoys wide industry support, and the end user of the module will enjoy the data parallelism benefits that the specific platform/hardware may be capable of, in a platform/hardware independent manner.
While in the future we hope to validate (and enable) the OCL module in all OpenCL capable devices, we currently develop and test on GPU devices only. This includes both discrete GPUs (NVidia, AMD), as well as integrated chips (AMD APU and Intel HD devices). Performance of any particular algorithm will depend on the particular platform characteristics and capabilities. However, currently, accuracy and mathematical correctness have been verified to be identical to those of the pure CPU implementation on all tested GPU devices and platforms (both Windows and Linux).
The OpenCV OCL module includes utility functions, low-level vision primitives, and high-level algorithms. The utility functions and low-level primitives provide a powerful infrastructure for developing fast vision algorithms taking advantage of OCL, whereas the high-level functionality (samples) includes some state-of-the-art algorithms (including LK Optical flow, and Face detection) ready to be used by the application developers. The module is also accompanied by an extensive performance and accuracy test suite.
The OpenCV OCL module is designed for ease of use and does not require any knowledge of OpenCL. At a minimum level, it can be viewed as a set of accelerators that can take advantage of the high compute throughput that GPU/APU devices can provide. However, it can also be viewed as a starting point to really integrate the built-in functionality with your own custom OpenCL kernels, with or without modifying the source of OpenCV-OCL. Of course, knowledge of OpenCL will certainly help; however, we hope that the OpenCV-OCL module, and the kernels it contains in source code, can be very useful as a means of actually learning OpenCL. Such knowledge would be necessary to further fine-tune any of the existing OpenCL kernels, or for extending the framework with new kernels. As of OpenCV 2.4.4, we introduce interoperability with OpenCL, enabling easy use of custom OpenCL kernels within the OpenCV framework.
To correctly run the OCL module, you need to have the OpenCL runtime provided by the device vendor, typically the device driver.
To enable OCL support, configure OpenCV using CMake with ``WITH_OPENCL=ON``. When the flag is set and if OpenCL SDK is installed, the full-featured OpenCV OCL module is built. Otherwise, the module may not be built. If you have AMD's FFT and BLAS libraries, you can enable them with ``WITH_OPENCLAMDFFT=ON`` and ``WITH_OPENCLAMDBLAS=ON``.
The ocl module can be found under the "modules" directory. In "modules/ocl/src" you can find the source code for the cpp classes that wrap around the direct kernel invocation. The kernels themselves can be found in "modules/ocl/src/opencl". Samples can be found under "samples/ocl". Accuracy tests can be found in "modules/ocl/test", and performance tests under "modules/ocl/perf".
Right now, the user can select OpenCL device by specifying the environment variable ``OPENCV_OPENCL_DEVICE``. Variable format:
.. code-block:: cpp
<Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<DeviceName or ID>
**Note:** Device ID range is: 0..9 (only one digit, 10 - it is a part of name)
.. code-block:: cpp
'' = ':' = '::' = ':GPU|CPU:'
Also the user can use the ``cv::ocl::setDevice`` function (with ``cv::ocl::getOpenCLPlatforms`` and ``cv::ocl::getOpenCLDevices``). This function initializes the OpenCL runtime and sets up the passed device as the computing device.
In the current version, all threads share the same context and device, so multiple devices are not supported. We will add this feature soon. If a function supports 4-channel operation, it should support 3-channel operation as well, because all 3-channel matrices (i.e. RGB images) are represented by 4-channel matrices in ``oclMat``. This means that a 3-channel image occupies 4-channel space with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and ``oclMat``.
Developer Notes
In a heterogeneous device environment, there may be cost associated with data transfer. This would be the case, for example, when data needs to be moved from host memory (accessible to the CPU), to device memory (accessible to a discrete GPU). In the case of integrated graphics chips, there may be performance issues relating to memory coherency between access from the GPU "part" of the integrated device and the CPU "part." For best performance, in either case, it is recommended that you do not introduce data transfers between CPU and the discrete GPU, except in the beginning and the end of the algorithmic pipeline.
Some tidbits:
1. OpenCL version should be larger than 1.1 with FULL PROFILE.
2. Currently there's only one OpenCL context and command queue. We hope to implement multi device and multi queue support in the future.
3. Many kernels use 256 as their workgroup size if possible, so the max work group size of the device must be at least 256. All GPU devices we are aware of indeed support 256 workitems in a workgroup, however non GPU devices may not. This will be improved in the future.
4. If the device does not support double arithmetic, then functions' implementation generates an error.
5. The ``oclMat`` uses buffer object, not image object.
6. All the 3-channel matrices (i.e. RGB image) are represented by 4-channel matrices in ``oclMat``, with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and ``oclMat``.
7. All the matrices in ``oclMat`` are aligned in column (currently the alignment factor for ``step`` is 32+ bytes). This means that m.cols * m.elemSize() <= m.step.
8. Data transfer between Mat and ``oclMat``: If the CPU matrix is aligned in column, we will use faster API to transfer between Mat and ``oclMat``, otherwise, we will use clEnqueueRead/WriteBufferRect to transfer data to guarantee the alignment. 3-channel matrix is an exception, it's directly transferred to a temp buffer and then padded to 4-channel matrix(also aligned) when uploading and do the reverse operation when downloading.
9. Data transfer between Mat and ``oclMat``: ROI is a feature of OpenCV that allows users to process a sub-rectangle of a matrix. When a CPU matrix that has an ROI is transferred to the GPU, the whole matrix is transferred and the ROI is set to match the CPU's. In short, we always transfer the whole matrix regardless of whether it has an ROI or not.
10. All the kernel files should be located in "modules/ocl/src/opencl/" with the extension ".cl". All the kernel files are transformed to pure characters at compilation time in opencl_kernels.cpp, and the file name without extension is the name of the program sources.
@ -1,106 +0,0 @@
Matrix Reductions
.. highlight:: cpp
Returns the sum of absolute values for matrix elements.
.. ocv:function:: Scalar ocl::absSum(const oclMat &m)
:param m: The Source image of all depth.
Counts the abs sum of matrix elements for each channel. Supports all data types.
Returns the number of non-zero elements in src
.. ocv:function:: int ocl::countNonZero(const oclMat &src)
:param src: Single-channel array
Counts non-zero array elements. Supports all data types.
.. ocv:function:: void ocl::min(const oclMat &src1, const oclMat &src2, oclMat &dst)
:param src1: the first input array.
:param src2: the second input array, must be the same size and same type as ``src1``.
:param dst: the destination array, it will have the same size and same type as ``src1``.
Computes element-wise minima of two arrays. Supports all data types.
.. ocv:function:: void ocl::max(const oclMat &src1, const oclMat &src2, oclMat &dst)
:param src1: the first input array.
:param src2: the second input array, must be the same size and same type as ``src1``.
:param dst: the destination array, it will have the same size and same type as ``src1``.
Computes element-wise maxima of two arrays. Supports all data types.
Returns void
.. ocv:function:: void ocl::minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat())
:param src: Single-channel array
:param minVal: Pointer to returned minimum value, should not be NULL
:param maxVal: Pointer to returned maximum value, should not be NULL
:param mask: The optional mask used to select a sub-array
Finds global minimum and maximum in a whole array or sub-array. Supports all data types.
Returns void
.. ocv:function:: void ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0,const oclMat &mask = oclMat())
:param src: Single-channel array
:param minVal: Pointer to returned minimum value, should not be NULL
:param maxVal: Pointer to returned maximum value, should not be NULL
:param minLoc: Pointer to returned minimum location (in 2D case), should not be NULL
:param maxLoc: Pointer to returned maximum location (in 2D case) should not be NULL
:param mask: The optional mask used to select a sub-array
The functions minMaxLoc find minimum and maximum element values and their positions. The extremums are searched across the whole array, or, if mask is not an empty array, in the specified array region. The functions do not work with multi-channel arrays.
Returns the squared sum of matrix elements for each channel
.. ocv:function:: Scalar ocl::sqrSum(const oclMat &m)
:param m: The Source image of all depth.
Counts the squared sum of matrix elements for each channel. Supports all data types.
Returns the sum of matrix elements for each channel
.. ocv:function:: Scalar ocl::sum(const oclMat &m)
:param m: The Source image of all depth.
Counts the sum of matrix elements for each channel.
@ -1,106 +0,0 @@
ml.Machine Learning
.. highlight:: cpp
.. ocv:class:: ocl::KNearestNeighbour : public ocl::CvKNearest
The class implements K-Nearest Neighbors model as described in the beginning of this section.
The class declaration is as follows. ::
class CV_EXPORTS KNearestNeighbour: public CvKNearest
bool train(const Mat& trainData, Mat& labels, Mat& sampleIdx = Mat().setTo(Scalar::all(0)),
bool isRegression = false, int max_k = 32, bool updateBase = false);
void clear();
void find_nearest(const oclMat& samples, int k, oclMat& lables);
/* hidden */
Trains the model.
.. ocv:function:: bool ocl::KNearestNeighbour::train(const Mat& trainData, Mat& labels, Mat& sampleIdx = Mat().setTo(Scalar::all(0)), bool isRegression = false, int max_k = 32, bool updateBase = false)
:param isRegression: Type of the problem: ``true`` for regression and ``false`` for classification.
:param max_k: Number of maximum neighbors that may be passed to the method :ocv:func:`CvKNearest::find_nearest`.
:param updateBase: Specifies whether the model is trained from scratch (``updateBase=false``), or it is updated using the new training data (``updateBase=true``). In the latter case, the parameter ``max_k`` must not be larger than the original value.
The method trains the K-Nearest model. It follows the conventions of the generic :ocv:func:`CvStatModel::train` approach with the following limitations:
* Only ``CV_ROW_SAMPLE`` data layout is supported.
* Input variables are all ordered.
* Output variables can be either categorical ( ``isRegression=false`` ) or ordered ( ``isRegression=true`` ).
* Variable subsets (``var_idx``) and missing measurements are not supported.
Finds the neighbors and predicts responses for input vectors.
.. ocv:function:: void ocl::KNearestNeighbour::find_nearest(const oclMat& samples, int k, oclMat& lables )
:param samples: Input samples stored by rows. It is a single-precision floating-point matrix of :math:`number\_of\_samples \times number\_of\_features` size.
:param k: Number of used nearest neighbors. It must satisfy constraint: :math:`k \le` :ocv:func:`CvKNearest::get_max_k`.
:param labels: Vector with results of prediction (regression or classification) for each input sample. It is a single-precision floating-point vector with ``number_of_samples`` elements.
Finds centers of clusters and groups input samples around the clusters.
.. ocv:function:: double ocl::kmeans(const oclMat &src, int K, oclMat &bestLabels, TermCriteria criteria, int attemps, int flags, oclMat &centers)
:param src: Floating-point matrix of input samples, one row per sample.
:param K: Number of clusters to split the set by.
:param bestLabels: Input/output integer array that stores the cluster indices for every sample.
:param criteria: The algorithm termination criteria, that is, the maximum number of iterations and/or the desired accuracy. The accuracy is specified as ``criteria.epsilon``. As soon as each of the cluster centers moves by less than ``criteria.epsilon`` on some iteration, the algorithm stops.
:param attempts: Flag to specify the number of times the algorithm is executed using different initial labellings. The algorithm returns the labels that yield the best compactness (see the last function parameter).
:param flags: Flag that can take the following values:
* **KMEANS_RANDOM_CENTERS** Select random initial centers in each attempt.
* **KMEANS_PP_CENTERS** Use ``kmeans++`` center initialization by Arthur and Vassilvitskii [Arthur2007].
* **KMEANS_USE_INITIAL_LABELS** During the first (and possibly the only) attempt, use the user-supplied labels instead of computing them from the initial centers. For the second and further attempts, use the random or semi-random centers. Use one of ``KMEANS_*_CENTERS`` flag to specify the exact method.
:param centers: Output matrix of the cluster centers, one row per each cluster center.
For each sample in ``src``, finds its closest neighbour in ``centers``.
.. ocv:function:: void ocl::distanceToCenters(const oclMat &src, const oclMat &centers, Mat &dists, Mat &labels, int distType = NORM_L2SQR)
:param src: Floating-point matrix of input samples. One row per sample.
:param centers: Floating-point matrix of center candidates. One row per center.
:param distType: Distance metric to calculate distances. Supports ``NORM_L1`` and ``NORM_L2SQR``.
:param dists: The output distances calculated from each sample to the best matched center.
:param labels: The output index of best matched center for each row of sample.
The method is a utility function which may be used for multiple clustering algorithms such as K-means.
@ -1,95 +0,0 @@
Object Detection
.. highlight:: cpp
.. ocv:class:: ocl::OclCascadeClassifier : public CascadeClassifier
Cascade classifier class used for object detection. Supports HAAR cascade classifier in the form of cross link ::
class CV_EXPORTS OclCascadeClassifier : public CascadeClassifier
void detectMultiScale(oclMat &image, CV_OUT std::vector<cv::Rect>& faces,
double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
Size minSize = Size(), Size maxSize = Size());
.. note::
(Ocl) A face detection example using cascade classifiers can be found at opencv_source_code/samples/ocl/facedetect.cpp
Detects objects of different sizes in the input image.
.. ocv:function:: void ocl::OclCascadeClassifier::detectMultiScale(oclMat &image, std::vector<cv::Rect>& faces, double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0, Size minSize = Size(), Size maxSize = Size())
:param faces: Vector of rectangles where each rectangle contains the detected object.
:param image: Matrix of type CV_8U containing an image where objects should be detected.
:param scaleFactor: Parameter specifying how much the image size is reduced at each image scale.
:param minNeighbors: Parameter specifying how many neighbors each candidate rectangle should have to retain it.
:param flags: Parameter with the same meaning for an old cascade as in the function ``cvHaarDetectObjects``. It is not used for a new cascade.
:param minSize: Minimum possible object size. Objects smaller than that are ignored.
:param maxSize: Maximum possible object size. Objects larger than that are ignored.
The function provides a very similar interface with that in CascadeClassifier class, except using oclMat as input image.
.. ocv:struct:: ocl::MatchTemplateBuf
Class providing memory buffers for :ocv:func:`ocl::matchTemplate` function, plus it allows to adjust some specific parameters. ::
struct CV_EXPORTS MatchTemplateBuf
Size user_block_size;
oclMat imagef, templf;
std::vector<oclMat> images;
std::vector<oclMat> image_sums;
std::vector<oclMat> image_sqsums;
You can use field `user_block_size` to set specific block size for :ocv:func:`ocl::matchTemplate` function. If you leave its default value `Size(0,0)` then automatic estimation of block size will be used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed.
Computes a proximity map for a raster template and an image where the template is searched for.
.. ocv:function:: void ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method)
.. ocv:function:: void ocl::matchTemplate(const oclMat& image, const oclMat& templ, oclMat& result, int method, MatchTemplateBuf &buf)
:param image: Source image. ``CV_32F`` and ``CV_8U`` depth images (1..4 channels) are supported for now.
:param templ: Template image with the size and type the same as ``image`` .
:param result: Map containing comparison results ( ``CV_32FC1`` ). If ``image`` is *W x H* and ``templ`` is *w x h*, then ``result`` must be *W-w+1 x H-h+1*.
:param method: Specifies the way to compare the template with the image.
:param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`ocl::MatchTemplateBuf`.
The following methods are supported for the ``CV_8U`` depth images for now:
The following methods are supported for the ``CV_32F`` images for now:
.. seealso:: :ocv:func:`matchTemplate`
@ -1,21 +0,0 @@
ocl. OpenCL-accelerated Computer Vision
.. toctree::
:maxdepth: 1
.. camera_calibration_and_3d_reconstruction
.. video
@ -1,602 +0,0 @@
Operations on Matrices
.. highlight:: cpp
Returns void
.. ocv:function:: void ocl::abs(const oclMat& src, oclMat& dst)
:param src: input array.
:param dst: destination array, it will have the same size and same type as ``src``.
Computes per-element absolute values of the input array. Supports all data types.
Returns void
.. ocv:function:: void ocl::absdiff(const oclMat& src1, const oclMat& src2, oclMat& dst)
.. ocv:function:: void ocl::absdiff(const oclMat& src1, const Scalar& s, oclMat& dst)
:param src1: the first input array.
:param src2: the second input array, must be the same size and same type as ``src1``.
:param s: scalar, the second input parameter.
:param dst: the destination array, it will have the same size and same type as ``src1``.
Computes per-element absolute difference between two arrays or between array and a scalar. Supports all data types.
Returns void
.. ocv:function:: void ocl::add(const oclMat & src1, const oclMat & src2, oclMat & dst, const oclMat & mask = oclMat())
.. ocv:function:: void ocl::add(const oclMat & src1, const Scalar & s, oclMat & dst, const oclMat & mask = oclMat())
:param src1: the first input array.
:param src2: the second input array, must be the same size and same type as ``src1``.
:param s: scalar, the second input parameter
:param dst: the destination array, it will have the same size and same type as ``src1``.
:param mask: the optional operation mask, 8-bit single channel array; specifies elements of the destination array to be changed.
Computes per-element addition between two arrays or between array and a scalar. Supports all data types.
Computes the weighted sum of two arrays.
.. ocv:function:: void ocl::addWeighted(const oclMat& src1, double alpha, const oclMat& src2, double beta, double gama, oclMat& dst)
:param src1: First source array.
:param alpha: Weight for the first array elements.
:param src2: Second source array of the same size and channel number as ``src1`` .
:param beta: Weight for the second array elements.
:param dst: Destination array that has the same size and number of channels as the input arrays.
:param gamma: Scalar added to each sum.
The function ``addWeighted`` calculates the weighted sum of two arrays as follows:
.. math::
\texttt{dst} (I)= \texttt{saturate} ( \texttt{src1} (I)* \texttt{alpha} + \texttt{src2} (I)* \texttt{beta} + \texttt{gamma} )
where ``I`` is a multi-dimensional index of array elements. In case of multi-channel arrays, each channel is processed independently.
.. seealso:: :ocv:func:`addWeighted`
Returns void
.. ocv:function:: void ocl::bitwise_and(const oclMat& src1, const oclMat& src2, oclMat& dst, const oclMat& mask = oclMat())
.. ocv:function:: void ocl::bitwise_and(const oclMat& src1, const Scalar& s, oclMat& dst, const oclMat& mask = oclMat())
:param src1: the first input array.
:param src2: the second input array, must be the same size and same type as ``src1``.
:param s: scalar, the second input parameter.
:param dst: the destination array, it will have the same size and same type as ``src1``.
:param mask: the optional operation mask, 8-bit single channel array; specifies elements of the destination array to be changed.
Computes per-element bitwise_and between two arrays or between array and a scalar. Supports all data types.
Returns void
.. ocv:function:: void ocl::bitwise_not(const oclMat &src, oclMat &dst)
:param src: the input array.
:param dst: the destination array, it will have the same size and same type as ``src``.
The function ``bitwise_not`` computes per-element bit-wise inversion of the source array. Supports all data types.
Returns void
.. ocv:function:: void ocl::bitwise_or(const oclMat& src1, const oclMat& src2, oclMat& dst, const oclMat& mask = oclMat())
.. ocv:function:: void ocl::bitwise_or(const oclMat& src1, const Scalar& s, oclMat& dst, const oclMat& mask = oclMat())
:param src1: the first input array.
:param src2: the second input array, must be the same size and same type as ``src1``.
:param s: scalar, the second input parameter.
:param dst: the destination array, it will have the same size and same type as ``src1``.
:param mask: the optional operation mask, 8-bit single channel array; specifies elements of the destination array to be changed.
Computes per-element bitwise_or between two arrays or between array and a scalar. Supports all data types.
Returns void
.. ocv:function:: void ocl::bitwise_xor(const oclMat& src1, const oclMat& src2, oclMat& dst, const oclMat& mask = oclMat())
.. ocv:function:: void ocl::bitwise_xor(const oclMat& src1, const Scalar& s, oclMat& dst, const oclMat& mask = oclMat())
:param src1: the first input array.
:param src2: the second input array, must be the same size and same type as ``src1``.
:param s: scalar, the second input parameter.
:param dst: the destination array, it will have the same size and same type as ``src1``.
:param mask: the optional operation mask, 8-bit single channel array; specifies elements of the destination array to be changed.
Computes per-element bitwise_xor between two arrays or between array and a scalar. Supports all data types.
Returns void
.. ocv:function:: void ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false)
:param x: the array of x-coordinates; must be single-precision or double-precision floating-point array.
:param y: the array of y-coordinates; it must have the same size and same type as ``x``.
:param magnitude: the destination array of magnitudes of the same size and same type as ``x``.
:param angle: the destination array of angles of the same size and same type as ``x``. The angles are measured in radians (0 to 2pi) or in degrees (0 to 360 degrees).
:param angleInDegrees: the flag indicating whether the angles are measured in radians, which is default mode, or in degrees.
Calculates the magnitude and angle of 2D vectors. Supports only ``CV_32F`` and ``CV_64F`` data types.
Returns void
.. ocv:function:: void ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpop)
:param src1: the first source array.
:param src2: the second source array; must have the same size and same type as ``src1``.
:param dst: the destination array; will have the same size as ``src1`` and type ``CV_8UC1``.
:param cmpop: the flag specifying the relation between the elements to be checked.
Performs per-element comparison of two arrays or an array and scalar value. Supports all data types.
Performs a forward or inverse discrete Fourier transform (1D or 2D) of the floating point matrix.
.. ocv:function:: void ocl::dft(const oclMat& src, oclMat& dst, Size dft_size = Size(), int flags = 0)
:param src: source matrix (real or complex).
:param dst: destination matrix (real or complex).
:param dft_size: size of original input, which is used for transformation from complex to real.
:param flags: optional flags:
* **DFT_ROWS** transforms each individual row of the source matrix.
* **DFT_COMPLEX_OUTPUT** performs a forward transformation of 1D or 2D real array. The result, though being a complex array, has complex-conjugate symmetry (*CCS*, see the function description below for details). Such an array can be packed into a real array of the same size as input, which is the fastest option and which is what the function does by default. However, you may wish to get a full complex array (for simpler spectrum analysis, and so on). Pass the flag to enable the function to produce a full-size complex output array.
* **DFT_INVERSE** inverts DFT. Use for complex-complex cases (real-complex and complex-real cases are always forward and inverse, respectively).
* **DFT_REAL_OUTPUT** specifies the output as real. The source matrix is the result of real-complex transform, so the destination matrix must be real.
Use to handle real matrices (``CV_32FC1``) and complex matrices in the interleaved format (``CV_32FC2``).
Each dimension of ``dft_size`` must be a product of powers of ``2``, ``3`` and ``5``. The real-to-complex dft output is not the same as that of the CPU version. Real-to-complex and complex-to-real transforms do not support ``DFT_ROWS``.
.. seealso:: :ocv:func:`dft`
Returns void
.. ocv:function:: void ocl::divide(const oclMat& src1, const oclMat& src2, oclMat& dst, double scale = 1)
.. ocv:function:: void ocl::divide(double scale, const oclMat& src1, oclMat& dst)
:param src1: the first input array.
:param src2: the second input array, must be the same size and same type as ``src1``.
:param dst: the destination array, it will have the same size and same type as ``src1``.
:param scale: scalar factor.
Computes per-element divide between two arrays or between array and a scalar. Supports all data types.
Returns void
.. ocv:function:: void ocl::exp(const oclMat &src, oclMat &dst)
:param src: the first source array.
:param dst: the dst array; must have the same size and same type as ``src``.
The function exp calculates the exponent of every element of the input array. Supports only ``CV_32FC1`` and ``CV_64F`` data types.
Returns void
.. ocv:function:: void ocl::flip(const oclMat& src, oclMat& dst, int flipCode)
:param src: source image.
:param dst: destination image.
:param flipCode: specifies how to flip the array: 0 means flipping around the x-axis, positive (e.g., 1) means flipping around y-axis, and negative (e.g., -1) means flipping around both axes.
The function flip flips the array in one of three different ways (row and column indices are 0-based). Supports all data types.
Performs generalized matrix multiplication.
.. ocv:function:: void ocl::gemm(const oclMat& src1, const oclMat& src2, double alpha, const oclMat& src3, double beta, oclMat& dst, int flags = 0)
:param src1: first multiplied input matrix that should be ``CV_32FC1`` type.
:param src2: second multiplied input matrix of the same type as ``src1``.
:param alpha: weight of the matrix product.
:param src3: third optional delta matrix added to the matrix product. It should have the same type as ``src1`` and ``src2``.
:param beta: weight of ``src3``.
:param dst: destination matrix. It has the proper size and the same type as input matrices.
:param flags: operation flags:
* **GEMM_1_T** transpose ``src1``.
* **GEMM_2_T** transpose ``src2``.
.. seealso:: :ocv:func:`gemm`
Returns void
.. ocv:function:: void ocl::log(const oclMat &src, oclMat &dst)
:param src: the first source array.
:param dst: the dst array; must have the same size and same type as ``src``.
The function log calculates the log of every element of the input array. Supports only ``CV_32FC1`` and ``CV_64F`` data types.
Returns void
.. ocv:function:: void ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst)
:param src: source array of 8-bit elements.
:param lut: look-up table of 256 elements. In the case of multi-channel source array, the table should either have a single channel (in this case the same table is used for all channels) or the same number of channels as in the source array.
:param dst: destination array; will have the same size and the same number of channels as ``src``, and the same depth as ``lut``.
Performs a look-up table transform of an array.
Returns void
.. ocv:function:: void ocl::magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude)
:param x: the floating-point array of x-coordinates of the vectors.
:param y: the floating-point array of y-coordinates of the vectors; must have the same size as ``x``.
:param magnitude: the destination array; will have the same size and same type as ``x``.
The function magnitude calculates magnitude of 2D vectors formed from the corresponding elements of ``x`` and ``y`` arrays. Supports only ``CV_32F`` and ``CV_64F`` data types.
Returns void
.. ocv:function:: void ocl::meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev)
:param mtx: source image.
:param mean: the output parameter: computed mean value.
:param stddev: the output parameter: computed standard deviation.
The function meanStdDev computes the mean and the standard deviation of array elements, independently for each channel, and returns them via the output parameters. Supports all data types.
Returns void
.. ocv:function:: void ocl::merge(const vector<oclMat> &src, oclMat &dst)
:param src: The source array or vector of the single-channel matrices to be merged. All the matrices in src must have the same size and the same type.
:param dst: The destination array; will have the same size and the same depth as src, the number of channels will match the number of source matrices.
Composes a multi-channel array from several single-channel arrays. Supports all data types.
Returns void
.. ocv:function:: void ocl::multiply(const oclMat& src1, const oclMat& src2, oclMat& dst, double scale = 1)
:param src1: the first input array.
:param src2: the second input array, must be the same size and same type as ``src1``.
:param dst: the destination array, it will have the same size and same type as ``src1``.
:param scale: optional scale factor.
Computes per-element multiply between two arrays or between array and a scalar. Supports all data types.
Returns the calculated norm
.. ocv:function:: double ocl::norm(const oclMat &src1, int normType = NORM_L2)
.. ocv:function:: double ocl::norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2)
:param src1: the first source array.
:param src2: the second source array of the same size and the same type as ``src1``.
:param normType: type of the norm.
The functions ``norm`` calculate an absolute norm of ``src1`` (when there is no ``src2`` ):
.. math::
norm = \forkthree{\|\texttt{src1}\|_{L_{\infty}} = \max _I | \texttt{src1} (I)|}{if $\texttt{normType} = \texttt{NORM\_INF}$ }
{ \| \texttt{src1} \| _{L_1} = \sum _I | \texttt{src1} (I)|}{if $\texttt{normType} = \texttt{NORM\_L1}$ }
{ \| \texttt{src1} \| _{L_2} = \sqrt{\sum_I \texttt{src1}(I)^2} }{if $\texttt{normType} = \texttt{NORM\_L2}$ }
or an absolute or relative difference norm if ``src2`` is there:
.. math::
norm = \forkthree{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} = \max _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if $\texttt{normType} = \texttt{NORM\_INF}$ }
{ \| \texttt{src1} - \texttt{src2} \| _{L_1} = \sum _I | \texttt{src1} (I) - \texttt{src2} (I)|}{if $\texttt{normType} = \texttt{NORM\_L1}$ }
{ \| \texttt{src1} - \texttt{src2} \| _{L_2} = \sqrt{\sum_I (\texttt{src1}(I) - \texttt{src2}(I))^2} }{if $\texttt{normType} = \texttt{NORM\_L2}$ }
.. math::
norm = \forkthree{\frac{\|\texttt{src1}-\texttt{src2}\|_{L_{\infty}} }{\|\texttt{src2}\|_{L_{\infty}} }}{if $\texttt{normType} = \texttt{NORM\_RELATIVE\_INF}$ }
{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_1} }{\|\texttt{src2}\|_{L_1}} }{if $\texttt{normType} = \texttt{NORM\_RELATIVE\_L1}$ }
{ \frac{\|\texttt{src1}-\texttt{src2}\|_{L_2} }{\|\texttt{src2}\|_{L_2}} }{if $\texttt{normType} = \texttt{NORM\_RELATIVE\_L2}$ }
The functions ``norm`` return the calculated norm.
Multi-channel input arrays are treated as single-channel, that is, the results for all channels are combined.
Returns void
.. ocv:function:: void ocl::oclMat::convertTo(oclMat &m, int rtype, double alpha = 1, double beta = 0) const
:param m: the destination matrix. If it does not have a proper size or type before the operation, it will be reallocated.
:param rtype: the desired destination matrix type, or rather, the depth (since the number of channels will be the same with the source one). If rtype is negative, the destination matrix will have the same type as the source.
:param alpha: optional scale factor.
:param beta: optional delta added to the scaled values.
The method converts source pixel values to the target datatype. Saturate cast is applied in the end to avoid possible overflows. Supports all data types.
Returns void
.. ocv:function:: void ocl::oclMat::copyTo(oclMat &m, const oclMat &mask = oclMat()) const
:param m: The destination matrix. If it does not have a proper size or type before the operation, it will be reallocated.
:param mask: The operation mask. Its non-zero elements indicate, which matrix elements need to be copied.
Copies the matrix to another one. Supports all data types.
Returns oclMat
.. ocv:function:: oclMat& ocl::oclMat::setTo(const Scalar &s, const oclMat &mask = oclMat())
:param s: Assigned scalar, which is converted to the actual array type.
:param mask: The operation mask of the same size as ``*this`` and type ``CV_8UC1``.
Sets all or some of the array elements to the specified value. This is the advanced variant of Mat::operator=(const Scalar s) operator. Supports all data types.
Returns void
.. ocv:function:: void ocl::phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false)
:param x: the source floating-point array of x-coordinates of 2D vectors
:param y: the source array of y-coordinates of 2D vectors; must have the same size and the same type as ``x``.
:param angle: the destination array of vector angles; it will have the same size and same type as ``x``.
:param angleInDegrees: when it is true, the function will compute angle in degrees, otherwise they will be measured in radians.
The function phase computes the rotation angle of each 2D vector that is formed from the corresponding elements of ``x`` and ``y``. Supports only ``CV_32FC1`` and ``CV_64FC1`` data type.
Returns void
.. ocv:function:: void ocl::polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false)
:param magnitude: the source floating-point array of magnitudes of 2D vectors. It can be an empty matrix (=Mat()) - in this case the function assumes that all the magnitudes are = 1. If it's not empty, it must have the same size and same type as ``angle``.
:param angle: the source floating-point array of angles of the 2D vectors.
:param x: the destination array of x-coordinates of 2D vectors; will have the same size and the same type as ``angle``.
:param y: the destination array of y-coordinates of 2D vectors; will have the same size and the same type as ``angle``.
:param angleInDegrees: the flag indicating whether the angles are measured in radians, which is default mode, or in degrees.
The function polarToCart computes the cartesian coordinates of each 2D vector represented by the corresponding elements of magnitude and angle. Supports only ``CV_32F`` and ``CV_64F`` data types.
Returns void
.. ocv:function:: void ocl::pow(const oclMat &x, double p, oclMat &y)
:param x: the source array.
:param p: the exponent of power.
:param y: the destination array, should be the same type as the source.
The function pow raises every element of the input array to ``p``. Supports only ``CV_32FC1`` and ``CV_64FC1`` data types.
Returns void
.. ocv:function:: void ocl::setIdentity(oclMat& src, const Scalar & val = Scalar(1))
:param src: matrix to initialize (not necessarily square).
:param val: value to assign to diagonal elements.
The function initializes a scaled identity matrix.
Returns void
.. ocv:function:: void ocl::sortByKey(oclMat& keys, oclMat& values, int method, bool isGreaterThan = false)
:param keys: the keys to be used as sorting indices.
:param values: the array of values.
:param isGreaterThan: determine sorting order.
:param method: supported sorting methods:
* **SORT_BITONIC** bitonic sort, only support power-of-2 buffer size.
* **SORT_SELECTION** selection sort, currently cannot sort duplicate keys.
* **SORT_MERGE** merge sort.
* **SORT_RADIX** radix sort, only support signed int/float keys(``CV_32S``/``CV_32F``).
Returns the sorted result of all the elements in values based on equivalent keys.
The element unit in the values to be sorted is determined from the data type, i.e., a ``CV_32FC2`` input ``{a1a2, b1b2}`` will be considered as two elements, regardless of its matrix dimensions.
Both keys and values will be sorted inplace.
Keys needs to be a **single** channel ``oclMat``.
input -
keys = {2, 3, 1} (CV_8UC1)
values = {10,5, 4,3, 6,2} (CV_8UC2)
sortByKey(keys, values, SORT_SELECTION, false);
output -
keys = {1, 2, 3} (CV_8UC1)
values = {6,2, 10,5, 4,3} (CV_8UC2)
Returns void
.. ocv:function:: void ocl::split(const oclMat &src, vector<oclMat> &dst)
:param src: The source multi-channel array
:param dst: The destination array or vector of arrays; The number of arrays must match src.channels(). The arrays themselves will be reallocated if needed.
The function split splits a multi-channel array into separate single-channel arrays. Supports all data types.
Returns void
.. ocv:function:: void ocl::sqrt(const oclMat &src, oclMat &dst)
:param src: the first source array.
:param dst: the dst array; must have the same size and same type as ``src``.
The function ``sqrt`` calculates the square root of each input array element. Supports only ``CV_32FC1`` and ``CV_64F`` data types.
Returns void
.. ocv:function:: void ocl::subtract(const oclMat& src1, const oclMat& src2, oclMat& dst, const oclMat& mask = oclMat())
.. ocv:function:: void ocl::subtract(const oclMat& src1, const Scalar& s, oclMat& dst, const oclMat& mask = oclMat())
:param src1: the first input array.
:param src2: the second input array, must be the same size and same type as ``src1``.
:param s: scalar, the second input parameter.
:param dst: the destination array, it will have the same size and same type as ``src1``.
:param mask: the optional operation mask, 8-bit single channel array; specifies elements of the destination array to be changed.
Computes per-element subtract between two arrays or between array and a scalar. Supports all data types.
Returns void
.. ocv:function:: void ocl::transpose(const oclMat &src, oclMat &dst)
:param src: the source array.
:param dst: the destination array of the same type as ``src``.
Transposes a matrix (when ``src`` == ``dst`` and the matrix is square, the operation is performed in place).
@ -1,56 +0,0 @@
Data Structures and Utility Functions
.. highlight:: cpp
Returns the list of OpenCL platforms
.. ocv:function:: int ocl::getOpenCLPlatforms( PlatformsInfo& platforms )
:param platforms: Output variable
Returns the list of devices
.. ocv:function:: int ocl::getOpenCLDevices( DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU, const PlatformInfo* platform = NULL )
:param devices: Output variable
:param platform: Specifies the preferable platform
Initialize OpenCL computation context
.. ocv:function:: void ocl::setDevice( const DeviceInfo* info )
:param info: device info
Alternative way to initialize OpenCL computation context.
.. ocv:function:: void ocl::initializeContext(void* pClPlatform, void* pClContext, void* pClDevice)
:param pClPlatform: selected ``platform_id`` (via pointer, parameter type is ``cl_platform_id*``)
:param pClContext: selected ``cl_context`` (via pointer, parameter type is ``cl_context*``)
:param pClDevice: selected ``cl_device_id`` (via pointer, parameter type is ``cl_device_id*``)
This function can be used for context initialization with D3D/OpenGL interoperability.
Returns void
.. ocv:function:: void ocl::setBinaryPath(const char *path)
:param path: the path of OpenCL kernel binaries
If you call this function and set a valid path, the OCL module will save the compiled kernels to that path the first time they are built and reload the binaries from it afterwards. This can save compilation time at runtime.
@ -1,561 +0,0 @@
Video Analysis
.. highlight:: cpp
.. ocv:class:: ocl::GoodFeaturesToTrackDetector_OCL
Class used for strong corners detection on an image. ::
class GoodFeaturesToTrackDetector_OCL
explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);
//! return 1 rows matrix with CV_32FC2 type
void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());
//! download points of type Point2f to a vector. the vector's content will be erased
void downloadPoints(const oclMat &points, std::vector<Point2f> &points_v);
int maxCorners;
double qualityLevel;
double minDistance;
int blockSize;
bool useHarrisDetector;
double harrisK;
void releaseMemory()
The class finds the most prominent corners in the image.
.. seealso:: :ocv:func:`goodFeaturesToTrack()`
.. ocv:function:: ocl::GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0, int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04)
:param maxCorners: Maximum number of corners to return. If more corners are found than ``maxCorners``, the strongest of them are returned.
:param qualityLevel: Parameter characterizing the minimal accepted quality of image corners. The parameter value is multiplied by the best corner quality measure, which is the minimal eigenvalue (see :ocv:func:`ocl::cornerMinEigenVal` ) or the Harris function response (see :ocv:func:`ocl::cornerHarris` ). The corners with the quality measure less than the product are rejected. For example, if the best corner has the quality measure = 1500, and the ``qualityLevel=0.01`` , then all the corners with the quality measure less than 15 are rejected.
:param minDistance: Minimum possible Euclidean distance between the returned corners.
:param blockSize: Size of an average block for computing a derivative covariation matrix over each pixel neighborhood. See :ocv:func:`cornerEigenValsAndVecs` .
:param useHarrisDetector: Parameter indicating whether to use a Harris detector (see :ocv:func:`ocl::cornerHarris`) or :ocv:func:`ocl::cornerMinEigenVal`.
:param harrisK: Free parameter of the Harris detector.
ocl::GoodFeaturesToTrackDetector_OCL::operator ()
Finds the most prominent corners in the image.
.. ocv:function:: void ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat())
:param image: Input 8-bit, single-channel image.
:param corners: Output vector of detected corners (it will be one row matrix with CV_32FC2 type).
:param mask: Optional region of interest. If the mask is not empty (it needs to have the type ``CV_8UC1`` and the same size as ``image`` ), it specifies the region in which the corners are detected.
.. seealso:: :ocv:func:`goodFeaturesToTrack`
Releases inner buffers memory.
.. ocv:function:: void ocl::GoodFeaturesToTrackDetector_OCL::releaseMemory()
.. ocv:class:: ocl::FarnebackOpticalFlow
Class computing a dense optical flow using the Gunnar Farneback's algorithm. ::
class CV_EXPORTS FarnebackOpticalFlow
int numLevels;
double pyrScale;
bool fastPyramids;
int winSize;
int numIters;
int polyN;
double polySigma;
int flags;
void operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy);
void releaseMemory();
/* hidden */
ocl::FarnebackOpticalFlow::operator ()
Computes a dense optical flow using the Gunnar Farneback's algorithm.
.. ocv:function:: void ocl::FarnebackOpticalFlow::operator ()(const oclMat &frame0, const oclMat &frame1, oclMat &flowx, oclMat &flowy)
:param frame0: First 8-bit gray-scale input image
:param frame1: Second 8-bit gray-scale input image
:param flowx: Flow horizontal component
:param flowy: Flow vertical component
.. seealso:: :ocv:func:`calcOpticalFlowFarneback`
Releases unused auxiliary memory buffers.
.. ocv:function:: void ocl::FarnebackOpticalFlow::releaseMemory()
.. ocv:class:: ocl::PyrLKOpticalFlow
Class used for calculating an optical flow. ::
class PyrLKOpticalFlow
void sparse(const oclMat& prevImg, const oclMat& nextImg, const oclMat& prevPts, oclMat& nextPts,
oclMat& status, oclMat* err = 0);
void dense(const oclMat& prevImg, const oclMat& nextImg, oclMat& u, oclMat& v, oclMat* err = 0);
Size winSize;
int maxLevel;
int iters;
double derivLambda;
bool useInitialFlow;
float minEigThreshold;
bool getMinEigenVals;
void releaseMemory();
/* hidden */
The class can calculate an optical flow for a sparse feature set or dense optical flow using the iterative Lucas-Kanade method with pyramids.
.. seealso:: :ocv:func:`calcOpticalFlowPyrLK`
Calculate an optical flow for a sparse feature set.
.. ocv:function:: void ocl::PyrLKOpticalFlow::sparse(const oclMat& prevImg, const oclMat& nextImg, const oclMat& prevPts, oclMat& nextPts, oclMat& status, oclMat* err = 0)
:param prevImg: First 8-bit input image (supports both grayscale and color images).
:param nextImg: Second input image of the same size and the same type as ``prevImg`` .
:param prevPts: Vector of 2D points for which the flow needs to be found. It must be one row matrix with CV_32FC2 type.
:param nextPts: Output vector of 2D points (with single-precision floating-point coordinates) containing the calculated new positions of input features in the second image. When ``useInitialFlow`` is true, the vector must have the same size as in the input.
:param status: Output status vector (CV_8UC1 type). Each element of the vector is set to 1 if the flow for the corresponding features has been found. Otherwise, it is set to 0.
:param err: Output vector (CV_32FC1 type) that contains the difference between patches around the original and moved points or min eigen value if ``getMinEigenVals`` is checked. It can be NULL, if not needed.
.. seealso:: :ocv:func:`calcOpticalFlowPyrLK`
Calculate dense optical flow.
.. ocv:function:: void ocl::PyrLKOpticalFlow::dense(const oclMat& prevImg, const oclMat& nextImg, oclMat& u, oclMat& v, oclMat* err = 0)
:param prevImg: First 8-bit grayscale input image.
:param nextImg: Second input image of the same size and the same type as ``prevImg`` .
:param u: Horizontal component of the optical flow of the same size as input images, 32-bit floating-point, single-channel
:param v: Vertical component of the optical flow of the same size as input images, 32-bit floating-point, single-channel
:param err: Output vector (CV_32FC1 type) that contains the difference between patches around the original and moved points or min eigen value if ``getMinEigenVals`` is checked. It can be NULL, if not needed.
Releases inner buffers memory.
.. ocv:function:: void ocl::PyrLKOpticalFlow::releaseMemory()
Interpolates frames (images) using provided optical flow (displacement field).
.. ocv:function:: void ocl::interpolateFrames(const oclMat& frame0, const oclMat& frame1, const oclMat& fu, const oclMat& fv, const oclMat& bu, const oclMat& bv, float pos, oclMat& newFrame, oclMat& buf)
:param frame0: First frame (32-bit floating point images, single channel).
:param frame1: Second frame. Must have the same type and size as ``frame0`` .
:param fu: Forward horizontal displacement.
:param fv: Forward vertical displacement.
:param bu: Backward horizontal displacement.
:param bv: Backward vertical displacement.
:param pos: New frame position.
:param newFrame: Output image.
:param buf: Temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat: occlusion masks for first frame, occlusion masks for second, interpolated forward horizontal flow, interpolated forward vertical flow, interpolated backward horizontal flow, interpolated backward vertical flow.
.. ocv:class:: ocl::KalmanFilter
Kalman filter class. ::
class CV_EXPORTS KalmanFilter
//! the full constructor taking the dimensionality of the state, of the measurement and of the control vector
KalmanFilter(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
//! re-initializes Kalman filter. The previous content is destroyed.
void init(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F);
const oclMat& predict(const oclMat& control=oclMat());
const oclMat& correct(const oclMat& measurement);
oclMat statePre; //!< predicted state (x'(k)): x(k)=A*x(k-1)+B*u(k)
oclMat statePost; //!< corrected state (x(k)): x(k)=x'(k)+K(k)*(z(k)-H*x'(k))
oclMat transitionMatrix; //!< state transition matrix (A)
oclMat controlMatrix; //!< control matrix (B) (not used if there is no control)
oclMat measurementMatrix; //!< measurement matrix (H)
oclMat processNoiseCov; //!< process noise covariance matrix (Q)
oclMat measurementNoiseCov;//!< measurement noise covariance matrix (R)
oclMat errorCovPre; //!< priori error estimate covariance matrix (P'(k)): P'(k)=A*P(k-1)*At + Q
oclMat gain; //!< Kalman gain matrix (K(k)): K(k)=P'(k)*Ht*inv(H*P'(k)*Ht+R)
oclMat errorCovPost; //!< posteriori error estimate covariance matrix (P(k)): P(k)=(I-K(k)*H)*P'(k)
/* hidden */
The constructors.
.. ocv:function:: ocl::KalmanFilter::KalmanFilter()
.. ocv:function:: ocl::KalmanFilter::KalmanFilter(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F)
The full constructor.
:param dynamParams: Dimensionality of the state.
:param measureParams: Dimensionality of the measurement.
:param controlParams: Dimensionality of the control vector.
:param type: Type of the created matrices that should be ``CV_32F`` or ``CV_64F``.
Re-initializes Kalman filter. The previous content is destroyed.
.. ocv:function:: void ocl::KalmanFilter::init(int dynamParams, int measureParams, int controlParams=0, int type=CV_32F)
:param dynamParams: Dimensionality of the state.
:param measureParams: Dimensionality of the measurement.
:param controlParams: Dimensionality of the control vector.
:param type: Type of the created matrices that should be ``CV_32F`` or ``CV_64F``.
Computes a predicted state.
.. ocv:function:: const oclMat& ocl::KalmanFilter::predict(const oclMat& control=oclMat())
:param control: The optional input control
Updates the predicted state from the measurement.
.. ocv:function:: const oclMat& ocl::KalmanFilter::correct(const oclMat& measurement)
:param measurement: The measured system parameters
.. ocv:class:: ocl::BackgroundSubtractor
Base class for background/foreground segmentation. ::
class CV_EXPORTS BackgroundSubtractor
//! the virtual destructor
virtual ~BackgroundSubtractor();
//! the update operator that takes the next video frame and returns the current foreground mask as 8-bit binary image.
virtual void operator()(const oclMat& image, oclMat& fgmask, float learningRate);
//! computes a background image
virtual void getBackgroundImage(oclMat& backgroundImage) const = 0;
The class is only used to define the common interface for the whole family of background/foreground segmentation algorithms.
Computes a foreground mask.
.. ocv:function:: void ocl::BackgroundSubtractor::operator()(const oclMat& image, oclMat& fgmask, float learningRate)
:param image: Next video frame.
:param fgmask: The output foreground mask as an 8-bit binary image.
Computes a background image.
.. ocv:function:: void ocl::BackgroundSubtractor::getBackgroundImage(oclMat& backgroundImage) const
:param backgroundImage: The output background image.
.. note:: Sometimes the background image can be very blurry, as it contains the average background statistics.
.. ocv:class:: ocl::MOG : public ocl::BackgroundSubtractor
Gaussian Mixture-based Background/Foreground Segmentation Algorithm. ::
class CV_EXPORTS MOG: public cv::ocl::BackgroundSubtractor
//! the default constructor
MOG(int nmixtures = -1);
//! re-initialization method
void initialize(Size frameSize, int frameType);
//! the update operator
void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f);
//! computes a background image which are the mean of all background gaussians
void getBackgroundImage(oclMat& backgroundImage) const;
//! releases all inner buffers
void release();
int history;
float varThreshold;
float backgroundRatio;
float noiseSigma;
/* hidden */
The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [MOG2001]_.
.. seealso:: :ocv:class:`BackgroundSubtractorMOG`
The constructor.
.. ocv:function:: ocl::MOG::MOG(int nmixtures = -1)
:param nmixtures: Number of Gaussian mixtures.
Default constructor sets all parameters to default values.
Updates the background model and returns the foreground mask.
.. ocv:function:: void ocl::MOG::operator()(const oclMat& frame, oclMat& fgmask, float learningRate = 0.f)
:param frame: Next video frame.
:param fgmask: The output foreground mask as an 8-bit binary image.
Computes a background image.
.. ocv:function:: void ocl::MOG::getBackgroundImage(oclMat& backgroundImage) const
:param backgroundImage: The output background image.
Releases all inner buffer's memory.
.. ocv:function:: void ocl::MOG::release()
.. ocv:class:: ocl::MOG2 : public ocl::BackgroundSubtractor
Gaussian Mixture-based Background/Foreground Segmentation Algorithm.
The class discriminates between foreground and background pixels by building and maintaining a model of the background. Any pixel which does not fit this model is then deemed to be foreground. The class implements algorithm described in [MOG2004]_. ::
class CV_EXPORTS MOG2: public cv::ocl::BackgroundSubtractor
//! the default constructor
MOG2(int nmixtures = -1);
//! re-initialization method
void initialize(Size frameSize, int frameType);
//! the update operator
void operator()(const oclMat& frame, oclMat& fgmask, float learningRate = -1.0f);
//! computes a background image which are the mean of all background gaussians
void getBackgroundImage(oclMat& backgroundImage) const;
//! releases all inner buffers
void release();
int history;
float varThreshold;
float backgroundRatio;
float varThresholdGen;
float fVarInit;
float fVarMin;
float fVarMax;
float fCT;
bool bShadowDetection;
unsigned char nShadowDetection;
float fTau;
/* hidden */
.. ocv:member:: float backgroundRatio
Threshold defining whether the component is significant enough to be included into the background model. ``cf=0.1 => TB=0.9`` is default. For ``alpha=0.001``, it means that the mode should exist for approximately 105 frames before it is considered foreground.
.. ocv:member:: float varThreshold
Threshold for the squared Mahalanobis distance that helps decide when a sample is close to the existing components (corresponds to ``Tg``). If it is not close to any component, a new component is generated. ``3 sigma => Tg=3*3=9`` is default. A smaller ``Tg`` value generates more components. A higher ``Tg`` value may result in a small number of components but they can grow too large.
.. ocv:member:: float fVarInit
Initial variance for the newly generated components. It affects the speed of adaptation. The parameter value is based on your estimate of the typical standard deviation from the images. OpenCV uses 15 as a reasonable value.
.. ocv:member:: float fVarMin
Parameter used to further control the variance.
.. ocv:member:: float fVarMax
Parameter used to further control the variance.
.. ocv:member:: float fCT
Complexity reduction parameter. This parameter defines the number of samples needed to accept to prove the component exists. ``CT=0.05`` is a default value for all the samples. By setting ``CT=0`` you get an algorithm very similar to the standard Stauffer&Grimson algorithm.
.. ocv:member:: uchar nShadowDetection
The value for marking shadow pixels in the output foreground mask. Default value is 127.
.. ocv:member:: float fTau
Shadow threshold. The shadow is detected if the pixel is a darker version of the background. ``Tau`` is a threshold defining how much darker the shadow can be. ``Tau= 0.5`` means that if a pixel is more than twice darker then it is not shadow. See [ShadowDetect2003]_.
.. ocv:member:: bool bShadowDetection
Parameter defining whether shadow detection should be enabled.
.. seealso:: :ocv:class:`BackgroundSubtractorMOG2`
The constructor.
.. ocv:function:: ocl::MOG2::MOG2(int nmixtures = -1)
:param nmixtures: Number of Gaussian mixtures.
Default constructor sets all parameters to default values.
Updates the background model and returns the foreground mask.
.. ocv:function:: void ocl::MOG2::operator()( const oclMat& frame, oclMat& fgmask, float learningRate=-1.0f)
:param frame: Next video frame.
:param fgmask: The output foreground mask as an 8-bit binary image.
Computes a background image.
.. ocv:function:: void ocl::MOG2::getBackgroundImage(oclMat& backgroundImage) const
:param backgroundImage: The output background image.
Releases all inner buffer's memory.
.. ocv:function:: void ocl::MOG2::release()
.. [ShadowDetect2003] Prati, Mikic, Trivedi and Cucchiarra. *Detecting Moving Shadows...*. IEEE PAMI, 2003
File diff suppressed because it is too large
Load Diff
@ -1,490 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "opencv2/ocl.hpp"
namespace cv
namespace ocl
MAT_ADD = 1,
// Holds the two oclMat operands and the integer operation code of a matrix expression.
// NOTE(review): braces and access specifiers are not visible in this extract.
class CV_EXPORTS oclMatExpr
// Default state: two default-constructed matrices and op == 0.
oclMatExpr() : a(oclMat()), b(oclMat()), op(0) {}
// Stores both operands and the operation code (presumably one of the MAT_* values — confirm).
oclMatExpr(const oclMat& _a, const oclMat& _b, int _op)
: a(_a), b(_b), op(_op) {}
// Conversion to oclMat; defined outside this chunk.
operator oclMat() const;
// Defined outside this chunk; presumably writes the evaluated expression into m — confirm.
void assign(oclMat& m) const;
// Expression operands.
oclMat a, b;
// Operation code.
int op;
//////////////////////////////// oclMat ////////////////////////////////
// Default constructor: zero-initializes the listed header fields, so data == 0 and empty() is true.
// clCxt is not part of the initializer list.
inline oclMat::oclMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0) {}
// Constructs a matrix with the given number of rows, columns and element type.
// All header fields start at zero; create() is called only when both dimensions are positive.
inline oclMat::oclMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
if( _rows > 0 && _cols > 0 )
create( _rows, _cols, _type );
// Size-based overload: same as the (rows, cols, type) constructor with rows = _size.height and cols = _size.width.
// create() is called only when both dimensions are positive.
inline oclMat::oclMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
if( _size.height > 0 && _size.width > 0 )
create( _size.height, _size.width, _type );
// Constructs a matrix of the given dimensions and type and assigns the scalar _s to it.
// NOTE(review): braces are missing from this extract; the scalar assignment presumably
// belongs inside the dimension check together with create() — confirm against the original header.
inline oclMat::oclMat(int _rows, int _cols, int _type, const Scalar &_s)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
if(_rows > 0 && _cols > 0)
create(_rows, _cols, _type);
// Uses oclMat::operator=(const Scalar&), which is defined in a .cpp file.
*this = _s;
// Size-based overload of the scalar-filled constructor (rows = _size.height, cols = _size.width).
// NOTE(review): braces are missing from this extract; the scalar assignment presumably
// belongs inside the dimension check together with create() — confirm against the original header.
inline oclMat::oclMat(Size _size, int _type, const Scalar &_s)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
if( _size.height > 0 && _size.width > 0 )
create( _size.height, _size.width, _type );
// Uses oclMat::operator=(const Scalar&), which is defined in a .cpp file.
*this = _s;
// Copy constructor: copies every header field of m (including the data pointer and
// the context pointer), so both objects refer to the same buffer, and increments the
// shared reference counter when one exists. No element data is copied.
inline oclMat::oclMat(const oclMat &m)
    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data),
      refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols)
{
    // A null refcount means there is no counter to update.
    if( refcount )
        CV_XADD(refcount, 1);
}
// Constructs from a caller-supplied buffer described by rows, columns, type and row step.
// The visible body only wraps _data in a temporary cv::Mat header.
// NOTE(review): the statement that consumes `m` is not visible in this extract
// (presumably an upload into this oclMat) — confirm against the original header.
inline oclMat::oclMat(int _rows, int _cols, int _type, void *_data, size_t _step)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0),
datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
cv::Mat m(_rows, _cols, _type, _data, _step);
// Disabled step/continuity bookkeeping, kept for reference:
//size_t minstep = cols * elemSize();
//if( step == Mat::AUTO_STEP )
// step = minstep;
// flags |= Mat::CONTINUOUS_FLAG;
// if( rows == 1 ) step = minstep;
// CV_DbgAssert( step >= minstep );
// flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
//dataend += step * (rows - 1) + minstep;
// Size-based overload of the caller-supplied-buffer constructor.
// The visible body only wraps _data in a temporary cv::Mat header.
// NOTE(review): the statement that consumes `m` is not visible in this extract
// (presumably an upload into this oclMat) — confirm against the original header.
inline oclMat::oclMat(Size _size, int _type, void *_data, size_t _step)
: flags(0), rows(0), cols(0),
step(0), data(0), refcount(0),
datastart(0), dataend(0), offset(0), wholerows(0), wholecols(0)
cv::Mat m(_size, _type, _data, _step);
// Disabled step/continuity bookkeeping, kept for reference:
//size_t minstep = cols * elemSize();
//if( step == Mat::AUTO_STEP )
// step = minstep;
// flags |= Mat::CONTINUOUS_FLAG;
// if( rows == 1 ) step = minstep;
// CV_DbgAssert( step >= minstep );
// flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
//dataend += step * (rows - 1) + minstep;
// Sub-matrix constructor: creates a header that refers to the same buffer as m but is
// restricted to the given row and column ranges. No element data is copied.
//   m      - source matrix whose buffer is shared.
//   rRange - rows to select, or Range::all() to keep every row of m.
//   cRange - columns to select, or Range::all() to keep every column of m.
// A range other than Range::all() must satisfy 0 <= start <= end <= the matching
// dimension of m, otherwise CV_Assert fails.
inline oclMat::oclMat(const oclMat &m, const Range &rRange, const Range &cRange)
{
    flags = m.flags;
    step = m.step;
    refcount = m.refcount;
    data = m.data;
    datastart = m.datastart;
    dataend = m.dataend;
    clCxt = m.clCxt;
    wholerows = m.wholerows;
    wholecols = m.wholecols;
    offset = m.offset;

    if( rRange == Range::all() )
        rows = m.rows;
    else
    {
        // The bounds check applies only to explicit ranges; Range::all() would never pass it.
        CV_Assert( 0 <= rRange.start && rRange.start <= rRange.end && rRange.end <= m.rows );
        rows = rRange.size();
        // offset is kept in bytes: skip whole rows.
        offset += step * rRange.start;
    }

    if( cRange == Range::all() )
        cols = m.cols;
    else
    {
        CV_Assert( 0 <= cRange.start && cRange.start <= cRange.end && cRange.end <= m.cols );
        cols = cRange.size();
        offset += cRange.start * elemSize();
    }

    // Selecting fewer columns than m clears the continuity flag (a no-op when cols == m.cols).
    flags &= cols < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;

    // A single row is always flagged continuous.
    if( rows == 1 )
        flags |= Mat::CONTINUOUS_FLAG;

    if( refcount )
        CV_XADD(refcount, 1);

    // Normalize a degenerate selection to 0 x 0.
    if( rows <= 0 || cols <= 0 )
        rows = cols = 0;
}
// ROI constructor: creates a header that refers to the same buffer as m but is
// restricted to the rectangle roi. No element data is copied.
//   m   - source matrix whose buffer is shared.
//   roi - rectangle to select; it is checked against m.wholecols and m.wholerows,
//         and CV_Assert fails when it has a negative origin/size or exceeds them.
inline oclMat::oclMat(const oclMat &m, const Rect &roi)
    : flags(m.flags), rows(roi.height), cols(roi.width),
      step(m.step), data(m.data), refcount(m.refcount),
      datastart(m.datastart), dataend(m.dataend), clCxt(m.clCxt), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols)
{
    // A ROI narrower than m clears the continuity flag.
    flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
    // offset is kept in bytes: skip roi.y rows, then roi.x elements.
    offset += roi.y * step + roi.x * elemSize();
    CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.wholecols &&
               0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.wholerows );

    if( refcount )
        CV_XADD(refcount, 1);

    // Normalize a degenerate selection to 0 x 0.
    if( rows <= 0 || cols <= 0 )
        rows = cols = 0;
}
// Constructs an oclMat from a cv::Mat.
// Only the zero-initialization of the header fields is visible in this extract.
// NOTE(review): the statement that transfers m's contents (presumably upload(m)) is not visible here — confirm.
inline oclMat::oclMat(const Mat &m)
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) , offset(0), wholerows(0), wholecols(0)
// Context lookup is disabled here.
//clCxt = Context::getContext();
// Destructor.
// NOTE(review): the body is not visible in this extract (presumably it calls release()) — confirm.
inline oclMat::~oclMat()
// Copy assignment: makes this header refer to the same buffer as m. No element data is copied.
// Self-assignment is a no-op. Returns *this.
inline oclMat &oclMat::operator = (const oclMat &m)
{
    if( this != &m )
    {
        // Take the new reference first, so the buffer stays alive even if the
        // release() below drops the last other reference to the same buffer.
        if( m.refcount )
            CV_XADD(m.refcount, 1);
        // Drop the reference to the buffer currently held; without this the old
        // buffer would be overwritten and never freed.
        release();
        clCxt = m.clCxt;
        flags = m.flags;
        rows = m.rows;
        cols = m.cols;
        step = m.step;
        data = m.data;
        datastart = m.datastart;
        dataend = m.dataend;
        offset = m.offset;
        wholerows = m.wholerows;
        wholecols = m.wholecols;
        refcount = m.refcount;
    }
    return *this;
}
// Assignment from a cv::Mat. Returns *this.
// NOTE(review): the statement that transfers m's contents (presumably upload(m)) is not visible in this extract — confirm.
inline oclMat &oclMat::operator = (const Mat &m)
// Context lookup is disabled here.
//clCxt = Context::getContext();
return *this;
// Assignment from a matrix expression. Returns *this.
// NOTE(review): the statement that applies expr (presumably expr.assign(*this)) is not visible in this extract — confirm.
inline oclMat& oclMat::operator = (const oclMatExpr& expr)
return *this;
/* Fixme! To be supported in OpenCL later. */
// Conversions to DevMem2D_<T> and PtrStep_<T>, compiled out with `#if 0`.
// NOTE(review): the matching #endif is not visible in this extract.
#if 0
// Wraps rows, cols, the data pointer (cast to T*) and step in a DevMem2D_<T>.
template <class T> inline oclMat::operator DevMem2D_<T>() const
return DevMem2D_<T>(rows, cols, (T *)data, step);
// Builds a PtrStep_<T> from the DevMem2D_<T> conversion above.
template <class T> inline oclMat::operator PtrStep_<T>() const
return PtrStep_<T>(static_cast< DevMem2D_<T> >(*this));
//CPP: void oclMat::upload(const Mat& m);
// Conversion to cv::Mat: returns the local matrix m by value.
// NOTE(review): the statement that fills m (presumably download(m)) is not visible in this extract — confirm.
inline oclMat::operator Mat() const
Mat m;
return m;
//CPP void oclMat::download(cv::Mat& m) const;
// Returns a header for the single row y (row range [y, y + 1), all columns), built with the range constructor.
inline oclMat oclMat::row(int y) const
return oclMat(*this, Range(y, y + 1), Range::all());
// Returns a header for the single column x (all rows, column range [x, x + 1)), built with the range constructor.
inline oclMat oclMat::col(int x) const
return oclMat(*this, Range::all(), Range(x, x + 1));
// Returns a header for the rows Range(startrow, endrow) with all columns.
inline oclMat oclMat::rowRange(int startrow, int endrow) const
return oclMat(*this, Range(startrow, endrow), Range::all());
// Range overload: returns a header for the rows in r with all columns.
inline oclMat oclMat::rowRange(const Range &r) const
return oclMat(*this, r, Range::all());
// Returns a header for the columns Range(startcol, endcol) with all rows.
inline oclMat oclMat::colRange(int startcol, int endcol) const
return oclMat(*this, Range::all(), Range(startcol, endcol));
// Range overload: returns a header for the columns in r with all rows.
inline oclMat oclMat::colRange(const Range &r) const
return oclMat(*this, Range::all(), r);
// Returns the local matrix m by value.
// NOTE(review): the statement that fills m (presumably copyTo(m)) is not visible in this extract — confirm.
inline oclMat oclMat::clone() const
oclMat m;
return m;
//CPP void oclMat::copyTo( oclMat& m ) const;
//CPP void oclMat::copyTo( oclMat& m, const oclMat& mask ) const;
//CPP void oclMat::convertTo( oclMat& m, int rtype, double alpha=1, double beta=0 ) const;
// Assigns this matrix to m: a negative mtype assigns it with operator=, and convertTo(m, mtype) converts it to mtype.
// NOTE(review): the `else` between the two statements appears to be lost in this extract;
// presumably convertTo() runs only when mtype >= 0 — confirm.
inline void oclMat::assignTo( oclMat &m, int mtype ) const
if( mtype < 0 )
m = *this;
convertTo(m, mtype);
//CPP oclMat& oclMat::operator = (const Scalar& s);
//CPP oclMat& oclMat::setTo(const Scalar& s, const oclMat& mask=oclMat());
//CPP oclMat oclMat::reshape(int _cn, int _rows=0) const;
// Size-based overload: forwards to create(rows, cols, type) with rows = _size.height and cols = _size.width.
inline void oclMat::create(Size _size, int _type)
create(_size.height, _size.width, _type);
//CPP void oclMat::create(int _rows, int _cols, int _type);
//CPP void oclMat::release();
// Exchanges the complete header state of this matrix and b, field by field
// (including the data pointer, reference counter and context pointer).
// Reference counts are not modified and no element data is copied.
inline void oclMat::swap(oclMat &b)
{
    std::swap( flags, b.flags );
    std::swap( rows, b.rows );
    std::swap( cols, b.cols );
    std::swap( step, b.step );
    std::swap( data, b.data );
    std::swap( datastart, b.datastart );
    std::swap( dataend, b.dataend );
    std::swap( refcount, b.refcount );
    std::swap( offset, b.offset );
    std::swap( clCxt, b.clCxt );
    std::swap( wholerows, b.wholerows );
    std::swap( wholecols, b.wholecols );
}
// Reports where this matrix sits inside its whole matrix.
//   wholeSize - receives (wholecols, wholerows).
//   ofs       - receives the origin derived from the byte offset:
//               y = offset / step, x = (remaining bytes) / element size.
// NOTE(review): the `else` after the offset == 0 case appears to be lost in this extract;
// both paths give (0, 0) for a zero offset.
inline void oclMat::locateROI( Size &wholeSize, Point &ofs ) const
size_t esz = elemSize();//, minstep;
//ptrdiff_t delta1 = offset;//, delta2 = dataend - datastart;
// step is used as a divisor below.
CV_DbgAssert( step > 0 );
if( offset == 0 )
ofs.x = ofs.y = 0;
ofs.y = (int)(offset / step);
ofs.x = (int)((offset - step * ofs.y) / esz);
//CV_DbgAssert( data == datastart + ofs.y*step + ofs.x*esz );
//minstep = (ofs.x + cols)*esz;
//wholeSize.height = (int)((delta2 - minstep)/step + 1);
//wholeSize.height = std::max(wholeSize.height, ofs.y + rows);
//wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz);
//wholeSize.width = std::max(wholeSize.width, ofs.x + cols);
// The whole-matrix size comes from the stored fields, not from the pointer arithmetic disabled above.
wholeSize.height = wholerows;
wholeSize.width = wholecols;
// Moves the edges of the current ROI by the given amounts and returns *this.
//   dtop, dbottom, dleft, dright - amounts added to the respective edges (the top/left edges
//   move by -dtop/-dleft); the result is clamped to [0, wholeSize] in each dimension.
// Updates offset, rows, cols and the continuity flag.
inline oclMat &oclMat::adjustROI( int dtop, int dbottom, int dleft, int dright )
Size wholeSize;
Point ofs;
size_t esz = elemSize();
locateROI( wholeSize, ofs );
// New row and column bounds, clamped to the whole matrix.
int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);
int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width);
// Shift the byte offset by the change of the top-left corner.
offset += (row1 - ofs.y) * step + (col1 - ofs.x) * esz;
rows = row2 - row1;
cols = col2 - col1;
// NOTE(review): the `else` between setting and clearing the flag appears to be lost in this extract — confirm.
if( esz * cols == step || rows == 1 )
flags |= Mat::CONTINUOUS_FLAG;
flags &= ~Mat::CONTINUOUS_FLAG;
return *this;
// Returns a sub-matrix header for the given row and column ranges, built with the range constructor.
inline oclMat oclMat::operator()( Range rRange, Range cRange ) const
return oclMat(*this, rRange, cRange);
// Returns a sub-matrix header for the rectangle roi, built with the ROI constructor.
inline oclMat oclMat::operator()( const Rect &roi ) const
return oclMat(*this, roi);
// True when the Mat::CONTINUOUS_FLAG bit is set in flags.
inline bool oclMat::isContinuous() const
return (flags & Mat::CONTINUOUS_FLAG) != 0;
// Element size computed with oclchannels() rather than channels(),
// so a 3-channel matrix is sized as if it had 4 channels.
inline size_t oclMat::elemSize() const
return CV_ELEM_SIZE((CV_MAKE_TYPE(type(), oclchannels())));
// Returns CV_ELEM_SIZE1(flags), the size of a single channel value.
inline size_t oclMat::elemSize1() const
return CV_ELEM_SIZE1(flags);
// Returns the matrix type extracted from flags with CV_MAT_TYPE.
inline int oclMat::type() const
return CV_MAT_TYPE(flags);
// Returns the type built from depth() and oclchannels(); it differs from type()
// only for 3-channel matrices, where oclchannels() reports 4.
inline int oclMat::ocltype() const
return CV_MAKE_TYPE(depth(), oclchannels());
// Returns the element depth extracted from flags with CV_MAT_DEPTH.
inline int oclMat::depth() const
return CV_MAT_DEPTH(flags);
// Returns the channel count extracted from flags with CV_MAT_CN.
inline int oclMat::channels() const
return CV_MAT_CN(flags);
// Returns the channel count from flags, except that 3 channels are reported as 4.
inline int oclMat::oclchannels() const
return (CV_MAT_CN(flags)) == 3 ? 4 : (CV_MAT_CN(flags));
// Returns step divided by elemSize1(), i.e. the row step measured in single-channel values.
inline size_t oclMat::step1() const
return step / elemSize1();
// Returns the matrix size as Size(cols, rows), i.e. width first.
inline Size oclMat::size() const
return Size(cols, rows);
// True when the data pointer is null.
inline bool oclMat::empty() const
return data == 0;
// Returns the transpose of this matrix as a new oclMat, computed with transpose().
inline oclMat oclMat::t() const
oclMat tmp;
transpose(*this, tmp);
return tmp;
// Free-function swap for two oclMat objects.
// NOTE(review): the body is not visible in this extract (presumably a.swap(b)) — confirm.
static inline void swap( oclMat &a, oclMat &b )
// Makes m a rows x cols matrix of the given type.
// When m already has that type and is at least rows x cols, m is narrowed to its
// top-left rows x cols region, which keeps the existing buffer.
// NOTE(review): the `else` before m.create() appears to be lost in this extract;
// presumably create() runs only when the check fails — confirm.
inline void ensureSizeIsEnough(int rows, int cols, int type, oclMat &m)
if (m.type() == type && m.rows >= rows && m.cols >= cols)
m = m(Rect(0, 0, cols, rows));
m.create(rows, cols, type);
// Size-based overload: forwards to ensureSizeIsEnough(rows, cols, type, m) with rows = size.height and cols = size.width.
inline void ensureSizeIsEnough(Size size, int type, oclMat &m)
ensureSizeIsEnough(size.height, size.width, type, m);
} /* end of namespace ocl */
} /* end of namespace cv */
@ -1,48 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#error this is a compatibility header which should not be used inside the OpenCV library
#include "opencv2/ocl.hpp"
@ -1,154 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#if !defined(DUMP_MESSAGE_STDOUT) && !defined(DUMP_PROPERTY_XML)
#error Invalid usage
#if !defined(DUMP_PROPERTY_XML)
#define DUMP_PROPERTY_XML(...)
#include <sstream>
// Formats a byte count as a human-readable string built from its non-zero
// GB / MB / kB / B components (1024-based), e.g. 1536 -> "1 kB 512 B".
//
// A value of zero yields "0 B" instead of an empty string, and no trailing
// space is left when the last printed component is not the byte part.
static std::string bytesToStringRepr(size_t value)
{
    if (value == 0)
        return "0 B";

    const size_t b = value % 1024;
    value /= 1024;

    const size_t kb = value % 1024;
    value /= 1024;

    const size_t mb = value % 1024;
    value /= 1024;

    const size_t gb = value;

    std::ostringstream stream;

    if (gb > 0)
        stream << gb << " GB ";
    if (mb > 0)
        stream << mb << " MB ";
    if (kb > 0)
        stream << kb << " kB ";
    if (b > 0)
        stream << b << " B";

    std::string repr = stream.str();
    // The GB/MB/kB parts end with a separator; drop it when nothing follows.
    if (!repr.empty() && repr[repr.size() - 1] == ' ')
        repr.erase(repr.size() - 1);
    return repr;
}
// Prints the available OpenCL platforms and their devices, followed by the
// properties of the device reported by cv::ocl::Context::getContext().
// All output goes through the DUMP_MESSAGE_STDOUT / DUMP_PROPERTY_XML macros,
// which the including file is expected to define before including this header.
// NOTE(review): this copy shows no statement that fills `platforms` and no
// `try` matching the `catch (...)` below - lines appear to be missing; confirm
// against the upstream file.
static void dumpOpenCLDevice()
using namespace cv::ocl;
cv::ocl::PlatformsInfo platforms;
DUMP_MESSAGE_STDOUT("OpenCL Platforms: ");
const char* deviceTypeStr;
// Outer loop: one iteration per platform; inner loop: one per device.
for(unsigned int i=0; i < platforms.size(); i++)
// NOTE(review): the next two statements and the `current_device` initializer
// are truncated in this copy (the platform/device accessor expressions are
// missing) - restore from upstream before building.
DUMP_MESSAGE_STDOUT(" " <<>platformName);
const cv::ocl::DevicesInfo& devices =>devices;
for(unsigned int j=0; j < devices.size(); j++)
const cv::ocl::DeviceInfo& current_device = *;
// Map the device type to a label: "CPU", "GPU", or "unknown" for anything else.
deviceTypeStr = current_device.deviceType == CVCL_DEVICE_TYPE_CPU
? ("CPU") : (current_device.deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown");
DUMP_MESSAGE_STDOUT( " " << deviceTypeStr << " : " << current_device.deviceName << " : " << current_device.deviceVersion );
DUMP_PROPERTY_XML("cv_ocl_platform_"<< i<<"_device_"<<j, "(Platform=" << current_device.platform->platformName << ")(Type="
<< deviceTypeStr <<")(Name="<< current_device.deviceName << ")(Version="<< current_device.deviceVersion<<")");
// Second part: details of the currently selected device.
DUMP_MESSAGE_STDOUT("Current OpenCL device: ");
const cv::ocl::DeviceInfo& deviceInfo = cv::ocl::Context::getContext()->getDeviceInfo();
DUMP_MESSAGE_STDOUT(" Platform = "<< deviceInfo.platform->platformName);
DUMP_PROPERTY_XML("cv_ocl_current_platformName", deviceInfo.platform->platformName);
deviceTypeStr = deviceInfo.deviceType == CVCL_DEVICE_TYPE_CPU
? "CPU" : (deviceInfo.deviceType == CVCL_DEVICE_TYPE_GPU ? "GPU" : "unknown");
DUMP_MESSAGE_STDOUT(" Type = "<< deviceTypeStr);
DUMP_PROPERTY_XML("cv_ocl_current_deviceType", deviceTypeStr);
DUMP_MESSAGE_STDOUT(" Name = "<< deviceInfo.deviceName);
DUMP_PROPERTY_XML("cv_ocl_current_deviceName", deviceInfo.deviceName);
DUMP_MESSAGE_STDOUT(" Version = " << deviceInfo.deviceVersion);
DUMP_PROPERTY_XML("cv_ocl_current_deviceVersion", deviceInfo.deviceVersion);
DUMP_MESSAGE_STDOUT(" Compute units = "<< deviceInfo.maxComputeUnits);
DUMP_PROPERTY_XML("cv_ocl_current_maxComputeUnits", deviceInfo.maxComputeUnits);
DUMP_MESSAGE_STDOUT(" Max work group size = "<< deviceInfo.maxWorkGroupSize);
DUMP_PROPERTY_XML("cv_ocl_current_maxWorkGroupSize", deviceInfo.maxWorkGroupSize);
// Memory sizes are printed in human-readable form on stdout, but recorded as
// the raw numeric value in the XML property.
std::string localMemorySizeStr = bytesToStringRepr(deviceInfo.localMemorySize);
DUMP_MESSAGE_STDOUT(" Local memory size = "<< localMemorySizeStr.c_str());
DUMP_PROPERTY_XML("cv_ocl_current_localMemorySize", deviceInfo.localMemorySize);
std::string maxMemAllocSizeStr = bytesToStringRepr(deviceInfo.maxMemAllocSize);
DUMP_MESSAGE_STDOUT(" Max memory allocation size = "<< maxMemAllocSizeStr.c_str());
DUMP_PROPERTY_XML("cv_ocl_current_maxMemAllocSize", deviceInfo.maxMemAllocSize);
// Boolean capabilities are printed as "Yes"/"No" on stdout; the XML property
// receives the flag itself.
const char* doubleSupportStr = deviceInfo.haveDoubleSupport ? "Yes" : "No";
DUMP_MESSAGE_STDOUT(" Double support = "<< doubleSupportStr);
DUMP_PROPERTY_XML("cv_ocl_current_haveDoubleSupport", deviceInfo.haveDoubleSupport);
const char* isUnifiedMemoryStr = deviceInfo.isUnifiedMemory ? "Yes" : "No";
DUMP_MESSAGE_STDOUT(" Unified memory = "<< isUnifiedMemoryStr);
DUMP_PROPERTY_XML("cv_ocl_current_isUnifiedMemory", deviceInfo.isUnifiedMemory);
// Any exception is reported as "device not available" rather than propagated.
catch (...)
DUMP_MESSAGE_STDOUT("OpenCL device not available");
DUMP_PROPERTY_XML("cv_ocl", "not available");
@ -1,115 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
#include <vector>
#include <string>
namespace cl_utils {
// Fills `platforms` with the IDs of all available OpenCL platforms.
//
// Uses the usual two-call pattern: the first clGetPlatformIDs call queries the
// count, the second one fetches the IDs. Returns CL_SUCCESS or the error code
// of the failing call. If the count query fails, `platforms` is left untouched.
// When the runtime reports zero platforms, `platforms` is emptied and
// CL_SUCCESS is returned without indexing into the empty vector.
inline cl_int getPlatforms(std::vector<cl_platform_id>& platforms)
{
    cl_uint n = 0;

    cl_int err = ::clGetPlatformIDs(0, NULL, &n);
    if (err != CL_SUCCESS)
        return err;

    platforms.clear();
    if (n == 0)
        return CL_SUCCESS; // nothing to fetch; &platforms[0] would be invalid

    platforms.resize(n);
    return ::clGetPlatformIDs(n, &platforms[0], NULL);
}
// Fills `devices` with the IDs of all devices of the given `type` on `platform`.
//
// Uses the two-call pattern: the first clGetDeviceIDs call queries the count,
// the second one fetches the IDs. Returns CL_SUCCESS or the error code of the
// failing call. If the count query fails, `devices` is left untouched.
// When zero devices are reported, `devices` is emptied and CL_SUCCESS is
// returned without indexing into the empty vector.
inline cl_int getDevices(cl_platform_id platform, cl_device_type type, std::vector<cl_device_id>& devices)
{
    cl_uint n = 0;

    cl_int err = ::clGetDeviceIDs(platform, type, 0, NULL, &n);
    if (err != CL_SUCCESS)
        return err;

    devices.clear();
    if (n == 0)
        return CL_SUCCESS; // nothing to fetch; &devices[0] would be invalid

    devices.resize(n);
    return ::clGetDeviceIDs(platform, type, n, &devices[0], NULL);
}
// Queries a fixed-size property through an OpenCL-style "get info" function.
//
// `f` is called as f(obj, name, sizeof(T), &param, NULL), i.e. it is asked to
// write exactly sizeof(T) bytes into `param`. The value returned by `f` is
// passed through unchanged.
template <typename Functor, typename ObjectType, typename T>
inline cl_int getScalarInfo(Functor f, ObjectType obj, cl_uint name, T& param)
{
    return f(obj, name, sizeof(T), &param, NULL);
}
// Queries a string property through an OpenCL-style "get info" function.
//
// The first call to `f` asks for the required size, the second one fetches
// the characters into a zero-filled buffer one byte larger than requested, so
// the result is always NUL-terminated. Returns CL_SUCCESS or the error code
// of the failing call. If the size query fails, `param` is left untouched;
// once it succeeds, `param` is reset so that an empty property (or a failing
// fetch) never leaves stale content behind.
template <typename Functor, typename ObjectType>
inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
{
    ::size_t required = 0; // defined value even if f does not write the size

    cl_int err = f(obj, name, 0, NULL, &required);
    if (err != CL_SUCCESS)
        return err;

    param.clear();
    if (required > 0)
    {
        std::vector<char> buf(required + 1, char(0));
        err = f(obj, name, required, &buf[0], NULL);
        if (err != CL_SUCCESS)
            return err;
        param = &buf[0];
    }

    return CL_SUCCESS;
}
} // namespace cl_utils
@ -1,191 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Peng Xiao,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
#include "opencv2/core/ocl_genbase.hpp"
#include "opencv2/ocl.hpp"
namespace cv
namespace ocl
// Reads the cl_device_id stored behind the pointer returned by
// ctx->getOpenCLDeviceIDPtr().
inline cl_device_id getClDeviceID(const Context *ctx)
{
    cl_device_id *handle = (cl_device_id*)(ctx->getOpenCLDeviceIDPtr());
    return *handle;
}
// Reads the cl_context stored behind the pointer returned by
// ctx->getOpenCLContextPtr().
inline cl_context getClContext(const Context *ctx)
{
    cl_context *handle = (cl_context*)(ctx->getOpenCLContextPtr());
    return *handle;
}
// Reads the cl_command_queue stored behind the pointer returned by
// ctx->getOpenCLCommandQueuePtr().
inline cl_command_queue getClCommandQueue(const Context *ctx)
{
    cl_command_queue *handle = (cl_command_queue*)(ctx->getOpenCLCommandQueuePtr());
    return *handle;
}
CV_EXPORTS cv::Mutex& getInitializationMutex();
enum openCLMemcpyKind
clMemcpyHostToDevice = 0,
///////////////////////////OpenCL call wrappers////////////////////////////
CV_EXPORTS void openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
size_t widthInBytes, size_t height);
CV_EXPORTS void openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type);
CV_EXPORTS void openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
const void *src, size_t spitch,
size_t width, size_t height, openCLMemcpyKind kind, int channels = -1);
CV_EXPORTS void openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
const void *src, size_t spitch,
size_t width, size_t height, int src_offset);
CV_EXPORTS void openCLFree(void *devPtr);
CV_EXPORTS cl_mem openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
CV_EXPORTS void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
CV_EXPORTS cl_kernel openCLGetKernelFromSource(const Context *clCxt,
const cv::ocl::ProgramEntry* source, String kernelName);
CV_EXPORTS cl_kernel openCLGetKernelFromSource(const Context *clCxt,
const cv::ocl::ProgramEntry* source, String kernelName, const char *build_options);
CV_EXPORTS cl_kernel openCLGetKernelFromSource(Context *ctx, const cv::ocl::ProgramEntry* source,
String kernelName, int channels, int depth, const char *build_options);
CV_EXPORTS void openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
CV_EXPORTS void openCLExecuteKernel(Context *ctx, cl_kernel kernel, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args);
CV_EXPORTS void openCLExecuteKernel(Context *clCxt , const cv::ocl::ProgramEntry* source, String kernelName, std::vector< std::pair<size_t, const void *> > &args,
int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
CV_EXPORTS void openCLExecuteKernel_(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName,
size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
CV_EXPORTS void openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
CV_EXPORTS void openCLExecuteKernel(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, const char *build_options);
CV_EXPORTS cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
const size_t size);
CV_EXPORTS cl_mem openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr);
CV_EXPORTS void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
CV_EXPORTS void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, const char *build_options, FLUSH_MODE finish_mode = DISABLE);
// bind oclMat to OpenCL image textures
// note:
// 1. there is no memory management; the user needs to explicitly release the resource
// 2. for faster clamping, there is no buffer padding for the constructed texture
CV_EXPORTS cl_mem bindTexture(const oclMat &mat);
CV_EXPORTS void releaseTexture(cl_mem& texture);
//Represents an image texture object
// Holds a cl_mem texture handle together with the rows / cols / type values
// supplied at construction. All four data members are const, so an instance
// cannot be re-pointed after it has been built.
// NOTE(review): braces and access specifiers are not visible in this copy, and
// further members may be missing - confirm against the upstream header.
class CV_EXPORTS TextureCL
// Stores the handle and the geometry/type values as given; no validation is done here.
TextureCL(cl_mem tex, int r, int c, int t)
: tex_(tex), rows(r), cols(c), type(t) {}
// Implicit conversion that exposes the stored cl_mem handle.
operator cl_mem()
return tex_;
cl_mem const tex_;
const int rows;
const int cols;
const int type;
// Copy assignment is declared but not defined here, to disable assignment.
void operator=(const TextureCL&);
// bind oclMat to an OpenCL image texture and return a TextureCL object
// note:
// for faster clamping, there is no buffer padding for the constructed texture
CV_EXPORTS Ptr<TextureCL> bindTexturePtr(const oclMat &mat);
CV_EXPORTS bool isCpuDevice();
CV_EXPORTS size_t queryWaveFrontSize(cl_kernel kernel);
// Integer division of `total` by `grain`, rounded up.
inline size_t divUp(size_t total, size_t grain)
{
    const size_t padded = total + grain - 1;
    return padded / grain;
}
// Rounds `sz` up to the nearest multiple of `n`.
inline size_t roundUp(size_t sz, size_t n)
{
    // n is not assumed to be a power of two (see alignSize), so the remainder
    // is computed with % rather than a bit mask.
    // The result equals divUp(sz, n) * n.
    const size_t padded = sz + n - 1;
    return padded - padded % n;
}
}//namespace ocl
}//namespace cv
@ -1,76 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
// Streams propertyName and propertyValue into two std::stringstream objects
// (so either argument may be a chain of << operands) and records the resulting
// strings through ::testing::Test::RecordProperty. Wrapped in do { } while (false)
// so the macro behaves as a single statement.
#define DUMP_PROPERTY_XML(propertyName, propertyValue) \
do { \
std::stringstream ssName, ssValue;\
ssName << propertyName;\
ssValue << propertyValue; \
::testing::Test::RecordProperty(ssName.str(), ssValue.str()); \
} while (false)
// Writes msg (which may be a chain of << operands) to std::cout, followed by
// std::endl. Wrapped in do { } while (false) so the macro behaves as a single
// statement.
#define DUMP_MESSAGE_STDOUT(msg) \
do { \
std::cout << msg << std::endl; \
} while (false)
#include "opencv2/ocl/private/opencl_dumpinfo.hpp"
static const char * impls[] =
int main(int argc, char ** argv)
CV_PERF_TEST_MAIN_INTERNALS(ocl, impls, ::dumpOpenCLDevice())
File diff suppressed because it is too large
Load Diff
@ -1,289 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using namespace std;
using namespace cv::ocl;
using namespace cv;
using std::tr1::tuple;
using std::tr1::get;
#if defined(HAVE_XINE) || \
defined(HAVE_GSTREAMER) || \
defined(HAVE_QUICKTIME) || \
defined(HAVE_FFMPEG) || \
// Converts every frame of `input` with cvtColor(..., COLOR_RGB2GRAY) and stores
// the result in the element of `output` at the same index. `output` must
// already have at least input.size() elements.
static void cvtFrameFmt(vector<Mat>& input, vector<Mat>& output)
{
    const int frameCount = (int)(input.size());
    for (int idx = 0; idx < frameCount; ++idx)
    {
        cvtColor(input[idx], output[idx], COLOR_RGB2GRAY);
    }
}
//prepare data for CPU
// Reads frame_buffer.size() frames from `cap` and fills `frame_buffer` with
// them; when cn == 1 the frames go through cvtFrameFmt.
// NOTE(review): in this copy nothing appends `frame` to `frame_buffer_init`,
// and the final assignment is not guarded by an `else` - lines appear to be
// missing; confirm against the upstream file.
static void prepareData(VideoCapture& cap, int cn, vector<Mat>& frame_buffer)
cv::Mat frame;
std::vector<Mat> frame_buffer_init;
// The number of frames to read is taken from the size the caller gave frame_buffer.
int nFrame = (int)frame_buffer.size();
for(int i = 0; i < nFrame; i++)
cap >> frame;
if(cn == 1)
cvtFrameFmt(frame_buffer_init, frame_buffer);
frame_buffer = frame_buffer_init;
//copy CPU data to GPU
static void prepareData(vector<Mat>& frame_buffer, vector<oclMat>& frame_buffer_ocl)
for(int i = 0; i < (int)frame_buffer.size(); i++)
///////////// MOG ////////////////////////
typedef tuple<string, int, double> VideoMOGParamType;
typedef TestBaseWithParam<VideoMOGParamType> VideoMOGFixture;
::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
::testing::Values(1, 3),
::testing::Values(0.0, 0.01)))
VideoMOGParamType params = GetParam();
const string inputFile = perf::TestBase::getDataPath(get<0>(params));
const int cn = get<1>(params);
const float learningRate = static_cast<float>(get<2>(params));
const int nFrame = 5;
Mat foreground_cpu;
std::vector<Mat> frame_buffer(nFrame);
std::vector<oclMat> frame_buffer_ocl;
cv::VideoCapture cap(inputFile);
prepareData(cap, cn, frame_buffer);
cv::Mat foreground;
cv::ocl::oclMat foreground_d;
cv::Ptr<cv::BackgroundSubtractorMOG> mog = createBackgroundSubtractorMOG();
for (int i = 0; i < nFrame; i++)
mog->apply(frame_buffer[i], foreground, learningRate);
else if(RUN_OCL_IMPL)
prepareData(frame_buffer, frame_buffer_ocl);
CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
cv::ocl::MOG d_mog;
for (int i = 0; i < nFrame; ++i)
d_mog(frame_buffer_ocl[i], foreground_d, learningRate);
///////////// MOG2 ////////////////////////
typedef tuple<string, int> VideoMOG2ParamType;
typedef TestBaseWithParam<VideoMOG2ParamType> VideoMOG2Fixture;
// Performance test for MOG2 background subtraction, parameterised over two
// input videos and channel counts 1 and 3. It loads nFrame (5) frames with
// prepareData and feeds them either to the CPU subtractor (mog2->apply) or,
// under RUN_OCL_IMPL, to cv::ocl::MOG2 after uploading the frames.
// NOTE(review): the branch header preceding `else if(RUN_OCL_IMPL)` and any
// timing/sanity-check statements are not visible in this copy.
PERF_TEST_P(VideoMOG2Fixture, DISABLED_MOG2, // TODO Disabled: random hangs on buildslave
::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
::testing::Values(1, 3)))
VideoMOG2ParamType params = GetParam();
const string inputFile = perf::TestBase::getDataPath(get<0>(params));
const int cn = get<1>(params);
int nFrame = 5;
std::vector<cv::Mat> frame_buffer(nFrame);
std::vector<cv::ocl::oclMat> frame_buffer_ocl;
cv::VideoCapture cap(inputFile);
prepareData(cap, cn, frame_buffer);
cv::Mat foreground;
cv::ocl::oclMat foreground_d;
// CPU path: default-constructed MOG2 subtractor applied frame by frame.
cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = createBackgroundSubtractorMOG2();
for (int i = 0; i < nFrame; i++)
mog2->apply(frame_buffer[i], foreground);
else if(RUN_OCL_IMPL)
// OpenCL path: build the oclMat frame list first, and check that it has nFrame entries.
prepareData(frame_buffer, frame_buffer_ocl);
CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
cv::ocl::MOG2 d_mog2;
for (int i = 0; i < nFrame; i++)
d_mog2(frame_buffer_ocl[i], foreground_d);
///////////// MOG2_GetBackgroundImage //////////////////
typedef TestBaseWithParam<VideoMOG2ParamType> Video_MOG2GetBackgroundImage;
// Performance test around the MOG2 background image, parameterised over two
// input videos. It loads nFrame (5) frames with prepareData and feeds them
// either to the CPU subtractor (mog2->apply) or, under RUN_OCL_IMPL, to
// cv::ocl::MOG2 after uploading the frames.
// NOTE(review): the second ::testing::Combine argument, the branch header
// preceding `else if(RUN_OCL_IMPL)`, and the statements that would fill
// `background` / `background_d` are not visible in this copy.
PERF_TEST_P(Video_MOG2GetBackgroundImage, MOG2,
::testing::Combine(::testing::Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi"),
VideoMOG2ParamType params = GetParam();
const string inputFile = perf::TestBase::getDataPath(get<0>(params));
const int cn = get<1>(params);
int nFrame = 5;
std::vector<cv::Mat> frame_buffer(nFrame);
std::vector<cv::ocl::oclMat> frame_buffer_ocl;
cv::VideoCapture cap(inputFile);
prepareData(cap, cn, frame_buffer);
cv::Mat foreground;
cv::Mat background;
cv::ocl::oclMat foreground_d;
cv::ocl::oclMat background_d;
// CPU path: default-constructed MOG2 subtractor applied frame by frame.
cv::Ptr<cv::BackgroundSubtractorMOG2> mog2 = createBackgroundSubtractorMOG2();
for (int i = 0; i < nFrame; i++)
mog2->apply(frame_buffer[i], foreground);
else if(RUN_OCL_IMPL)
// OpenCL path: build the oclMat frame list first, and check that it has nFrame entries.
prepareData(frame_buffer, frame_buffer_ocl);
CV_Assert((int)(frame_buffer_ocl.size()) == nFrame);
cv::ocl::MOG2 d_mog2;
for (int i = 0; i < nFrame; i++)
d_mog2(frame_buffer_ocl[i], foreground_d);
@ -1,130 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using namespace cv;
using std::tr1::get;
///////////// blend ////////////////////////
template <typename T>
static void blendLinearGold(const Mat &img1, const Mat &img2,
const Mat &weights1, const Mat &weights2,
Mat &result_gold)
CV_Assert(img1.size() == img2.size() && img1.type() == img2.type());
CV_Assert(weights1.size() == weights2.size() && weights1.size() == img1.size() &&
weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1);
result_gold.create(img1.size(), img1.type());
int cn = img1.channels();
int step1 = img1.cols * img1.channels();
for (int y = 0; y < img1.rows; ++y)
const float * const weights1_row = weights1.ptr<float>(y);
const float * const weights2_row = weights2.ptr<float>(y);
const T * const img1_row = img1.ptr<T>(y);
const T * const img2_row = img2.ptr<T>(y);
T * const result_gold_row = result_gold.ptr<T>(y);
for (int x = 0; x < step1; ++x)
int x1 = x / cn;
float w1 = weights1_row[x1], w2 = weights2_row[x1];
result_gold_row[x] = saturate_cast<T>(((float)img1_row[x] * w1
+ (float)img2_row[x] * w2) / (w1 + w2 + 1e-5f));
typedef void (*blendFunction)(const Mat &img1, const Mat &img2,
const Mat &weights1, const Mat &weights2,
Mat &result_gold);
typedef Size_MatType blendLinearFixture;
// Performance test for linear blending, parameterised over the typical OCL
// matrix sizes and the types CV_8UC1, CV_8UC3 and CV_32FC1. It times
// ocl::blendLinear on the OpenCL path and blendLinearGold on the plain path,
// then runs SANITY_CHECK on `dst` with a depth-dependent tolerance.
// NOTE(review): the branch header before the OpenCL statements and any
// download of oclDst into dst are not visible in this copy.
PERF_TEST_P(blendLinearFixture, blendLinear, ::testing::Combine(
OCL_TYPICAL_MAT_SIZES, testing::Values(CV_8UC1, CV_8UC3, CV_32FC1)))
Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int srcType = get<1>(params);
// Tolerance: 1.0 for integer depths (up to CV_32S), 0.2 otherwise.
const double eps = CV_MAT_DEPTH(srcType) <= CV_32S ? 1.0 : 0.2;
Mat src1(srcSize, srcType), src2(srcSize, srcType), dst(srcSize, srcType);
Mat weights1(srcSize, CV_32FC1), weights2(srcSize, CV_32FC1);
// NOTE(review): the start of the next statement is garbled in this copy;
// presumably it declares src1/src2 as inputs and dst as output - restore from upstream.
||||||, src2, WARMUP_RNG).out(dst);
randu(weights1, 0.0f, 1.0f);
randu(weights2, 0.0f, 1.0f);
ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst;
ocl::oclMat oclWeights1(weights1), oclWeights2(weights2);
OCL_TEST_CYCLE() ocl::blendLinear(oclSrc1, oclSrc2, oclWeights1, oclWeights2, oclDst);
SANITY_CHECK(dst, eps);
else if (RUN_PLAIN_IMPL)
// Select the uchar or float instantiation of the reference implementation.
blendFunction funcs[] = { (blendFunction)blendLinearGold<uchar>, (blendFunction)blendLinearGold<float> };
// NOTE(review): a depth is compared against the type constant CV_8UC1 here;
// CV_8U looks like the intended constant - confirm.
int funcIdx = CV_MAT_DEPTH(srcType) == CV_8UC1 ? 0 : 1;
TEST_CYCLE() (funcs[funcIdx])(src1, src2, weights1, weights2, dst);
SANITY_CHECK(dst, eps);
@ -1,114 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
// Authors:
// * Matthias Bady, aegirxx ==>
#include "perf_precomp.hpp"
using namespace std;
using namespace cv;
using namespace ocl;
using namespace perf;
///////////// BRIEF ////////////////////////
typedef TestBaseWithParam<std::tr1::tuple<std::string, int, size_t> > OCL_BRIEF;
// Performance test for BRIEF descriptor extraction, parameterised over two
// input images, descriptor sizes of 16/32/64 bytes and five keypoint counts.
// It detects FAST keypoints on the grayscale image, trims them to exactly
// numKp, and then times brief.compute on either the OpenCL implementation
// (BRIEF_OCL) or the plain one (BriefDescriptorExtractor).
// NOTE(review): the branch header preceding the OpenCL statements is not
// visible in this copy.
PERF_TEST_P( OCL_BRIEF, extract, testing::Combine(
testing::Values( string( "gpu/opticalflow/rubberwhale1.png" ),
string( "gpu/stereobm/aloe-L.png" )
), testing::Values( 16, 32, 64 ), testing::Values( 250, 500, 1000, 2500, 3000 ) ) )
const std::string filename = std::tr1::get<0>(GetParam( ));
const int bytes = std::tr1::get<1>(GetParam( ));
const size_t numKp = std::tr1::get<2>(GetParam( ));
Mat img = imread( getDataPath( filename ), IMREAD_GRAYSCALE );
ASSERT_TRUE( !img.empty( ) ) << "no input image";
// Start with a FAST threshold of 15 and lower it in steps of 5 until at least
// numKp keypoints remain after border filtering (or the threshold reaches 0).
int threshold = 15;
std::vector<KeyPoint> keypoints;
while (threshold > 0 && keypoints.size( ) < numKp)
FastFeatureDetector fast( threshold );
fast.detect( img, keypoints, Mat( ) );
threshold -= 5;
KeyPointsFilter::runByImageBorder( keypoints, img.size( ), BRIEF_OCL::getBorderSize( ) );
ASSERT_TRUE( keypoints.size( ) >= numKp ) << "not enough keypoints";
keypoints.resize( numKp );
// Pack the keypoint coordinates into a 2 x N float matrix: row 0 = x, row 1 = y.
Mat kpMat( 2, int( keypoints.size() ), CV_32FC1 );
for ( size_t i = 0; i < keypoints.size( ); ++i )
kpMat.col( int( i ) ).row( 0 ) = keypoints[i].pt.x;
kpMat.col( int( i ) ).row( 1 ) = keypoints[i].pt.y;
BRIEF_OCL brief( bytes );
oclMat imgCL( img ), keypointsCL(kpMat), mask;
while (next( ))
startTimer( );
oclMat descriptorsCL;
brief.compute( imgCL, keypointsCL, mask, descriptorsCL );
// cv::ocl::finish() is called before the timer stops, so it is part of the measured time.
cv::ocl::finish( );
stopTimer( );
else if ( RUN_PLAIN_IMPL )
BriefDescriptorExtractor brief( bytes );
while (next( ))
startTimer( );
Mat descriptors;
brief.compute( img, keypoints, descriptors );
stopTimer( );
@ -1,177 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
// Matrix sizes listed for the brute-force matcher benchmarks: width 128 with
// heights of 500, 1000 and 2000.
#define OCL_BFMATCHER_TYPICAL_MAT_SIZES ::testing::Values(cv::Size(128, 500), cv::Size(128, 1000), cv::Size(128, 2000))
//////////////////// BruteForceMatch /////////////////
// Fixture parameterised by the size of the query/train matrices.
typedef TestBaseWithParam<Size> BruteForceMatcherFixture;
// Benchmarks brute-force matching (L2 distance) of two random CV_32F matrices,
// using cv::BFMatcher on one path and ocl::BruteForceMatcher_OCL_base on the other.
// NOTE(review): braces, the parameter generator and the first `if` arm are not
// visible in this excerpt — confirm the structure against the original file.
PERF_TEST_P(BruteForceMatcherFixture, match,
const Size srcSize = GetParam();
vector<DMatch> matches;
Mat query(srcSize, CV_32F), train(srcSize, CV_32F);
// NOTE(review): the next line looks garbled (presumably `declare.in(query, train)`);
// the time value passed is 9 for the 2000-row case and 4 otherwise.
||||||, train).time(srcSize.height == 2000 ? 9 : 4 );
// Fill both matrices with uniform random values between 0 and 1.
randu(query, 0.0f, 1.0f);
randu(train, 0.0f, 1.0f);
// CPU matcher.
BFMatcher matcher(NORM_L2);
TEST_CYCLE() matcher.match(query, train, matches);
else if (RUN_OCL_IMPL)
// OpenCL matcher: wrap the inputs in oclMat, match, then download the results
// into the same `matches` vector used by the sanity check.
ocl::BruteForceMatcher_OCL_base oclMatcher(ocl::BruteForceMatcher_OCL_base::L2Dist);
ocl::oclMat oclQuery(query), oclTrain(train);
ocl::oclMat oclTrainIdx, oclDistance;
oclMatcher.matchSingle(oclQuery, oclTrain, oclTrainIdx, oclDistance);
oclMatcher.matchDownload(oclTrainIdx, oclDistance, matches);
SANITY_CHECK_MATCHES(matches, 1e-5);
// Benchmarks k-nearest-neighbour brute-force matching with k = 2 (L2 distance)
// on two random CV_32F matrices, for both cv::BFMatcher and the OpenCL matcher.
// NOTE(review): braces and some branch bodies are not visible in this excerpt.
PERF_TEST_P(BruteForceMatcherFixture, knnMatch,
const Size srcSize = GetParam();
// Constructed with two entries; matches[0] and matches[1] are the ones referenced below.
vector<vector<DMatch> > matches(2);
Mat query(srcSize, CV_32F), train(srcSize, CV_32F);
// Fill both matrices with uniform random values between 0 and 1.
randu(query, 0.0f, 1.0f);
randu(train, 0.0f, 1.0f);
// NOTE(review): the next line looks garbled (presumably `declare.in(query, train)`) — confirm.
||||||, train);
// Special case for the 2000-row input; its body is not visible here.
if (srcSize.height == 2000)
// CPU matcher.
BFMatcher matcher(NORM_L2);
TEST_CYCLE() matcher.knnMatch(query, train, matches, 2);
std::vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
else if (RUN_OCL_IMPL)
// OpenCL matcher: run the kNN search on oclMat inputs, then download the
// results into `matches`.
ocl::BruteForceMatcher_OCL_base oclMatcher(ocl::BruteForceMatcher_OCL_base::L2Dist);
ocl::oclMat oclQuery(query), oclTrain(train);
ocl::oclMat oclTrainIdx, oclDistance, oclAllDist;
oclMatcher.knnMatchSingle(oclQuery, oclTrain, oclTrainIdx, oclDistance, oclAllDist, 2);
oclMatcher.knnMatchDownload(oclTrainIdx, oclDistance, matches);
std::vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
// Only the first two match lists are sanity-checked.
SANITY_CHECK_MATCHES(matches0, 1e-5);
SANITY_CHECK_MATCHES(matches1, 1e-5);
// Benchmarks radius-based brute-force matching (L2 distance, max distance 2.0)
// on two random CV_32F matrices, for both cv::BFMatcher and the OpenCL matcher.
// NOTE(review): braces, some branch bodies and the sanity checks are not visible
// in this excerpt.
PERF_TEST_P(BruteForceMatcherFixture, radiusMatch,
const Size srcSize = GetParam();
// Distance threshold passed to both radiusMatch implementations.
const float max_distance = 2.0f;
// Constructed with two entries; matches[0] and matches[1] are the ones referenced below.
vector<vector<DMatch> > matches(2);
Mat query(srcSize, CV_32F), train(srcSize, CV_32F);
// NOTE(review): the next line looks garbled (presumably `declare.in(query, train)`) — confirm.
||||||, train);
// Fill both matrices with uniform random values between 0 and 1.
randu(query, 0.0f, 1.0f);
randu(train, 0.0f, 1.0f);
// Special case for the 2000-row input; its body is not visible here.
if (srcSize.height == 2000)
// CPU matcher.
cv::BFMatcher matcher(NORM_L2);
TEST_CYCLE() matcher.radiusMatch(query, train, matches, max_distance);
std::vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
else if (RUN_OCL_IMPL)
// OpenCL matcher: run the radius search on oclMat inputs, then download the
// results into `matches`.
ocl::oclMat oclQuery(query), oclTrain(train);
ocl::BruteForceMatcher_OCL_base oclMatcher(ocl::BruteForceMatcher_OCL_base::L2Dist);
ocl::oclMat oclTrainIdx, oclDistance, oclNMatches;
oclMatcher.radiusMatchSingle(oclQuery, oclTrain, oclTrainIdx, oclDistance, oclNMatches, max_distance);
oclMatcher.radiusMatchDownload(oclTrainIdx, oclDistance, oclNMatches, matches);
std::vector<DMatch> & matches0 = matches[0], & matches1 = matches[1];
@ -1,85 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
///////////// StereoMatchBM ////////////////////////
// Benchmarks block-matching stereo correspondence on the aloe stereo pair with
// 128 disparities and a window size of 19, using ocl::StereoBM_OCL on the OpenCL
// path and the StereoBM object from createStereoBM() on the plain path.
// NOTE(review): braces, the first `if` arm and the statement that consumes
// `value` are not visible in this excerpt — confirm against the original file.
PERF_TEST(StereoMatchBMFixture, StereoMatchBM)
Mat left_image = imread(getDataPath("gpu/stereobm/aloe-L.png"), cv::IMREAD_GRAYSCALE);
Mat right_image = imread(getDataPath("gpu/stereobm/aloe-R.png"), cv::IMREAD_GRAYSCALE);
// Both images must load and must have identical dimensions.
ASSERT_TRUE(!left_image.empty()) << "no input image";
ASSERT_TRUE(!right_image.empty()) << "no input image";
// The size check was previously repeated verbatim on two consecutive lines; one is enough.
ASSERT_TRUE(right_image.size() == left_image.size());
const int n_disp = 128, winSize = 19;
// Disparity output buffer for the plain path, allocated as CV_16SC1.
Mat disp(left_image.size(), CV_16SC1);
// NOTE(review): the next line looks garbled (presumably
// `declare.in(left_image, right_image).out(disp)`) — confirm.
||||||, right_image).out(disp);
// OpenCL path: wrap the inputs in oclMat and run the OpenCL block matcher (preset 0).
ocl::oclMat oclLeft(left_image), oclRight(right_image),
oclDisp(left_image.size(), CV_16SC1);
ocl::StereoBM_OCL oclBM(0, n_disp, winSize);
OCL_TEST_CYCLE() oclBM(oclLeft, oclRight, oclDisp);
else if (RUN_PLAIN_IMPL)
// Plain path.
Ptr<StereoBM> bm = createStereoBM(n_disp, winSize);
TEST_CYCLE() bm->compute(left_image, right_image, disp);
int value = 0;
@ -1,76 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
///////////// Canny ////////////////////////
// Benchmarks Canny edge detection (thresholds 50 and 100) on the grayscale aloe
// image, using ocl::Canny on the OpenCL path and cv::Canny on the plain path.
// NOTE(review): braces, the first `if` arm and the statement that consumes
// `value` are not visible in this excerpt.
PERF_TEST(CannyFixture, Canny)
// `edges` is the CV_8UC1 output buffer for the plain path, sized like the input.
Mat img = imread(getDataPath("gpu/stereobm/aloe-L.png"), cv::IMREAD_GRAYSCALE),
edges(img.size(), CV_8UC1);
ASSERT_TRUE(!img.empty()) << "can't open aloe-L.png";
// OpenCL path: wrap the input in an oclMat and allocate an oclMat for the edges.
ocl::oclMat oclImg(img), oclEdges(img.size(), CV_8UC1);
OCL_TEST_CYCLE() ocl::Canny(oclImg, oclEdges, 50.0, 100.0);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() Canny(img, edges, 50.0, 100.0);
int value = 0;
@ -1,110 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using std::tr1::tuple;
using std::tr1::get;
using std::tr1::make_tuple;
///////////// cvtColor////////////////////////
// Test parameters: (image size, (conversion code, source channel count,
// destination channel count)).
typedef tuple<Size, tuple<ConversionTypes, int, int> > cvtColorParams;
typedef TestBaseWithParam<cvtColorParams> cvtColorFixture;
// Benchmarks colour-space conversion for three image sizes and the conversion
// codes listed below, using ocl::cvtColor on the OpenCL path and cv::cvtColor
// on the plain path.
// NOTE(review): braces, part of the parameter generator and the first `if` arm
// are not visible in this excerpt — confirm against the original file.
PERF_TEST_P(cvtColorFixture, cvtColor, testing::Combine(
testing::Values(Size(1000, 1002), Size(2000, 2004), Size(4000, 4008)),
make_tuple(ConversionTypes(COLOR_RGB2GRAY), 3, 1),
make_tuple(ConversionTypes(COLOR_RGB2BGR), 3, 3),
make_tuple(ConversionTypes(COLOR_RGB2YUV), 3, 3),
make_tuple(ConversionTypes(COLOR_YUV2RGB), 3, 3),
make_tuple(ConversionTypes(COLOR_RGB2YCrCb), 3, 3),
make_tuple(ConversionTypes(COLOR_YCrCb2RGB), 3, 3),
make_tuple(ConversionTypes(COLOR_RGB2XYZ), 3, 3),
make_tuple(ConversionTypes(COLOR_XYZ2RGB), 3, 3),
make_tuple(ConversionTypes(COLOR_RGB2HSV), 3, 3),
make_tuple(ConversionTypes(COLOR_HSV2RGB), 3, 3),
make_tuple(ConversionTypes(COLOR_RGB2HLS), 3, 3),
make_tuple(ConversionTypes(COLOR_HLS2RGB), 3, 3),
make_tuple(ConversionTypes(COLOR_BGR5652BGR), 2, 3),
make_tuple(ConversionTypes(COLOR_BGR2BGR565), 3, 2),
make_tuple(ConversionTypes(COLOR_RGBA2mRGBA), 4, 4),
make_tuple(ConversionTypes(COLOR_mRGBA2RGBA), 4, 4),
make_tuple(ConversionTypes(COLOR_YUV2RGB_NV12), 1, 3)
// Unpack the size and the (code, scn, dcn) triple of the current test instance.
cvtColorParams params = GetParam();
const Size srcSize = get<0>(params);
const tuple<int, int, int> conversionParams = get<1>(params);
const int code = get<0>(conversionParams), scn = get<1>(conversionParams),
dcn = get<2>(conversionParams);
// The destination is allocated with the destination channel count (dcn). It was
// previously allocated with scn, which gave the pre-allocated output the wrong
// type whenever scn != dcn (e.g. RGB2GRAY, BGR2BGR565).
Mat src(srcSize, CV_8UC(scn)), dst(srcSize, CV_8UC(dcn));
// NOTE(review): the next line looks garbled (presumably
// `declare.in(src, WARMUP_RNG).out(dst)`) — confirm.
||||||, WARMUP_RNG).out(dst);
// OpenCL path: the device destination takes its type from `dst`.
ocl::oclMat oclSrc(src), oclDst(src.size(), dst.type());
OCL_TEST_CYCLE() ocl::cvtColor(oclSrc, oclDst, code, dcn);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::cvtColor(src, dst, code, dcn);
@ -1,93 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
// Authors:
// * Peter Andreas Entschev,
#include "perf_precomp.hpp"
using namespace perf;
///////////// FAST ////////////////////////
// Test parameters: (image path, detector threshold, non-max suppression flag).
typedef std::tr1::tuple<std::string, int, bool> Image_Threshold_NonmaxSupression_t;
typedef perf::TestBaseWithParam<Image_Threshold_NonmaxSupression_t> Image_Threshold_NonmaxSupression;
// Benchmarks FAST corner detection, using cv::ocl::FAST_OCL on the OpenCL path
// and cv::FAST on the plain path.
// NOTE(review): braces, the parameter generator, the first `if` arm and any
// sanity checks are not visible in this excerpt.
PERF_TEST_P(Image_Threshold_NonmaxSupression, FAST,
// Unpack the parameters of the current test instance.
const Image_Threshold_NonmaxSupression_t params = GetParam();
const std::string imgFile = std::tr1::get<0>(params);
const int threshold = std::tr1::get<1>(params);
const bool nonmaxSupression = std::tr1::get<2>(params);
const cv::Mat img = imread(getDataPath(imgFile), cv::IMREAD_GRAYSCALE);
// OpenCL detector; NOTE(review): the meaning of the third constructor argument
// (0.5) is not shown here — check the FAST_OCL declaration.
cv::ocl::FAST_OCL fast(threshold, nonmaxSupression, 0.5);
cv::ocl::oclMat d_img(img);
cv::ocl::oclMat d_keypoints;
// An empty oclMat is passed as the mask argument.
OCL_TEST_CYCLE() fast(d_img, cv::ocl::oclMat(), d_keypoints);
// Convert the oclMat keypoint result into a std::vector<cv::KeyPoint>.
std::vector<cv::KeyPoint> ocl_keypoints;
fast.downloadKeypoints(d_keypoints, ocl_keypoints);
else if (RUN_PLAIN_IMPL)
std::vector<cv::KeyPoint> cpu_keypoints;
TEST_CYCLE() cv::FAST(img, cpu_keypoints, threshold, nonmaxSupression);
@ -1,88 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
///////////// dft ////////////////////////
// Fixture parameterised by the size of the input matrix.
typedef TestBaseWithParam<Size> dftFixture;
// Body of the dft benchmark: runs cv::ocl::dft on the OpenCL path and cv::dft
// on the plain path over a random two-channel float matrix.
// NOTE(review): the PERF_TEST_P header, braces and several branch lines are not
// visible in this excerpt — confirm against the original file.
const Size srcSize = GetParam();
Mat src(srcSize, CV_32FC2), dst;
// Fill the input with uniform random values between 0 and 1.
randu(src, 0.0f, 1.0f);
// Special case for the largest size; its body is not visible here.
if (srcSize == OCL_SIZE_4000)
ocl::oclMat oclSrc(src), oclDst;
OCL_TEST_CYCLE() cv::ocl::dft(oclSrc, oclDst);
// Result is checked with an absolute tolerance of 1.5.
SANITY_CHECK(dst, 1.5);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::dft(src, dst);
@ -1,416 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using std::tr1::get;
using std::tr1::tuple;
///////////// Blur////////////////////////
typedef Size_MatType BlurFixture;
// Benchmarks a 3x3 box blur with BORDER_CONSTANT, using cv::ocl::blur on the
// OpenCL path and cv::blur on the plain path.
// NOTE(review): braces, the parameter generator and some branch lines are not
// visible in this excerpt.
PERF_TEST_P(BlurFixture, Blur,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params), ksize(3, 3);
const int type = get<1>(params), bordertype = BORDER_CONSTANT;
// Helper called with the size and type before the matrices are allocated;
// presumably it checks them against the device allocation limit — confirm in the helper.
checkDeviceMaxMemoryAllocSize(srcSize, type);
Mat src(srcSize, type), dst(srcSize, type);
// NOTE(review): the next line looks garbled (presumably
// `declare.in(src, WARMUP_RNG).out(dst)`) — confirm.
||||||, WARMUP_RNG).out(dst);
// Special case for the largest 4-channel input; its body is not visible here.
if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
// Point(-1, -1) is passed as the anchor argument.
OCL_TEST_CYCLE() cv::ocl::blur(oclSrc, oclDst, ksize, Point(-1, -1), bordertype);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::blur(src, dst, ksize, Point(-1, -1), bordertype);
///////////// Laplacian////////////////////////
typedef Size_MatType LaplacianFixture;
// Benchmarks the Laplacian operator with ksize 3 and scale 1, using
// cv::ocl::Laplacian on the OpenCL path and cv::Laplacian on the plain path.
// NOTE(review): braces, the parameter generator and some branch lines are not
// visible in this excerpt.
PERF_TEST_P(LaplacianFixture, Laplacian,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), ksize = 3;
// Helper called with the size and type before the matrices are allocated;
// presumably it checks them against the device allocation limit — confirm in the helper.
checkDeviceMaxMemoryAllocSize(srcSize, type);
Mat src(srcSize, type), dst(srcSize, type);
// NOTE(review): the next line looks garbled (presumably
// `declare.in(src, WARMUP_RNG).out(dst)`) — confirm.
||||||, WARMUP_RNG).out(dst);
// Special case for the largest 4-channel input; its body is not visible here.
if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
// -1 is passed as the output depth argument.
OCL_TEST_CYCLE() cv::ocl::Laplacian(oclSrc, oclDst, -1, ksize, 1);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::Laplacian(src, dst, -1, ksize, 1);
///////////// Erode ////////////////////
typedef Size_MatType ErodeFixture;
// Benchmarks erosion with a 3x3 rectangular structuring element, using
// cv::ocl::erode on the OpenCL path and cv::erode on the plain path.
// NOTE(review): braces, the parameter generator and some branch lines are not
// visible in this excerpt.
PERF_TEST_P(ErodeFixture, Erode,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), ksize = 3;
// Structuring element shared by both implementations.
const Mat ker = getStructuringElement(MORPH_RECT, Size(ksize, ksize));
// Helper called with the size and type before the matrices are allocated;
// presumably it checks them against the device allocation limit — confirm in the helper.
checkDeviceMaxMemoryAllocSize(srcSize, type);
Mat src(srcSize, type), dst(srcSize, type);
// NOTE(review): the next line looks garbled (presumably
// `declare.in(src, WARMUP_RNG).out(dst).in(ker)`) — confirm.
||||||, WARMUP_RNG).out(dst).in(ker);
// Special case for the largest 4-channel input; its body is not visible here.
if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
// The kernel is also wrapped in an oclMat for the OpenCL call.
ocl::oclMat oclSrc(src), oclDst(srcSize, type), oclKer(ker);
OCL_TEST_CYCLE() cv::ocl::erode(oclSrc, oclDst, oclKer);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::erode(src, dst, ker);
///////////// Sobel ////////////////////////
typedef Size_MatType SobelFixture;
// Benchmarks the Sobel operator with dx = 1 and dy = 1, using cv::ocl::Sobel on
// the OpenCL path and cv::Sobel on the plain path.
// NOTE(review): braces, the parameter generator and the bodies of the
// size/type branches are not visible in this excerpt.
PERF_TEST_P(SobelFixture, Sobel,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), dx = 1, dy = 1;
// The helper receives an extra factor of sizeof(float) * 2 here;
// NOTE(review): what that factor accounts for is not shown — confirm in the helper.
checkDeviceMaxMemoryAllocSize(srcSize, type, sizeof(float) * 2);
Mat src(srcSize, type), dst(srcSize, type);
// NOTE(review): the next line looks garbled (presumably
// `declare.in(src, WARMUP_RNG).out(dst)`) — confirm.
||||||, WARMUP_RNG).out(dst);
// Size/type-specific branches; their bodies are not visible here.
if ((srcSize == OCL_SIZE_2000 && type == CV_8UC4) ||
(srcSize == OCL_SIZE_4000 && type == CV_8UC1))
else if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
// -1 is passed as the output depth argument.
OCL_TEST_CYCLE() cv::ocl::Sobel(oclSrc, oclDst, -1, dx, dy);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::Sobel(src, dst, -1, dx, dy);
///////////// Scharr ////////////////////////
typedef Size_MatType ScharrFixture;
// Benchmarks the Scharr operator with dx = 1 and dy = 0, using cv::ocl::Scharr
// on the OpenCL path and cv::Scharr on the plain path.
// NOTE(review): braces, the parameter generator and the bodies of the
// size/type branches are not visible in this excerpt.
PERF_TEST_P(ScharrFixture, Scharr,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), dx = 1, dy = 0;
// The helper receives an extra factor of sizeof(float) * 2 here;
// NOTE(review): what that factor accounts for is not shown — confirm in the helper.
checkDeviceMaxMemoryAllocSize(srcSize, type, sizeof(float) * 2);
Mat src(srcSize, type), dst(srcSize, type);
// NOTE(review): the next line looks garbled (presumably
// `declare.in(src, WARMUP_RNG).out(dst)`) — confirm.
||||||, WARMUP_RNG).out(dst);
// Size/type-specific branches; their bodies are not visible here.
if ((srcSize == OCL_SIZE_2000 && type == CV_8UC4) ||
(srcSize == OCL_SIZE_4000 && type == CV_8UC1))
else if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
// -1 is passed as the output depth argument.
OCL_TEST_CYCLE() cv::ocl::Scharr(oclSrc, oclDst, -1, dx, dy);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::Scharr(src, dst, -1, dx, dy);
///////////// GaussianBlur ////////////////////////
typedef Size_MatType GaussianBlurFixture;
// Benchmarks a 7x7 Gaussian blur (sigma argument 0), using cv::ocl::GaussianBlur
// on the OpenCL path and cv::GaussianBlur on the plain path.
// NOTE(review): braces, the rest of the parameter generator and the first `if`
// arm are not visible in this excerpt.
PERF_TEST_P(GaussianBlurFixture, GaussianBlur,
::testing::Combine(::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000),
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), ksize = 7;
// Helper called with the size and type before the matrices are allocated;
// presumably it checks them against the device allocation limit — confirm in the helper.
checkDeviceMaxMemoryAllocSize(srcSize, type);
Mat src(srcSize, type), dst(srcSize, type);
// NOTE(review): the next line looks garbled (presumably
// `declare.in(src, WARMUP_RNG).out(dst)`) — confirm.
||||||, WARMUP_RNG).out(dst);
// Sanity-check tolerance: just over 1 for 8-bit data, 3e-4 for other depths.
const double eps = src.depth() == CV_8U ? 1 + DBL_EPSILON : 3e-4;
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
OCL_TEST_CYCLE() cv::ocl::GaussianBlur(oclSrc, oclDst, Size(ksize, ksize), 0);
SANITY_CHECK(dst, eps);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::GaussianBlur(src, dst, Size(ksize, ksize), 0);
SANITY_CHECK(dst, eps);
///////////// filter2D////////////////////////
typedef Size_MatType filter2DFixture;
// Benchmarks 2-D filtering with a random 3x3 CV_32SC1 kernel, using
// cv::ocl::filter2D on the OpenCL path and cv::filter2D on the plain path.
// NOTE(review): braces, the parameter generator and some branch lines are not
// visible in this excerpt.
PERF_TEST_P(filter2DFixture, filter2D,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), ksize = 3;
// Helper called with the size and type before the matrices are allocated;
// presumably it checks them against the device allocation limit — confirm in the helper.
checkDeviceMaxMemoryAllocSize(srcSize, type);
Mat src(srcSize, type), dst(srcSize, type), kernel(ksize, ksize, CV_32SC1);
// NOTE(review): the next line looks garbled (presumably
// `declare.in(src, WARMUP_RNG).in(kernel).out(dst)`) — confirm.
||||||, WARMUP_RNG).in(kernel).out(dst);
// Fill the kernel with random values drawn between -3 and 3.
randu(kernel, -3.0, 3.0);
// Special case for the largest 4-channel input; its body is not visible here.
if (srcSize == OCL_SIZE_4000 && type == CV_8UC4)
ocl::oclMat oclSrc(src), oclDst(srcSize, type), oclKernel(kernel);
// -1 is passed as the output depth argument.
OCL_TEST_CYCLE() cv::ocl::filter2D(oclSrc, oclDst, -1, oclKernel);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::filter2D(src, dst, -1, kernel);
///////////// Bilateral////////////////////////
typedef Size_MatType BilateralFixture;
// Benchmarks bilateral filtering with d = 7 and both sigmas set to 50, using
// cv::ocl::bilateralFilter on the OpenCL path and cv::bilateralFilter on the
// plain path.
// NOTE(review): braces, the parameter generator and the first `if` arm of the
// implementation switch are not visible in this excerpt.
PERF_TEST_P(BilateralFixture, Bilateral,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), d = 7;
const double sigmacolor = 50.0, sigmaspace = 50.0;
// Helper called with the size and type before the matrices are allocated;
// presumably it checks them against the device allocation limit — confirm in the helper.
checkDeviceMaxMemoryAllocSize(srcSize, type);
Mat src(srcSize, type), dst(srcSize, type);
// NOTE(review): the next line looks garbled (presumably
// `declare.in(src, WARMUP_RNG).out(dst)`) — confirm.
||||||, WARMUP_RNG).out(dst);
// For the largest size, declare a time value of 8 for CV_8UC3 and 4.5 otherwise.
if (srcSize == OCL_SIZE_4000)
declare.time(type == CV_8UC3 ? 8 : 4.5);
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
OCL_TEST_CYCLE() cv::ocl::bilateralFilter(oclSrc, oclDst, d, sigmacolor, sigmaspace);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::bilateralFilter(src, dst, d, sigmacolor, sigmaspace);
///////////// adaptiveBilateral////////////////////////
typedef Size_MatType adaptiveBilateralFixture;
// Benchmarks adaptive bilateral filtering with a 9x9 kernel and sigmaspace 10
// on OCL_SIZE_1000 inputs of type CV_8UC1 and CV_8UC3, using
// cv::ocl::adaptiveBilateralFilter on the OpenCL path and
// cv::adaptiveBilateralFilter on the plain path.
// NOTE(review): braces and the first `if` arm are not visible in this excerpt.
PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral,
::testing::Combine(::testing::Values(OCL_SIZE_1000), OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
const double sigmaspace = 10.0;
Size ksize(9, 9);
// Helper called with the size and type before the matrices are allocated;
// presumably it checks them against the device allocation limit — confirm in the helper.
checkDeviceMaxMemoryAllocSize(srcSize, type);
Mat src(srcSize, type), dst(srcSize, type);
// NOTE(review): the next line looks garbled (presumably
// `declare.in(src, WARMUP_RNG).out(dst)`) — confirm.
||||||, WARMUP_RNG).out(dst);
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
OCL_TEST_CYCLE() cv::ocl::adaptiveBilateralFilter(oclSrc, oclDst, ksize, sigmaspace);
// Result is checked with an absolute tolerance of 1.0.
SANITY_CHECK(dst, 1.0);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::adaptiveBilateralFilter(src, dst, ksize, sigmaspace);
@ -1,88 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
///////////// gemm ////////////////////////
// Fixture parameterised by the (square) matrix size.
typedef TestBaseWithParam<Size> gemmFixture;
// Benchmarks generalized matrix multiplication with alpha = beta = 1.0 on random
// CV_32FC1 matrices, using cv::ocl::gemm on the OpenCL path and cv::gemm on the
// plain path.
// NOTE(review): braces and the first `if` arm are not visible in this excerpt.
PERF_TEST_P(gemmFixture, gemm, ::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000))
const Size srcSize = GetParam();
Mat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1),
src3(srcSize, CV_32FC1), dst(srcSize, CV_32FC1);
// NOTE(review): the next line looks garbled (presumably
// `declare.in(src1, src2, src3)...`); the time value passed is 65 for
// OCL_SIZE_2000 and 8 otherwise.
||||||, src2, src3).out(dst).time(srcSize == OCL_SIZE_2000 ? 65 : 8);
// Fill all three inputs with uniform random values between -10 and 10.
randu(src1, -10.0f, 10.0f);
randu(src2, -10.0f, 10.0f);
randu(src3, -10.0f, 10.0f);
ocl::oclMat oclSrc1(src1), oclSrc2(src2),
oclSrc3(src3), oclDst(srcSize, CV_32FC1);
OCL_TEST_CYCLE() cv::ocl::gemm(oclSrc1, oclSrc2, 1.0, oclSrc3, 1.0, oclDst);
// Result is checked with an absolute tolerance of 0.01.
SANITY_CHECK(dst, 0.01);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::gemm(src1, src2, 1.0, src3, 1.0, dst);
SANITY_CHECK(dst, 0.01);
@ -1,95 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Peng Xiao,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using std::tr1::tuple;
using std::tr1::get;
///////////// GoodFeaturesToTrack ////////////////////////
// Test parameter: (image path relative to the perf data root, minDistance).
typedef tuple<string, double> GoodFeaturesToTrackParams;
typedef TestBaseWithParam<GoodFeaturesToTrackParams> GoodFeaturesToTrackFixture;
// Times ocl::GoodFeaturesToTrackDetector_OCL and cv::goodFeaturesToTrack on a grayscale image.
// NOTE(review): the parameter generator is incomplete in this copy (the image list and the
// enclosing combinator are missing); only the minDistance range survives.
PERF_TEST_P(GoodFeaturesToTrackFixture, GoodFeaturesToTrack,
::testing::Range(0.0, 4.0, 3.0)))
const GoodFeaturesToTrackParams param = GetParam();
const string fileName = getDataPath(get<0>(param));
const int maxCorners = 2000;
const double qualityLevel = 0.01, minDistance = get<1>(param);
Mat frame = imread(fileName, IMREAD_GRAYSCALE);
// Abort the test if the image could not be loaded.
ASSERT_TRUE(!frame.empty()) << "no input image";
// Detected corners; filled by either implementation below.
vector<Point2f> pts_gold;
// OpenCL path: detect into an oclMat, then convert to a point vector via downloadPoints.
// NOTE(review): the "if" that selects this path is not visible in this copy.
ocl::oclMat oclFrame(frame), pts_oclmat;
ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance);
OCL_TEST_CYCLE() detector(oclFrame, pts_oclmat);
detector.downloadPoints(pts_oclmat, pts_gold);
// Plain (CPU) path with the same maxCorners / qualityLevel / minDistance.
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::goodFeaturesToTrack(frame, pts_gold,
maxCorners, qualityLevel, minDistance);
@ -1,153 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
#include "opencv2/objdetect/objdetect_c.h"
using namespace perf;
///////////// Haar ////////////////////////
// Times Haar-cascade detection on basketball1.png with CascadeClassifier (plain path) and
// ocl::OclCascadeClassifier (OpenCL path), using identical detectMultiScale arguments.
// NOTE(review): braces, the first "if" and the cascade-loading expressions are missing from
// this copy; only the trailing error messages of the load assertions survive.
PERF_TEST(HaarFixture, Haar)
vector<Rect> faces;
Mat img = imread(getDataPath("gpu/haarcascade/basketball1.png"), IMREAD_GRAYSCALE);
ASSERT_TRUE(!img.empty()) << "can't open basketball1.png";
// Plain (CPU) classifier.
CascadeClassifier faceCascade;
// NOTE(review): the assertion this message belongs to was lost in extraction.
<< "can't load haarcascade_frontalface_alt.xml";
TEST_CYCLE() faceCascade.detectMultiScale(img, faces,
1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
// Compare the detected rectangles with a tolerance of 4 + 1e-4.
SANITY_CHECK(faces, 4 + 1e-4);
// OpenCL classifier operating on an oclMat copy of the image.
else if (RUN_OCL_IMPL)
ocl::OclCascadeClassifier faceCascade;
ocl::oclMat oclImg(img);
// NOTE(review): the assertion this message belongs to was lost in extraction.
<< "can't load haarcascade_frontalface_alt.xml";
OCL_TEST_CYCLE() faceCascade.detectMultiScale(oclImg, faces,
1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
SANITY_CHECK(faces, 4 + 1e-4);
using namespace std;
using namespace cv;
using namespace perf;
using std::tr1::make_tuple;
using std::tr1::get;
// Test parameter: (cascade file path, image path, minimum object size in pixels).
typedef std::tr1::tuple<std::string, std::string, int> OCL_Cascade_Image_MinSize_t;
typedef perf::TestBaseWithParam<OCL_Cascade_Image_MinSize_t> OCL_Cascade_Image_MinSize;
// Times detectMultiScale for the plain and OpenCL cascade classifiers over the listed
// cascade / image / min-size values.
// NOTE(review): the combinator wrapping the three Values lists and the start of a
// commented-out image list are missing in this copy (only its closing marker remains on the
// class57.png line); braces and the first "if" are missing too.
PERF_TEST_P( OCL_Cascade_Image_MinSize, CascadeClassifier,
testing::Values( string("cv/cascadeandhog/cascades/haarcascade_frontalface_alt.xml") ),
testing::Values( string("cv/shared/lena.png"),
string("cv/cascadeandhog/images/class57.png")*/ ),
testing::Values(30, 64, 90) ) )
const string cascasePath = get<0>(GetParam());
const string imagePath = get<1>(GetParam());
const int min_size = get<2>(GetParam());
// The same value is used for the minimum width and height.
Size minSize(min_size, min_size);
vector<Rect> faces;
Mat img = imread(getDataPath(imagePath), IMREAD_GRAYSCALE);
ASSERT_TRUE(!img.empty()) << "Can't load source image: " << getDataPath(imagePath);
// Histogram equalization is applied in place before detection.
equalizeHist(img, img);
// Plain (CPU) path.
CascadeClassifier cc;
ASSERT_TRUE(cc.load(getDataPath(cascasePath))) << "Can't load cascade file: " << getDataPath(cascasePath);
// NOTE(review): no explicit timer start/stop is visible around the call in this copy.
while (next())
cc.detectMultiScale(img, faces, 1.1, 3, 0, minSize);
// OpenCL path on an oclMat copy of the equalized image.
else if (RUN_OCL_IMPL)
ocl::oclMat uimg(img);
ocl::OclCascadeClassifier cc;
ASSERT_TRUE(cc.load(getDataPath(cascasePath))) << "Can't load cascade file: " << getDataPath(cascasePath);
while (next())
cc.detectMultiScale(uimg, faces, 1.1, 3, 0, minSize);
//sort(faces.begin(), faces.end(), comparators::RectLess());
SANITY_CHECK_NOTHING();//(faces, min_size/5);
// SANITY_CHECK_NOTHING() is used because the OCL and PLAIN versions may find a different number of faces
@ -1,100 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
///////////// HOG////////////////////////
// Strict ordering functor for cv::Rect: compares x, then y, then width, then height.
// It is passed to std::sort by the HOG test so both implementations are compared in the
// same order.
// NOTE(review): braces are missing from this copy.
struct RectLess :
public std::binary_function<cv::Rect, cv::Rect, bool>
// Returns true when a orders before b under the lexicographic (x, y, width, height) order.
bool operator()(const cv::Rect& a,
const cv::Rect& b) const
if (a.x != b.x)
return a.x < b.x;
else if (a.y != b.y)
return a.y < b.y;
else if (a.width != b.width)
return a.width < b.width;
// NOTE(review): presumably preceded by an "else" in the original; reached when x, y and
// width are all equal.
return a.height < b.height;
// Body of the HOG benchmark: times HOGDescriptor::detectMultiScale (plain) and
// ocl::HOGDescriptor::detectMultiScale (OpenCL) on road.png.
// NOTE(review): the PERF_TEST header, the braces, the first "if" and any detector setup
// lines are not visible in this copy -- confirm against the original file.
Mat src = imread(getDataPath("gpu/hog/road.png"), cv::IMREAD_GRAYSCALE);
ASSERT_TRUE(!src.empty()) << "can't open input image road.png";
vector<cv::Rect> found_locations;
// Plain (CPU) path.
HOGDescriptor hog;
TEST_CYCLE() hog.detectMultiScale(src, found_locations);
// Sort so that the comparison does not depend on detection order.
std::sort(found_locations.begin(), found_locations.end(), RectLess());
SANITY_CHECK(found_locations, 1 + DBL_EPSILON);
// OpenCL path on an oclMat copy of the image; same sort and tolerance.
else if (RUN_OCL_IMPL)
ocl::HOGDescriptor ocl_hog;
ocl::oclMat oclSrc(src);
OCL_TEST_CYCLE() ocl_hog.detectMultiScale(oclSrc, found_locations);
std::sort(found_locations.begin(), found_locations.end(), RectLess());
SANITY_CHECK(found_locations, 1 + DBL_EPSILON);
@ -1,106 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace cv;
using namespace perf;
// HoughCircles
// Test parameter: (image size, dp, minDist) as forwarded to HoughCircles.
typedef std::tr1::tuple<cv::Size, float, float> Size_Dp_MinDist_t;
typedef perf::TestBaseWithParam<Size_Dp_MinDist_t> Size_Dp_MinDist;
// Times cv::ocl::HoughCircles and cv::HoughCircles on a synthetic image of filled circles.
// NOTE(review): the combinator wrapping the three Values lists, the braces and the first
// "if" are missing from this copy.
PERF_TEST_P(Size_Dp_MinDist, OCL_HoughCircles,
testing::Values(perf::sz720p, perf::szSXGA, perf::sz1080p),
testing::Values(1.0f, 2.0f, 4.0f),
testing::Values(1.0f, 10.0f)))
const Size_Dp_MinDist_t params = GetParam();
const cv::Size size = std::tr1::get<0>(params);
const float dp = std::tr1::get<1>(params);
const float minDist = std::tr1::get<2>(params);
// Fixed detector settings shared by both implementations.
const int minRadius = 10;
const int maxRadius = 30;
const int cannyThreshold = 100;
const int votesThreshold = 15;
// Fixed seed, so the generated image is the same on every run.
cv::RNG rng(123456789);
// Black single-channel image onto which white circles are drawn.
cv::Mat src(size, CV_8UC1, cv::Scalar::all(0)), circles;
const int numCircles = rng.uniform(50, 100);
for (int i = 0; i < numCircles; ++i)
cv::Point center(rng.uniform(0, src.cols), rng.uniform(0, src.rows));
// maxRadius + 1 because the upper bound of RNG::uniform is exclusive for integers.
const int radius = rng.uniform(minRadius, maxRadius + 1);
// Thickness -1 draws a filled circle.
cv::circle(src, center, radius, cv::Scalar::all(255), -1);
// OpenCL path.
cv::ocl::oclMat ocl_src(src), ocl_circles;
OCL_TEST_CYCLE() cv::ocl::HoughCircles(ocl_src, ocl_circles, HOUGH_GRADIENT, dp, minDist,
cannyThreshold, votesThreshold, minRadius, maxRadius);
// Plain (CPU) path with identical arguments.
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::HoughCircles(src, circles, HOUGH_GRADIENT, dp, minDist, cannyThreshold,
votesThreshold, minRadius, maxRadius);
// NOTE(review): value is not used by any visible line; presumably it fed a sanity check
// that was lost in extraction.
int value = 0;
#endif // HAVE_OPENCL
@ -1,737 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using std::tr1::tuple;
using std::tr1::get;
///////////// equalizeHist ////////////////////////
// Fixture for the equalizeHist benchmark; the test parameter is the image Size.
typedef TestBaseWithParam<Size> equalizeHistFixture;
// Times cv::ocl::equalizeHist and cv::equalizeHist on CV_8UC1 images of the typical sizes.
// NOTE(review): braces and the first "if" are missing from this copy.
PERF_TEST_P(equalizeHistFixture, equalizeHist, OCL_TYPICAL_MAT_SIZES)
const Size srcSize = GetParam();
// Tolerance for the output comparison: just over one intensity level.
const double eps = 1 + DBL_EPSILON;
Mat src(srcSize, CV_8UC1), dst(srcSize, CV_8UC1);
// NOTE(review): the head of this statement is garbled ("||||||"); the remainder registers
// dst as the output.
||||||, WARMUP_RNG).out(dst);
// OpenCL path.
ocl::oclMat oclSrc(src), oclDst(srcSize, src.type());
OCL_TEST_CYCLE() cv::ocl::equalizeHist(oclSrc, oclDst);
// NOTE(review): no copy of oclDst into dst is visible before this check; verify.
SANITY_CHECK(dst, eps);
// Plain (CPU) path.
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::equalizeHist(src, dst);
SANITY_CHECK(dst, eps);
/////////// CopyMakeBorder //////////////////////
// Test parameter: (source size, matrix type, border type).
typedef tuple<Size, MatType, Border> CopyMakeBorderParamType;
typedef TestBaseWithParam<CopyMakeBorderParamType> CopyMakeBorderFixture;
// Times cv::ocl::copyMakeBorder and cv::copyMakeBorder with borders 7, 5, 5, 7.
// NOTE(review): the parameter generator, braces and the first "if" are missing from this copy.
PERF_TEST_P(CopyMakeBorderFixture, CopyMakeBorder,
const CopyMakeBorderParamType params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), borderType = get<2>(params);
Mat src(srcSize, type), dst;
// The four borders add 5 + 7 = 12 pixels in width and 7 + 5 = 12 in height.
const Size dstSize = srcSize + Size(12, 12);
dst.create(dstSize, type);
// NOTE(review): the head of this statement is garbled ("||||||").
||||||, WARMUP_RNG).out(dst);
// OpenCL path.
ocl::oclMat oclSrc(src), oclDst(dstSize, type);
OCL_TEST_CYCLE() cv::ocl::copyMakeBorder(oclSrc, oclDst, 7, 5, 5, 7, borderType, cv::Scalar(1.0));
// Plain (CPU) path with identical arguments.
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::copyMakeBorder(src, dst, 7, 5, 5, 7, borderType, cv::Scalar(1.0));
///////////// cornerMinEigenVal ////////////////////////
// Fixture parameterized by (size, matrix type).
typedef Size_MatType cornerMinEigenValFixture;
// Times cv::ocl::cornerMinEigenVal and cv::cornerMinEigenVal with a 7-pixel block, a 7-pixel
// aperture and BORDER_REFLECT; the output is always CV_32FC1.
// NOTE(review): the parameter generator, braces and the first "if" are missing from this copy.
PERF_TEST_P(cornerMinEigenValFixture, cornerMinEigenVal,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), borderType = BORDER_REFLECT;
const int blockSize = 7, apertureSize = 1 + 2 * 3;
Mat src(srcSize, type), dst(srcSize, CV_32FC1);
// NOTE(review): the head of this statement is garbled ("||||||"). The .time() argument grows
// with the image size (20 / 5 / 3).
||||||, WARMUP_RNG).out(dst)
.time(srcSize == OCL_SIZE_4000 ? 20 : srcSize == OCL_SIZE_2000 ? 5 : 3);
// 8-bit inputs are checked with an absolute error, other depths with a relative error.
const int depth = CV_MAT_DEPTH(type);
const ERROR_TYPE errorType = depth == CV_8U ? ERROR_ABSOLUTE : ERROR_RELATIVE;
// OpenCL path.
ocl::oclMat oclSrc(src), oclDst(srcSize, CV_32FC1);
OCL_TEST_CYCLE() cv::ocl::cornerMinEigenVal(oclSrc, oclDst, blockSize, apertureSize, borderType);
// NOTE(review): no copy of oclDst into dst is visible before this check; verify.
SANITY_CHECK(dst, 1e-6, errorType);
// Plain (CPU) path.
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType);
SANITY_CHECK(dst, 1e-6, errorType);
///////////// cornerHarris ////////////////////////
// Fixture parameterized by (size, matrix type).
typedef Size_MatType cornerHarrisFixture;
// Times cv::ocl::cornerHarris and cv::cornerHarris with the arguments 5, 7, 0.1 and
// BORDER_REFLECT; the output is always CV_32FC1.
// NOTE(review): the parameter generator, braces and the first "if" are missing from this copy.
PERF_TEST_P(cornerHarrisFixture, cornerHarris,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), borderType = BORDER_REFLECT;
Mat src(srcSize, type), dst(srcSize, CV_32FC1);
// Input is filled by randu with the bounds 0 and 1.
randu(src, 0, 1);
// NOTE(review): the statement this .time() call belongs to has lost its head in this copy.
.time(srcSize == OCL_SIZE_4000 ? 20 : srcSize == OCL_SIZE_2000 ? 5 : 3);
// OpenCL path.
ocl::oclMat oclSrc(src), oclDst(srcSize, CV_32FC1);
OCL_TEST_CYCLE() cv::ocl::cornerHarris(oclSrc, oclDst, 5, 7, 0.1, borderType);
// NOTE(review): no copy of oclDst into dst is visible before this check; verify.
SANITY_CHECK(dst, 3e-5);
// Plain (CPU) path with identical arguments.
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::cornerHarris(src, dst, 5, 7, 0.1, borderType);
SANITY_CHECK(dst, 3e-5);
///////////// integral ////////////////////////
// Fixture for the integral benchmark; the test parameter is the image Size.
typedef TestBaseWithParam<Size> integralFixture;
// Times cv::ocl::integral and cv::integral on CV_8UC1 images; the destination is left for the
// callee to allocate.
// NOTE(review): braces, the first "if" and any result checks are missing from this copy.
PERF_TEST_P(integralFixture, integral, OCL_TYPICAL_MAT_SIZES)
const Size srcSize = GetParam();
Mat src(srcSize, CV_8UC1), dst;
// NOTE(review): the head of this statement is garbled ("||||||").
||||||, WARMUP_RNG);
// OpenCL path.
ocl::oclMat oclSrc(src), oclDst;
OCL_TEST_CYCLE() cv::ocl::integral(oclSrc, oclDst);
// Plain (CPU) path.
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::integral(src, dst);
///////////// threshold////////////////////////
// Test parameter: (image size, matrix type, threshold type).
typedef tuple<Size, MatType, ThreshType> ThreshParams;
typedef TestBaseWithParam<ThreshParams> ThreshFixture;
// Times cv::ocl::threshold and cv::threshold with threshold 50 and maxValue 220.
// NOTE(review): most of the parameter generator, the braces and the first "if" are missing
// from this copy; only the list of matrix types survives.
PERF_TEST_P(ThreshFixture, threshold,
OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC4, CV_32FC1),
const ThreshParams params = GetParam();
const Size srcSize = get<0>(params);
const int srcType = get<1>(params);
const int threshType = get<2>(params);
const double maxValue = 220.0, threshold = 50;
Mat src(srcSize, srcType), dst(srcSize, srcType);
// Input is filled by randu with the bounds 0 and 100, so values fall on both sides of the
// threshold.
randu(src, 0, 100);
// OpenCL path.
// NOTE(review): oclDst is constructed as CV_8U regardless of srcType, whereas the plain-path
// dst uses srcType -- confirm this is intended.
ocl::oclMat oclSrc(src), oclDst(srcSize, CV_8U);
OCL_TEST_CYCLE() cv::ocl::threshold(oclSrc, oclDst, threshold, maxValue, threshType);
// Plain (CPU) path.
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::threshold(src, dst, threshold, maxValue, threshType);
///////////// meanShiftFiltering////////////////////////
// Pair of 16-bit coordinates returned by do_meanShift (the position the iteration ended at).
// NOTE(review): the braces and the closing "COOR" typedef name are missing from this copy.
// Also, the tag _COOR starts with an underscore followed by an uppercase letter, which is a
// reserved identifier form in C++.
typedef struct _COOR
short x;
short y;
// CPU reference mean-shift iteration for a single pixel of a 4-channel 8-bit image.
//   x0, y0   - starting pixel position; updated locally as the window moves
//   sptr     - pointer to the source pixel at (x0, y0); its 4 bytes seed c0..c3
//   dptr     - destination pixel; receives the final c0..c2 and the unchanged 4th channel c3
//   sstep    - source row stride in bytes
//   size     - image size, used to clip the search window
//   sp       - spatial window radius (window spans x0 +/- sp, y0 +/- sp)
//   sr       - colour radius; a pixel contributes when its squared colour distance <= sr * sr
//   maxIter  - maximum number of iterations
//   eps      - threshold on position shift plus squared colour shift
//   tab      - lookup table indexed by (difference + 255); callers in this file fill it with
//              squared differences
// Returns the final (x0, y0) narrowed to short.
// NOTE(review): this copy has lost its braces and several statements (the "else" after
// "if(iter == 0)", the increments of rowCount, and the statements controlled by
// "if(rowCount == 0)", "if( count == 0 )" and "if( stopFlag )"). As shown, rowCount is never
// incremented and count would always be 0 -- confirm against the original file.
static COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab)
// Squared colour radius, compared against sums of tab[] entries.
int isr2 = sr * sr;
int c0, c1, c2, c3;
int iter;
uchar *ptr = NULL;
uchar *pstart = NULL;
// Displacement of the window centre produced by the previous iteration.
int revx = 0, revy = 0;
// Current centre colour; c3 is carried through unchanged.
c0 = sptr[0];
c1 = sptr[1];
c2 = sptr[2];
c3 = sptr[3];
// iterate meanshift procedure
for(iter = 0; iter < maxIter; iter++ )
int count = 0;
// Accumulators: colour sums (s0..s2) and coordinate sums (sx, sy).
int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
//mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
int minx = x0 - sp;
int miny = y0 - sp;
int maxx = x0 + sp;
int maxy = y0 + sp;
//deal with the image boundary
if(minx < 0) minx = 0;
if(miny < 0) miny = 0;
if(maxx >= size.width) maxx = size.width - 1;
if(maxy >= size.height) maxy = size.height - 1;
// First iteration starts at the input pixel; later iterations move pstart by the last shift.
if(iter == 0)
pstart = sptr;
pstart = pstart + revy * sstep + (revx << 2); //point to the new position
ptr = pstart;
ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row
// Row loop; the increment rewinds ptr to the first window column of the next row.
for( int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2))
int rowCount = 0;
int x = minx;
// Unrolled by four pixels (16 bytes) per step.
for( ; x + 4 <= maxx; x += 4, ptr += 16)
int t0, t1, t2;
t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
s0 += t0;
s1 += t1;
s2 += t2;
sx += x;
t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 1;
t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 2;
t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
s0 += t0;
s1 += t1;
s2 += t2;
sx += x + 3;
// Remaining pixels of the row, one at a time.
for(; x <= maxx; x++, ptr += 4)
int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
s0 += t0;
s1 += t1;
s2 += t2;
sx += x;
// NOTE(review): the statement controlled by this "if" is missing from this copy.
if(rowCount == 0)
count += rowCount;
sy += y * rowCount;
// NOTE(review): the statement controlled by this "if" is missing; without it the divisions
// below would divide by zero when count == 0.
if( count == 0 )
// New centre position and colour: integer means over the accepted pixels.
int x1 = sx / count;
int y1 = sy / count;
s0 = s0 / count;
s1 = s1 / count;
s2 = s2 / count;
// Stop when the centre did not move, or when position shift plus squared colour shift
// is within eps.
bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
//revise the pointer corresponding to the new (y0,x0)
revx = x1 - x0;
revy = y1 - y0;
x0 = x1;
y0 = y1;
c0 = s0;
c1 = s1;
c2 = s2;
// NOTE(review): the statement controlled by this "if" is missing from this copy.
if( stopFlag )
} //for iter
// Write the final colour; the 4th channel is copied from the input pixel.
dptr[0] = (uchar)c0;
dptr[1] = (uchar)c1;
dptr[2] = (uchar)c2;
dptr[3] = (uchar)c3;
COOR coor;
coor.x = static_cast<short>(x0);
coor.y = static_cast<short>(y0);
return coor;
// CPU reference for mean-shift filtering: validates the input, (re)allocates dst_roi with the
// source size and type, then calls do_meanShift for every pixel.
//   src_roi - input image; must be non-empty, 8-bit, 4-channel
//   dst_roi - output image
//   sp, sr  - forwarded to do_meanShift
//   crit    - termination criteria; maxCount is clamped to [1, 100], and 5 is used when
//             MAX_ITER is not set
// Raises (through CV_Error) StsBadArg for an empty input and StsUnsupportedFormat for any
// other depth or channel count.
// NOTE(review): braces, an "else" before the second eps assignment, and the initializers of
// sptr / dptr are missing from this copy.
static void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::TermCriteria crit)
if( src_roi.empty() )
CV_Error( Error::StsBadArg, "The input image is empty" );
if( src_roi.depth() != CV_8U || src_roi.channels() != 4 )
CV_Error( Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
dst_roi.create(src_roi.size(), src_roi.type());
CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) );
// The destination row stride must be a multiple of 4 bytes.
CV_Assert( !(dst_roi.step & 0x3) );
if( !(crit.type & cv::TermCriteria::MAX_ITER) )
crit.maxCount = 5;
int maxIter = std::min(std::max(crit.maxCount, 1), 100);
float eps;
// Default eps of 1 when EPS is not requested; otherwise crit.epsilon clamped to be >= 0.
if( !(crit.type & cv::TermCriteria::EPS) )
eps = 1.f;
eps = (float)std::max(crit.epsilon, 0.0);
// tab[d + 255] holds d * d for d in [-255, 256].
int tab[512];
for(int i = 0; i < 512; i++)
tab[i] = (i - 255) * (i - 255);
// NOTE(review): both initializers are garbled; presumably the data pointers of src_roi and
// dst_roi -- verify.
uchar *sptr =;
uchar *dptr =;
int sstep = (int)src_roi.step;
int dstep = (int)dst_roi.step;
cv::Size size = src_roi.size();
// Row loop; the increments skip the bytes between the end of one row and the start of the next.
for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
dptr += dstep - (size.width << 2))
// Column loop; 4 bytes per pixel. The returned coordinates are discarded here.
for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4)
do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
// Fixture for the meanShiftFiltering benchmark; the test parameter is the image Size.
typedef TestBaseWithParam<Size> meanShiftFilteringFixture;
// Times the CPU reference meanShiftFiltering_ and ocl::meanShiftFiltering on CV_8UC4 images
// with sp = 5, sr = 6 and at most 5 iterations (epsilon 1).
// NOTE(review): the parameter generator, braces and the first "if" are missing from this copy.
PERF_TEST_P(meanShiftFilteringFixture, meanShiftFiltering,
const Size srcSize = GetParam();
const int sp = 5, sr = 6;
cv::TermCriteria crit(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1);
Mat src(srcSize, CV_8UC4), dst(srcSize, CV_8UC4);
// NOTE(review): the head of this statement is garbled ("||||||"). The .time() argument grows
// with the image size (56 / 15 / 3.8).
||||||, WARMUP_RNG).out(dst)
.time(srcSize == OCL_SIZE_4000 ?
56 : srcSize == OCL_SIZE_2000 ? 15 : 3.8);
// Plain path uses the local reference implementation.
TEST_CYCLE() meanShiftFiltering_(src, dst, sp, sr, crit);
// OpenCL path.
else if (RUN_OCL_IMPL)
ocl::oclMat oclSrc(src), oclDst(srcSize, CV_8UC4);
OCL_TEST_CYCLE() ocl::meanShiftFiltering(oclSrc, oclDst, sp, sr, crit);
// CPU reference for mean-shift processing: like meanShiftFiltering_, but also stores the
// coordinates returned by do_meanShift for every pixel.
//   src_roi     - input image; must be non-empty, 8-bit, 4-channel
//   dst_roi     - output colours, created with the source size and type
//   dstCoor_roi - output coordinates, created as CV_16SC2 with the source size
//   sp, sr      - forwarded to do_meanShift
//   crit        - termination criteria; maxCount is clamped to [1, 100], and 5 is used when
//                 MAX_ITER is not set
// Raises (through CV_Error) StsBadArg for an empty input and StsUnsupportedFormat for any
// other depth or channel count.
// NOTE(review): braces, an "else" before the second eps assignment, and the initializers of
// the three data pointers are missing from this copy.
static void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit)
if (src_roi.empty())
CV_Error(Error::StsBadArg, "The input image is empty");
if (src_roi.depth() != CV_8U || src_roi.channels() != 4)
CV_Error(Error::StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported");
dst_roi.create(src_roi.size(), src_roi.type());
dstCoor_roi.create(src_roi.size(), CV_16SC2);
CV_Assert((src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) &&
(src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows));
// The coordinate row stride must be a multiple of 4 bytes.
CV_Assert(!(dstCoor_roi.step & 0x3));
if (!(crit.type & cv::TermCriteria::MAX_ITER))
crit.maxCount = 5;
int maxIter = std::min(std::max(crit.maxCount, 1), 100);
float eps;
// Default eps of 1 when EPS is not requested; otherwise crit.epsilon clamped to be >= 0.
if (!(crit.type & cv::TermCriteria::EPS))
eps = 1.f;
eps = (float)std::max(crit.epsilon, 0.0);
// tab[d + 255] holds d * d for d in [-255, 256].
int tab[512];
for (int i = 0; i < 512; i++)
tab[i] = (i - 255) * (i - 255);
// NOTE(review): the three initializers are garbled; presumably the data pointers of src_roi,
// dst_roi and dstCoor_roi -- verify.
uchar *sptr =;
uchar *dptr =;
short *dCoorptr = (short *);
int sstep = (int)src_roi.step;
int dstep = (int)dst_roi.step;
// Coordinate stride converted from bytes to short elements.
int dCoorstep = (int)dstCoor_roi.step >> 1;
cv::Size size = src_roi.size();
// Row loop; the increments skip whatever lies between the end of one row and the next.
for (int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2),
dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1))
// Column loop; 4 bytes per colour pixel, 2 shorts per coordinate pair.
for (int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2)
// The two shorts at dCoorptr are written as one COOR value.
*((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
// Fixture for the meanShiftProc benchmark; the test parameter is the image Size.
typedef TestBaseWithParam<Size> meanShiftProcFixture;
// Times the CPU reference meanShiftProc_ and ocl::meanShiftProc on CV_8UC4 images with
// sp = 5, sr = 6 and at most 5 iterations (epsilon 1); outputs are colours (CV_8UC4) and
// coordinates (CV_16SC2).
// NOTE(review): the parameter generator, braces and the first "if" are missing from this copy.
PERF_TEST_P(meanShiftProcFixture, meanShiftProc,
const Size srcSize = GetParam();
TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1);
Mat src(srcSize, CV_8UC4), dst1(srcSize, CV_8UC4),
dst2(srcSize, CV_16SC2);
// NOTE(review): the head of this statement is garbled ("||||||"). The .time() argument grows
// with the image size (56 / 15 / 3.8); the doubled semicolon is in the source as shown.
||||||, WARMUP_RNG).out(dst1, dst2)
.time(srcSize == OCL_SIZE_4000 ?
56 : srcSize == OCL_SIZE_2000 ? 15 : 3.8);;
// Plain path uses the local reference implementation.
TEST_CYCLE() meanShiftProc_(src, dst1, dst2, 5, 6, crit);
// OpenCL path.
else if (RUN_OCL_IMPL)
ocl::oclMat oclSrc(src), oclDst1(srcSize, CV_8UC4),
oclDst2(srcSize, CV_16SC2);
OCL_TEST_CYCLE() ocl::meanShiftProc(oclSrc, oclDst1, oclDst2, 5, 6, crit);
///////////// CLAHE ////////////////////////
// Fixture for the CLAHE benchmark; the test parameter is the image Size.
typedef TestBaseWithParam<Size> CLAHEFixture;
// Body of the CLAHE benchmark: times cv::ocl::createCLAHE and cv::createCLAHE (clip limit 40)
// applied to CV_8UC1 images.
// NOTE(review): the PERF_TEST_P header, the braces, the first "if" and the statement
// controlled by the OCL_SIZE_4000 check are missing from this copy.
const Size srcSize = GetParam();
// NOTE(review): impl is not used by any visible line.
const string impl = getSelectedImpl();
Mat src(srcSize, CV_8UC1), dst;
const double clipLimit = 40.0;
// NOTE(review): the head of this statement is garbled ("||||||").
||||||, WARMUP_RNG);
// NOTE(review): special handling for the largest size; its body is not visible here.
if (srcSize == OCL_SIZE_4000)
// OpenCL path.
ocl::oclMat oclSrc(src), oclDst;
cv::Ptr<cv::CLAHE> oclClahe = cv::ocl::createCLAHE(clipLimit);
OCL_TEST_CYCLE() oclClahe->apply(oclSrc, oclDst);
// Plain (CPU) path.
else if (RUN_PLAIN_IMPL)
cv::Ptr<cv::CLAHE> clahe = cv::createCLAHE(clipLimit);
TEST_CYCLE() clahe->apply(src, dst);
///////////// columnSum////////////////////////
// Fixture for the columnSum benchmark; the test parameter is the matrix Size.
typedef TestBaseWithParam<Size> columnSumFixture;
// CPU reference used as the plain implementation of columnSum: the first loop handles row 0,
// the second handles rows 1..rows-1 using row i - 1 and row i.
// NOTE(review): the object names in the element accesses are garbled ("||||||" and a bare
// "<float>(...)"), so which matrix each term reads cannot be confirmed from this copy.
// Presumably each dst element is the running sum down its column of src -- verify against
// the original file. Braces are also missing.
static void columnSumPerfTest(const Mat & src, Mat & dst)
for (int j = 0; j < src.cols; j++)
||||||<float>(0, j) =<float>(0, j);
for (int i = 1; i < src.rows; ++i)
for (int j = 0; j < src.cols; ++j)
||||||<float>(i, j) =<float>(i - 1 , j) +<float>(i , j);
// Times cv::ocl::columnSum and the local reference columnSumPerfTest on CV_32FC1 matrices.
// NOTE(review): braces, the first "if" and the statement controlled by the OCL_SIZE_4000
// check are missing from this copy.
PERF_TEST_P(columnSumFixture, columnSum, OCL_TYPICAL_MAT_SIZES)
const Size srcSize = GetParam();
Mat src(srcSize, CV_32FC1), dst(srcSize, CV_32FC1);
// NOTE(review): the head of this statement is garbled ("||||||").
||||||, WARMUP_RNG).out(dst);
// NOTE(review): special handling for the largest size; its body is not visible here.
if (srcSize == OCL_SIZE_4000)
// OpenCL path.
ocl::oclMat oclSrc(src), oclDst(srcSize, CV_32FC1);
OCL_TEST_CYCLE() cv::ocl::columnSum(oclSrc, oclDst);
// Plain path uses the local reference implementation.
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() columnSumPerfTest(src, dst);
// Test parameter: (matrix size, distance type).
typedef tuple<Size, DistType> distanceToCentersParameters;
typedef TestBaseWithParam<distanceToCentersParameters> distanceToCentersFixture;
// CPU reference for distanceToCenters: computes all pairwise distances between rows of src
// and rows of centers with cv::batchDistance, then searches each result row for its minimum.
// NOTE(review): in this copy dists_v / labels_v are never filled and dists / labels are never
// written -- the lines that store mVal and mLoc were presumably lost in extraction, as were
// the braces. Verify against the original file.
static void distanceToCentersPerfTest(Mat& src, Mat& centers, Mat& dists, Mat& labels, int distType)
Mat batch_dists;
cv::batchDistance(src, centers, batch_dists, CV_32FC1, noArray(), distType);
std::vector<float> dists_v;
std::vector<int> labels_v;
for (int i = 0; i < batch_dists.rows; i++)
Mat r = batch_dists.row(i);
double mVal;
Point mLoc;
// Only the minimum value and its location are requested; the maximum outputs are NULL.
minMaxLoc(r, &mVal, NULL, &mLoc, NULL);
// Times ocl::distanceToCenters and the local reference distanceToCentersPerfTest for 256x256
// and 512x512 CV_32FC1 inputs over every DistType value.
// NOTE(review): braces and the first "if" are missing from this copy.
PERF_TEST_P(distanceToCentersFixture, distanceToCenters, ::testing::Combine(::testing::Values(cv::Size(256,256), cv::Size(512,512)), DistType::all()) )
Size size = get<0>(GetParam());
int distType = get<1>(GetParam());
// src and centers have the same size.
Mat src(size, CV_32FC1), centers(size, CV_32FC1);
// One distance and one label per source row.
Mat dists(src.rows, 1, CV_32FC1), labels(src.rows, 1, CV_32SC1);
// NOTE(review): the head of this statement is garbled ("||||||").
||||||, centers, WARMUP_RNG).out(dists, labels);
// OpenCL path: inputs are oclMat, while dists and labels are passed as host Mat objects.
ocl::oclMat ocl_src(src), ocl_centers(centers);
OCL_TEST_CYCLE() ocl::distanceToCenters(ocl_src, ocl_centers, dists, labels, distType);
// Plain path uses the local reference implementation.
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() distanceToCentersPerfTest(src, centers, dists, labels, distType);
@ -1,364 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using std::tr1::tuple;
using std::tr1::get;
///////////// WarpAffine ////////////////////////
// Fixture parameterized by (size, matrix type).
typedef Size_MatType WarpAffineFixture;
// Times cv::ocl::warpAffine and cv::warpAffine with INTER_NEAREST and a fixed 2x3 transform
// (a rotation by pi / 6 plus a translation of (100, -100)).
// NOTE(review): the parameter generator, braces (including those of the coeffs initializer)
// and the first "if" are missing from this copy.
PERF_TEST_P(WarpAffineFixture, WarpAffine,
static const double coeffs[2][3] =
{ cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 },
{ sin(CV_PI / 6), cos(CV_PI / 6), -100.0 }
// M is a Mat header over coeffs; the cast drops const because this Mat constructor takes a
// non-const data pointer.
Mat M(2, 3, CV_64F, (void *)coeffs);
const int interpolation = INTER_NEAREST;
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
Mat src(srcSize, type), dst(srcSize, type);
// NOTE(review): the head of this statement is garbled ("||||||").
||||||, WARMUP_RNG).out(dst);
// OpenCL path.
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
OCL_TEST_CYCLE() cv::ocl::warpAffine(oclSrc, oclDst, M, srcSize, interpolation);
// Plain (CPU) path with identical arguments.
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::warpAffine(src, dst, M, srcSize, interpolation);
///////////// WarpPerspective ////////////////////////
// Fixture parameterized by (size, matrix type).
typedef Size_MatType WarpPerspectiveFixture;
// Times cv::ocl::warpPerspective and cv::warpPerspective with INTER_LINEAR and a fixed 3x3
// transform whose last row is (0, 0, 1).
// NOTE(review): the parameter generator, braces (including those of the coeffs initializer)
// and the first "if" are missing from this copy.
PERF_TEST_P(WarpPerspectiveFixture, WarpPerspective,
static const double coeffs[3][3] =
{cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
{sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
{0.0, 0.0, 1.0}
// M is a Mat header over coeffs; the cast drops const because this Mat constructor takes a
// non-const data pointer.
Mat M(3, 3, CV_64F, (void *)coeffs);
const int interpolation = INTER_LINEAR;
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
Mat src(srcSize, type), dst(srcSize, type);
// NOTE(review): the head of this statement is garbled ("||||||"). The .time() argument grows
// with the image size (18 / 5 / 2).
||||||, WARMUP_RNG).out(dst)
.time(srcSize == OCL_SIZE_4000 ? 18 : srcSize == OCL_SIZE_2000 ? 5 : 2);
// OpenCL path.
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
OCL_TEST_CYCLE() cv::ocl::warpPerspective(oclSrc, oclDst, M, srcSize, interpolation);
// Plain (CPU) path with identical arguments.
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::warpPerspective(src, dst, M, srcSize, interpolation);
///////////// resize ////////////////////////
// Parameters of the resize benchmark: source size, matrix type, interpolation
// mode and the scale factor applied to both axes.
typedef tuple<Size, MatType, resizeInterType, double> resizeParams;
typedef TestBaseWithParam<resizeParams> resizeFixture;
// Times cv::ocl::resize and, on the plain path, cv::resize for scale factors 0.5 and 2.0.
// NOTE(review): brace-only lines, the leading generator arguments and the guard
// that the "else if" below pairs with appear to be missing from this listing;
// the "||||||" prefix looks like an extraction artifact - confirm against the
// original file.
PERF_TEST_P(resizeFixture, resize,
::testing::Values(0.5, 2.0)))
const resizeParams params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), interType = get<2>(params);
double scale = get<3>(params);
// Destination size is the source size scaled and rounded to the nearest integer.
const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));
// Both the source and the destination allocation are checked before allocating.
checkDeviceMaxMemoryAllocSize(srcSize, type);
checkDeviceMaxMemoryAllocSize(dstSize, type);
Mat src(srcSize, type), dst;
dst.create(dstSize, type);
||||||, WARMUP_RNG).out(dst);
// NOTE(review): the statement guarded by this condition is not visible in this
// listing (presumably a special case for the largest linear 8UC4 input) - confirm.
if (interType == INTER_LINEAR && type == CV_8UC4 && OCL_SIZE_4000 == srcSize)
ocl::oclMat oclSrc(src), oclDst(dstSize, type);
// An empty Size() is passed, so the output size comes from the two scale factors.
OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, interType);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::resize(src, dst, Size(), scale, scale, interType);
// Parameters of the INTER_AREA resize benchmark: source size, matrix type and scale factor.
typedef tuple<Size, MatType, double> resizeAreaParams;
typedef TestBaseWithParam<resizeAreaParams> resizeAreaFixture;
// Times cv::ocl::resize and, on the plain path, cv::resize with cv::INTER_AREA
// for the scale factors 0.3, 0.5 and 0.6 (all of them shrink the image).
// NOTE(review): brace-only lines, the leading generator arguments and the guard
// that the "else if" below pairs with appear to be missing from this listing;
// the "||||||" prefix looks like an extraction artifact - confirm against the
// original file.
PERF_TEST_P(resizeAreaFixture, resize,
::testing::Values(0.3, 0.5, 0.6)))
const resizeAreaParams params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
double scale = get<2>(params);
// Destination size is the source size scaled and rounded to the nearest integer.
const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));
// Only the source allocation is checked here; dstSize is not.
checkDeviceMaxMemoryAllocSize(srcSize, type);
Mat src(srcSize, type), dst;
dst.create(dstSize, type);
||||||, WARMUP_RNG).out(dst);
ocl::oclMat oclSrc(src), oclDst(dstSize, type);
// An empty Size() is passed, so the output size comes from the two scale factors.
OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, cv::INTER_AREA);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::resize(src, dst, Size(), scale, scale, cv::INTER_AREA);
///////////// remap////////////////////////
// Parameters of the remap benchmark: image size, matrix type and interpolation mode.
typedef tuple<Size, MatType, RemapInterType> remapParams;
typedef TestBaseWithParam<remapParams> remapFixture;
// Times cv::ocl::remap and, on the plain path, cv::remap using float coordinate
// maps generated below and a constant border.
// NOTE(review): brace-only lines, the parameter generator argument and the guard
// that the "else if" below pairs with appear to be missing from this listing;
// the "||||||" prefix looks like an extraction artifact - confirm against the
// original file.
PERF_TEST_P(remapFixture, remap,
const remapParams params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), interpolation = get<2>(params);
Mat src(srcSize, type), dst(srcSize, type);
||||||, WARMUP_RNG).out(dst);
// NOTE(review): the statement guarded by this condition is not visible in this
// listing - confirm what is done for the largest linear-interpolation case.
if (srcSize == OCL_SIZE_4000 && interpolation == INTER_LINEAR)
// Separate single-channel float maps for the x and y coordinates.
Mat xmap, ymap;
xmap.create(srcSize, CV_32FC1);
ymap.create(srcSize, CV_32FC1);
for (int i = 0; i < srcSize.height; ++i)
float * const xmap_row = xmap.ptr<float>(i);
float * const ymap_row = ymap.ptr<float>(i);
for (int j = 0; j < srcSize.width; ++j)
// Each coordinate is moved toward the image centre: its offset from the
// centre is multiplied by 0.75.
xmap_row[j] = (j - srcSize.width * 0.5f) * 0.75f + srcSize.width * 0.5f;
ymap_row[j] = (i - srcSize.height * 0.5f) * 0.75f + srcSize.height * 0.5f;
const int borderMode = BORDER_CONSTANT;
// OpenCL path: source, destination and both maps are turned into oclMat
// objects outside the timed cycle.
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
ocl::oclMat oclXMap(xmap), oclYMap(ymap);
OCL_TEST_CYCLE() cv::ocl::remap(oclSrc, oclDst, oclXMap, oclYMap, interpolation, borderMode);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
///////////// buildWarpPerspectiveMaps ////////////////////////
// CPU reference used by the perf test: fills xmap and ymap with the coordinates
// obtained by applying a 3x3 projective matrix to every pixel position (x, y).
//   M          - 3x3 matrix (asserted); converted to float before use.
//   inverse    - selects whether M is used as given or inverted first (see note below).
//   dsize      - size of the generated maps; its area must be positive (asserted).
//   xmap, ymap - outputs, (re)created here as CV_32FC1 matrices of size dsize.
static void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, Mat &xmap, Mat &ymap)
CV_Assert(M.rows == 3 && M.cols == 3);
CV_Assert(dsize.area() > 0);
xmap.create(dsize, CV_32FC1);
ymap.create(dsize, CV_32FC1);
// coeffs is read below but only ever written through coeffsMat, which is
// constructed over the same buffer - this presumably relies on convertTo
// writing in place because size and type already match; verify.
float coeffs[3 * 3];
Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);
// NOTE(review): as listed, the inversion below would also run when inverse is
// true; an "else" line (and braces) was presumably lost in extraction - confirm.
if (inverse)
M.convertTo(coeffsMat, coeffsMat.type());
cv::Mat iM;
invert(M, iM);
iM.convertTo(coeffsMat, coeffsMat.type());
for (int y = 0; y < dsize.height; ++y)
float * const xmap_ptr = xmap.ptr<float>(y);
float * const ymap_ptr = ymap.ptr<float>(y);
for (int x = 0; x < dsize.width; ++x)
// Projective division: the third row of the matrix gives the denominator,
// the first and second rows give the x and y numerators.
float coeff = 1.0f / (x * coeffs[6] + y * coeffs[7] + coeffs[8]);
xmap_ptr[x] = (x * coeffs[0] + y * coeffs[1] + coeffs[2]) * coeff;
ymap_ptr[x] = (x * coeffs[3] + y * coeffs[4] + coeffs[5]) * coeff;
// Fixture parameterised only by the size of the maps to generate.
typedef TestBaseWithParam<Size> buildWarpPerspectiveMapsFixture;
// Times cv::ocl::buildWarpPerspectiveMaps against the file-local CPU
// buildWarpPerspectiveMaps, both called with inverse == true, and sanity-checks
// both maps with tolerance eps.
// NOTE(review): brace-only lines and the guard that the "else if" below pairs
// with appear to be missing; the "||||||" prefix looks like an extraction
// artifact; no download of oclXMap / oclYMap into xmap / ymap is visible before
// the first pair of SANITY_CHECK calls - confirm against the original file.
PERF_TEST_P(buildWarpPerspectiveMapsFixture, Inverse, OCL_TYPICAL_MAT_SIZES)
// 3x3 transform: rotation terms for an angle of CV_PI / 6, 100 / -100 in the last
// column, and a bottom row of (0, 0, 1).
static const double coeffs[3][3] =
{cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
{sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
{0.0, 0.0, 1.0}
Mat M(3, 3, CV_64F, (void *)coeffs);
const Size dsize = GetParam();
// Tolerance passed to every SANITY_CHECK below.
const double eps = 5e-4;
Mat xmap(dsize, CV_32FC1), ymap(dsize, CV_32FC1);
||||||, ymap);
ocl::oclMat oclXMap(dsize, CV_32FC1), oclYMap(dsize, CV_32FC1);
OCL_TEST_CYCLE() cv::ocl::buildWarpPerspectiveMaps(M, true, dsize, oclXMap, oclYMap);
SANITY_CHECK(xmap, eps);
SANITY_CHECK(ymap, eps);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() buildWarpPerspectiveMaps(M, true, dsize, xmap, ymap);
SANITY_CHECK(xmap, eps);
SANITY_CHECK(ymap, eps);
@ -1,103 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using namespace std;
using namespace cv::ocl;
using namespace cv;
using std::tr1::tuple;
using std::tr1::get;
///////////// Kalman Filter ////////////////////////
// Single-int parameter: the dimension passed to KalmanFilter::init.
typedef tuple<int> KalmanFilterType;
typedef TestBaseWithParam<KalmanFilterType> KalmanFilterFixture;
// Kalman filter benchmark for dimensions 1000 and 1500, with a cv::KalmanFilter
// path and a cv::ocl::KalmanFilter path.
// NOTE(review): this listing shows only the setup of each path; the timed
// statements, brace-only lines and the guard that the "else if" below pairs
// with appear to be missing - confirm against the original file.
PERF_TEST_P(KalmanFilterFixture, KalmanFilter,
::testing::Values(1000, 1500))
KalmanFilterType params = GetParam();
const int dim = get<0>(params);
// Column vector of dim floats filled by randu with bounds -1 and 1.
cv::Mat sample(dim, 1, CV_32FC1), dresult;
randu(sample, -1, 1);
cv::Mat statePre_;
cv::KalmanFilter kalman;
// The same value is used for both init arguments (presumably the state and
// measurement dimensions - verify against the KalmanFilter API).
kalman.init(dim, dim);
statePre_ = kalman.statePre;
else if(RUN_OCL_IMPL)
cv::ocl::oclMat dsample(sample);
cv::ocl::KalmanFilter kalman_ocl;
kalman_ocl.init(dim, dim);
@ -1,121 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using std::tr1::tuple;
using std::tr1::get;
/////////// matchTemplate ////////////////////////
// Fixture for the TM_CCORR template-matching benchmark; each parameter carries a
// Size and a matrix type.
typedef Size_MatType CV_TM_CCORRFixture;
// Times cv::ocl::matchTemplate and, on the plain path, cv::matchTemplate with
// TM_CCORR and a fixed 5x5 template, then sanity-checks dst with tolerance 1e-4.
// NOTE(review): brace-only lines, the rest of the generator expression and the
// guard that the "else if" below pairs with appear to be missing; no download of
// oclDst into dst is visible before the first SANITY_CHECK - confirm against the
// original file.
PERF_TEST_P(CV_TM_CCORRFixture, matchTemplate,
::testing::Combine(::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000),
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params), templSize(5, 5);
const int type = get<1>(params);
Mat src(srcSize, type), templ(templSize, type);
// One result element per template position: (cols - tcols + 1) x (rows - trows + 1).
const Size dstSize(src.cols - templ.cols + 1, src.rows - templ.rows + 1);
Mat dst(dstSize, CV_32F);
// Inputs are filled by randu with bounds 0.0f and 1.0f.
randu(src, 0.0f, 1.0f);
randu(templ, 0.0f, 1.0f);
// The larger source size gets a larger time budget (20 versus 6).
declare.time(srcSize == OCL_SIZE_2000 ? 20 : 6).in(src, templ).out(dst);
ocl::oclMat oclSrc(src), oclTempl(templ), oclDst(dstSize, CV_32F);
OCL_TEST_CYCLE() cv::ocl::matchTemplate(oclSrc, oclTempl, oclDst, TM_CCORR);
SANITY_CHECK(dst, 1e-4);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::matchTemplate(src, templ, dst, TM_CCORR);
SANITY_CHECK(dst, 1e-4);
// Fixture for the TM_CCORR_NORMED template-matching benchmark, parameterised by
// source size only; source and template are CV_8UC1 and the template is 5x5.
// NOTE(review): the PERF_TEST_P line itself, brace-only lines and the guard that
// the "else if" below pairs with appear to be missing; the "||||||" prefix looks
// like an extraction artifact; no download of oclDst into dst is visible before
// the first SANITY_CHECK - confirm against the original file.
typedef TestBaseWithParam<Size> CV_TM_CCORR_NORMEDFixture;
const Size srcSize = GetParam(), templSize(5, 5);
Mat src(srcSize, CV_8UC1), templ(templSize, CV_8UC1), dst;
// One result element per template position: (cols - tcols + 1) x (rows - trows + 1).
const Size dstSize(src.cols - templ.cols + 1, src.rows - templ.rows + 1);
// NOTE(review): the result buffers here are CV_8UC1 while the TM_CCORR test
// above uses CV_32F - confirm this is intended.
dst.create(dstSize, CV_8UC1);
||||||, templ, WARMUP_RNG).out(dst)
// Time budget grows with the source size (10 for OCL_SIZE_2000, 23 for
// OCL_SIZE_4000, otherwise 2).
.time(srcSize == OCL_SIZE_2000 ? 10 : srcSize == OCL_SIZE_4000 ? 23 : 2);
ocl::oclMat oclSrc(src), oclTempl(templ), oclDst(dstSize, CV_8UC1);
OCL_TEST_CYCLE() cv::ocl::matchTemplate(oclSrc, oclTempl, oclDst, TM_CCORR_NORMED);
SANITY_CHECK(dst, 3e-2);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::matchTemplate(src, templ, dst, TM_CCORR_NORMED);
SANITY_CHECK(dst, 3e-2);
@ -1,238 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using std::tr1::tuple;
using std::tr1::get;
///////////// ConvertTo////////////////////////
// Fixture for the convertTo benchmark; each parameter carries a Size and a matrix type.
typedef Size_MatType ConvertToFixture;
// Times oclMat::convertTo and, on the plain path, Mat::convertTo when converting
// the source to CV_32F with the same number of channels.
// NOTE(review): brace-only lines, the parameter generator argument and the guard
// that the "else if" below pairs with appear to be missing; the "||||||" prefix
// looks like an extraction artifact - confirm against the original file.
PERF_TEST_P(ConvertToFixture, ConvertTo,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
Mat src(srcSize, type), dst;
// Destination type keeps the source channel count but uses 32-bit float depth.
const int dstType = CV_MAKE_TYPE(CV_32F, src.channels());
// Both the source and the destination allocation are checked before dst is created.
checkDeviceMaxMemoryAllocSize(srcSize, type);
checkDeviceMaxMemoryAllocSize(srcSize, dstType);
dst.create(srcSize, dstType);
||||||, WARMUP_RNG).out(dst);
ocl::oclMat oclSrc(src), oclDst(srcSize, dstType);
OCL_TEST_CYCLE() oclSrc.convertTo(oclDst, dstType);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() src.convertTo(dst, dstType);
///////////// copyTo////////////////////////
// Fixture for the copyTo benchmark; each parameter carries a Size and a matrix type.
typedef Size_MatType copyToFixture;
// Times oclMat::copyTo and, on the plain path, Mat::copyTo into a destination of
// the same size and type as the source.
// NOTE(review): brace-only lines, the parameter generator argument and the guard
// that the "else if" below pairs with appear to be missing; the "||||||" prefix
// looks like an extraction artifact - confirm against the original file.
PERF_TEST_P(copyToFixture, copyTo,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
Mat src(srcSize, type), dst(srcSize, type);
||||||, WARMUP_RNG).out(dst);
ocl::oclMat oclSrc(src), oclDst(srcSize, type);
OCL_TEST_CYCLE() oclSrc.copyTo(oclDst);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() src.copyTo(dst);
///////////// setTo////////////////////////
// Fixture for the setTo benchmark; each parameter carries a Size and a matrix type.
typedef Size_MatType setToFixture;
// Times oclMat::setTo and, on the plain path, Mat::setTo with the scalar (1, 2, 3, 4).
// NOTE(review): brace-only lines, the parameter generator argument and the guard
// that the "else if" below pairs with appear to be missing from this listing -
// confirm against the original file.
PERF_TEST_P(setToFixture, setTo,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
// Four-component fill value.
const Scalar val(1, 2, 3, 4);
Mat src(srcSize, type);
// The OpenCL matrix is allocated with the same size and type; it is not
// initialised from src.
ocl::oclMat oclSrc(srcSize, type);
OCL_TEST_CYCLE() oclSrc.setTo(val);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() src.setTo(val);
// Everything from here on is compiled out by the preprocessor: upload and
// download benchmarks for oclMat.
// NOTE(review): the matching #endif, brace-only lines, the bodies of the OpenCL
// timing loops and the guards that the "else if" lines pair with are not visible
// in this listing; the "||||||" prefixes look like extraction artifacts - confirm
// against the original file.
#if 0
/////////////////// upload ///////////////////////////
// Parameters shared by both disabled tests: size, depth and channel count.
typedef tuple<Size, MatDepth, int> uploadParams;
typedef TestBaseWithParam<uploadParams> uploadFixture;
PERF_TEST_P(uploadFixture, upload,
testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F),
testing::Range(1, 5)))
const uploadParams params = GetParam();
const Size srcSize = get<0>(params);
const int depth = get<1>(params), cn = get<2>(params);
// Full matrix type assembled from the depth and channel-count parameters.
const int type = CV_MAKE_TYPE(depth, cn);
Mat src(srcSize, type), dst;
||||||, WARMUP_RNG);
ocl::oclMat oclDst;
// Hand-written timing loop: the timer starts before each iteration; after it,
// ocl::finish() is called, the timer stops and the destination is released.
for(; startTimer(), next(); ocl::finish(), stopTimer(), oclDst.release())
else if (RUN_PLAIN_IMPL)
// Plain path times a host-side clone of src with the same loop shape.
for(; startTimer(), next(); ocl::finish(), stopTimer(), dst.release())
dst = src.clone();
/////////////////// download ///////////////////////////
typedef TestBaseWithParam<uploadParams> downloadFixture;
PERF_TEST_P(downloadFixture, download,
testing::Values(CV_8U, CV_8S, CV_16U, CV_16S, CV_32S, CV_32F),
testing::Range(1, 5)))
const uploadParams params = GetParam();
const Size srcSize = get<0>(params);
const int depth = get<1>(params), cn = get<2>(params);
// Full matrix type assembled from the depth and channel-count parameters.
const int type = CV_MAKE_TYPE(depth, cn);
Mat src(srcSize, type), dst;
||||||, WARMUP_RNG);
// The device matrix is constructed from src once, outside the timing loop.
ocl::oclMat oclSrc(src);
for(; startTimer(), next(); ocl::finish(), stopTimer(), dst.release())
else if (RUN_PLAIN_IMPL)
// Plain path times a host-side clone of src with the same loop shape.
for(; startTimer(), next(); ocl::finish(), stopTimer(), dst.release())
dst = src.clone();
@ -1,109 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jin Ma,
// Xiaopeng Fu,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using namespace std;
using namespace cv::ocl;
using namespace cv;
using std::tr1::tuple;
using std::tr1::get;
////////////////////////////////// K-NEAREST NEIGHBOR ////////////////////////////////////
// Generates random data for the k-nearest-neighbour benchmark.
//   trainData  - output; created as a CV_32FC1 matrix of the given size and filled
//                by randu with bounds 1.0 and 100.0.
//   size       - size of trainData; size.height is also the number of label rows.
//   trainLabel - output labels, written only when nClasses != 0; it defaults to a
//                reference obtained from a temporary Mat.
//   nClasses   - when non-zero, one label per row is drawn by randu with bounds 0
//                and nClasses - 1 and then converted to CV_32FC1.
// NOTE(review): brace-only lines appear to be missing, so how many of the
// statements below the "if" are conditional cannot be seen here; also confirm
// whether the upper bound nClasses - 1 is meant to be reachable.
static void genData(Mat& trainData, Size size, Mat& trainLabel = Mat().setTo(Scalar::all(0)), int nClasses = 0)
trainData.create(size, CV_32FC1);
randu(trainData, 1.0, 100.0);
if(nClasses != 0)
// Labels are first generated as 8-bit values, then converted in place to float.
trainLabel.create(size.height, 1, CV_8UC1);
randu(trainLabel, 0, nClasses - 1);
trainLabel.convertTo(trainLabel, CV_32FC1);
// Single-int parameter: the number of rows in the training and test data.
typedef tuple<int> KNNParamType;
typedef TestBaseWithParam<KNNParamType> KNNFixture;
// k-nearest-neighbour benchmark for 1000, 2000 and 4000 rows, with a CvKNearest
// path and a cv::ocl::KNearestNeighbour path.
// NOTE(review): the PERF_TEST_P line, brace-only lines, the timing-cycle macros
// and the guard of the first branch appear to be missing; in the OpenCL branch
// nothing visible fills testdata from testData before find_nearest - confirm
// against the original file.
testing::Values(1000, 2000, 4000))
KNNParamType params = GetParam();
const int rows = get<0>(params);
int columns = 100;
// k is one 250th of the row count (integer division).
int k = rows/250;
Mat trainData, trainLabels;
Size size(columns, rows);
// Training set with labels generated for 3 classes.
genData(trainData, size, trainLabels, 3);
Mat testData;
// Test set of the same size, generated without labels.
genData(testData, size);
Mat best_label;
CvKNearest knn_cpu;
knn_cpu.train(trainData, trainLabels);
knn_cpu.find_nearest(testData, k, &best_label);
}else if(RUN_OCL_IMPL)
cv::ocl::oclMat best_label_ocl;
cv::ocl::oclMat testdata;
cv::ocl::KNearestNeighbour knn_ocl;
knn_ocl.train(trainData, trainLabels);
knn_ocl.find_nearest(testdata, k, best_label_ocl);
@ -1,90 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using std::tr1::tuple;
using std::tr1::get;
using namespace cv;
using namespace cv::ocl;
using namespace cvtest;
using namespace testing;
using namespace std;
///////////// Moments ////////////////////////
// Performance of image moments computation.
// Parameters: image size, matrix type and the binaryImage flag passed to the
// moments functions.
typedef tuple<Size, MatType, bool> MomentsParamType;
typedef TestBaseWithParam<MomentsParamType> MomentsFixture;
// Times cv::ocl::ocl_moments and, on the plain path, cv::moments; the resulting
// moments are turned into Hu moments and sanity-checked with tolerance 2e-1.
// NOTE(review): brace-only lines, the first generator argument and the guard
// that the "else if" below pairs with appear to be missing from this listing -
// confirm against the original file.
PERF_TEST_P(MomentsFixture, Moments,
OCL_PERF_ENUM(CV_8UC1, CV_16SC1, CV_16UC1, CV_32FC1), ::testing::Bool()))
const MomentsParamType params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
const bool binaryImage = get<2>(params);
// dst is a 7x1 CV_64F matrix that receives the output of cv::HuMoments.
Mat src(srcSize, type), dst(7, 1, CV_64F);
randu(src, 0, 255);
cv::Moments mom;
oclMat src_d(src);
OCL_TEST_CYCLE() mom = cv::ocl::ocl_moments(src_d, binaryImage);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() mom = cv::moments(src, binaryImage);
cv::HuMoments(mom, dst);
SANITY_CHECK(dst, 2e-1);
@ -1,86 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using std::tr1::tuple;
using std::tr1::get;
///////////// norm////////////////////////
// Parameters of the norm benchmark: matrix size and matrix type.
typedef tuple<Size, MatType> normParams;
typedef TestBaseWithParam<normParams> normFixture;
// Times cv::ocl::norm and, on the plain path, cv::norm with NORM_INF on two
// matrices of the same size and type.
// NOTE(review): brace-only lines, the generator arguments and the guard that the
// "else if" below pairs with appear to be missing; the "||||||" prefix looks like
// an extraction artifact; a SANITY_CHECK is visible only after the OpenCL cycle -
// confirm against the original file.
PERF_TEST_P(normFixture, norm, testing::Combine(
const normParams params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
double value = 0.0;
// Tolerance is DBL_EPSILON for 8-bit unsigned data and 1e-3 for every other depth.
const double eps = CV_MAT_DEPTH(type) == CV_8U ? DBL_EPSILON : 1e-3;
Mat src1(srcSize, type), src2(srcSize, type);
||||||, src2, WARMUP_RNG);
ocl::oclMat oclSrc1(src1), oclSrc2(src2);
OCL_TEST_CYCLE() value = cv::ocl::norm(oclSrc1, oclSrc2, NORM_INF);
SANITY_CHECK(value, eps);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() value = cv::norm(src1, src2, NORM_INF);
@ -1,255 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
///////////// PyrLKOpticalFlow ////////////////////////
using namespace perf;
using std::tr1::get;
using std::tr1::tuple;
using std::tr1::make_tuple;
// Parameters of the sparse pyramidal Lucas-Kanade benchmark: the number of points
// requested from goodFeaturesToTrack, and a (first file, second file, load mode) triple.
typedef tuple<int, tuple<string, string, LoadMode> > PyrLKOpticalFlowParamType;
typedef TestBaseWithParam<PyrLKOpticalFlowParamType> PyrLKOpticalFlowFixture;
// NOTE(review): the PERF_TEST_P line, the arguments of the make_tuple calls,
// brace-only lines, bare "else" lines, the timing-cycle macros and the guard of
// the first branch appear to be missing; the "||||||" prefix looks like an
// extraction artifact - confirm against the original file.
::testing::Values(1000, 2000, 4000),
make_tuple<string, string, LoadMode>
make_tuple<string, string, LoadMode>
PyrLKOpticalFlowParamType params = GetParam();
tuple<string, string, LoadMode> fileParam = get<1>(params);
const int pointsCount = get<0>(params);
const int openMode = static_cast<int>(get<2>(fileParam));
const string fileName0 = get<0>(fileParam), fileName1 = get<1>(fileParam);
// Both frames are loaded from the test-data directory with the requested mode.
Mat frame0 = imread(getDataPath(fileName0), openMode);
Mat frame1 = imread(getDataPath(fileName1), openMode);
||||||, frame1);
ASSERT_FALSE(frame0.empty()) << "can't load " << fileName0;
ASSERT_FALSE(frame1.empty()) << "can't load " << fileName1;
// Feature detection below runs on grayFrame: colour input is converted with
// cvtColor. NOTE(review): the assignment after it presumably belongs to a lost
// "else" branch - confirm.
Mat grayFrame;
if (openMode == IMREAD_COLOR)
cvtColor(frame0, grayFrame, COLOR_BGR2GRAY);
grayFrame = frame0;
vector<Point2f> pts, nextPts;
vector<unsigned char> status;
vector<float> err;
// Detect up to pointsCount features in the first frame; these are the points to track.
goodFeaturesToTrack(grayFrame, pts, pointsCount, 0.01, 0.0);
// Single-row CV_32FC2 matrix constructed over the pts storage.
// NOTE(review): &pts[0] assumes at least one feature was found - confirm.
Mat ptsMat(1, static_cast<int>(pts.size()), CV_32FC2, (void *)&pts[0]);
cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err);
else if (RUN_OCL_IMPL)
ocl::PyrLKOpticalFlow oclPyrLK;
ocl::oclMat oclFrame0(frame0), oclFrame1(frame1);
ocl::oclMat oclPts(ptsMat);
ocl::oclMat oclNextPts, oclStatus, oclErr;
oclPyrLK.sparse(oclFrame0, oclFrame1, oclPts, oclNextPts, oclStatus, &oclErr);
// NOTE(review): no use of this variable is visible in this listing.
int value = 0;
// Dual TV-L1 dense optical flow benchmark on the rubberwhale image pair (loaded
// as grayscale), with a createOptFlow_DualTVL1 path and an
// ocl::OpticalFlowDual_TVL1_OCL path; both flow components are sanity-checked
// with tolerance eps.
// NOTE(review): the tvl1flowFixture typedef, brace-only lines and the guard of
// the first branch appear to be missing; the "||||||" prefix looks like an
// extraction artifact; no download of oclFlow1 / oclFlow2 into flow1 / flow2 is
// visible before the last pair of SANITY_CHECK calls - confirm against the
// original file.
PERF_TEST(tvl1flowFixture, tvl1flow)
Mat frame0 = imread(getDataPath("gpu/opticalflow/rubberwhale1.png"), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty()) << "can't load rubberwhale1.png";
Mat frame1 = imread(getDataPath("gpu/opticalflow/rubberwhale2.png"), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty()) << "can't load rubberwhale2.png";
const Size srcSize = frame0.size();
// Tolerance passed to every SANITY_CHECK below.
const double eps = 1.2;
// flow is the two-channel result; flow1 and flow2 hold its separate channels.
Mat flow(srcSize, CV_32FC2), flow1(srcSize, CV_32FC1), flow2(srcSize, CV_32FC1);
||||||, frame1).out(flow1, flow2).time(159);
Ptr<DenseOpticalFlow> alg = createOptFlow_DualTVL1();
TEST_CYCLE() alg->calc(frame0, frame1, flow);
// Split the two-channel flow into the matrices that are checked.
Mat flows[2] = { flow1, flow2 };
split(flow, flows);
SANITY_CHECK(flow1, eps);
SANITY_CHECK(flow2, eps);
else if (RUN_OCL_IMPL)
ocl::OpticalFlowDual_TVL1_OCL oclAlg;
ocl::oclMat oclFrame0(frame0), oclFrame1(frame1), oclFlow1(srcSize, CV_32FC1),
oclFlow2(srcSize, CV_32FC1);
OCL_TEST_CYCLE() oclAlg(oclFrame0, oclFrame1, oclFlow1, oclFlow2);
SANITY_CHECK(flow1, eps);
SANITY_CHECK(flow2, eps);
///////////// FarnebackOpticalFlow ////////////////////////
// Parameters of the Farneback benchmark: a (polyN, polySigma) pair, the flags
// value and whether an initial flow is used.
typedef tuple<tuple<int, double>, farneFlagType, bool> FarnebackOpticalFlowParams;
typedef TestBaseWithParam<FarnebackOpticalFlowParams> FarnebackOpticalFlowFixture;
// Farneback dense optical flow benchmark on the rubberwhale image pair (loaded
// as grayscale) for (polyN, polySigma) = (5, 1.1) and (7, 1.5).
// NOTE(review): several lines appear to be missing from this listing - the
// remaining generator arguments, brace-only and bare "else" lines, the guard of
// the first branch, the timing-cycle macros, the heads of the two calls whose
// argument lists start with "frame0, frame1, flow", and any download of
// oclFlowx / oclFlowy before the last SANITY_CHECK pair; the "||||||" prefix
// looks like an extraction artifact - confirm against the original file.
PERF_TEST_P(FarnebackOpticalFlowFixture, FarnebackOpticalFlow,
::testing::Values(make_tuple<int, double>(5, 1.1),
make_tuple<int, double>(7, 1.5)),
Mat frame0 = imread(getDataPath("gpu/opticalflow/rubberwhale1.png"), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty()) << "can't load rubberwhale1.png";
Mat frame1 = imread(getDataPath("gpu/opticalflow/rubberwhale2.png"), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty()) << "can't load rubberwhale2.png";
const Size srcSize = frame0.size();
const FarnebackOpticalFlowParams params = GetParam();
const tuple<int, double> polyParams = get<0>(params);
const int polyN = get<0>(polyParams), flags = get<1>(params);
const double polySigma = get<1>(polyParams), pyrScale = 0.5;
const bool useInitFlow = get<2>(params);
// Tolerance passed to every SANITY_CHECK below.
const double eps = 1.5;
// flow is the two-channel result; flowx and flowy hold its separate channels.
Mat flowx(srcSize, CV_32FC1), flowy(srcSize, CV_32FC1), flow(srcSize, CV_32FC2);
||||||, frame1).out(flowx, flowy);
// The ocl::FarnebackOpticalFlow object holds the settings used below; only these
// four fields are assigned here, the others keep whatever the object starts with.
ocl::FarnebackOpticalFlow farn;
farn.pyrScale = pyrScale;
farn.polyN = polyN;
farn.polySigma = polySigma;
farn.flags = flags;
if (useInitFlow)
frame0, frame1, flow, farn.pyrScale, farn.numLevels, farn.winSize,
farn.numIters, farn.polyN, farn.polySigma, farn.flags);
frame0, frame1, flow, farn.pyrScale, farn.numLevels, farn.winSize,
farn.numIters, farn.polyN, farn.polySigma, farn.flags);
// Split the two-channel flow into the matrices that are checked.
Mat flowxy[2] = { flowx, flowy };
split(flow, flowxy);
SANITY_CHECK(flowx, eps);
SANITY_CHECK(flowy, eps);
else if (RUN_OCL_IMPL)
ocl::oclMat oclFrame0(frame0), oclFrame1(frame1),
oclFlowx(srcSize, CV_32FC1), oclFlowy(srcSize, CV_32FC1);
if (useInitFlow)
farn(oclFrame0, oclFrame1, oclFlowx, oclFlowy);
farn(oclFrame0, oclFrame1, oclFlowx, oclFlowy);
SANITY_CHECK(flowx, eps);
SANITY_CHECK(flowy, eps);
@ -1,103 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
// Authors:
// * Peter Andreas Entschev,
#include "perf_precomp.hpp"
using namespace perf;
/////////////////// ORB ///////////////////
// Parameter pack for the ORB benchmark: (image file name, number of features).
typedef std::tr1::tuple<std::string, int> Image_NFeatures_t;
typedef perf::TestBaseWithParam<Image_NFeatures_t> Image_NFeatures;
// ORB benchmark. The first group of statements times cv::ocl::ORB_OCL, the
// RUN_PLAIN_IMPL branch times cv::ORB on the same grayscale image.
// NOTE(review): the parameter generator, the braces and the guard in front of
// the OpenCL statements are not visible in this view - confirm against the file.
PERF_TEST_P(Image_NFeatures, ORB,
const Image_NFeatures_t params = GetParam();
const std::string imgFile = std::tr1::get<0>(params);
const int nFeatures = std::tr1::get<1>(params);
const cv::Mat img = imread(getDataPath(imgFile), cv::IMREAD_GRAYSCALE);
cv::ocl::ORB_OCL d_orb(nFeatures);
const cv::ocl::oclMat d_img(img);
cv::ocl::oclMat d_keypoints, d_descriptors;
// NOTE(review): this uses TEST_CYCLE() rather than OCL_TEST_CYCLE(), so
// cv::ocl::finish() is not called per iteration - confirm the timing is intended.
TEST_CYCLE() d_orb(d_img, cv::ocl::oclMat(), d_keypoints, d_descriptors);
std::vector<cv::KeyPoint> ocl_keypoints;
d_orb.downloadKeyPoints(d_keypoints, ocl_keypoints);
cv::Mat ocl_descriptors(d_descriptors);
// Keep only the first 10 descriptor rows.
// NOTE(review): sortKeyPoints() reads descriptor rows by keypoint index;
// confirm that ocl_keypoints cannot hold more than 10 entries here.
ocl_descriptors = ocl_descriptors.rowRange(0, 10);
sortKeyPoints(ocl_keypoints, ocl_descriptors);
SANITY_CHECK_KEYPOINTS(ocl_keypoints, 1e-4);
else if (RUN_PLAIN_IMPL)
cv::ORB orb(nFeatures);
std::vector<cv::KeyPoint> cpu_keypoints;
cv::Mat cpu_descriptors;
TEST_CYCLE() orb(img, cv::noArray(), cpu_keypoints, cpu_descriptors);
@ -1,198 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
# pragma GCC diagnostic ignored "-Wextra"
# endif
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
# if defined __clang__ || defined __APPLE__
# pragma GCC diagnostic ignored "-Wmissing-prototypes"
# pragma GCC diagnostic ignored "-Wextra"
# endif
#include <iomanip>
#include <stdexcept>
#include <string>
#include <iostream>
#include <cstdio>
#include <vector>
#include <numeric>
#include "cvconfig.h"
#include "opencv2/core.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui.hpp"
#include "opencv2/calib3d.hpp"
#include "opencv2/video.hpp"
#include "opencv2/objdetect.hpp"
#include "opencv2/features2d.hpp"
#include "opencv2/ocl.hpp"
#include "opencv2/ts.hpp"
using namespace std;
using namespace cv;
// Square matrix sizes used as benchmark inputs.
#define OCL_SIZE_1000 Size(1000, 1000)
#define OCL_SIZE_2000 Size(2000, 2000)
#define OCL_SIZE_4000 Size(4000, 4000)
// Value generator covering the three sizes above.
#define OCL_TYPICAL_MAT_SIZES ::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000, OCL_SIZE_4000)
// Value generator over an explicit list; `## __VA_ARGS__` drops the trailing
// comma when only one value is given (a GNU-style preprocessor extension).
#define OCL_PERF_ENUM(type, ...) ::testing::Values(type, ## __VA_ARGS__ )
// Names of the selectable implementations.
#define IMPL_OCL "ocl"
#define IMPL_GPU "gpu"
#define IMPL_PLAIN "plain"
// True when the corresponding implementation is the selected one.
// NOTE(review): relies on getSelectedImpl() returning a string type that
// compares by value against a string literal - confirm.
#define RUN_OCL_IMPL (IMPL_OCL == getSelectedImpl())
#define RUN_PLAIN_IMPL (IMPL_PLAIN == getSelectedImpl())
# define RUN_GPU_IMPL (IMPL_GPU == getSelectedImpl())
#define OCL_PERF_ELSE \
else \
#define OCL_PERF_ELSE \
#define OCL_TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); cv::ocl::finish(), stopTimer())
#define OCL_TEST_CYCLE() for(; startTimer(), next(); cv::ocl::finish(), stopTimer())
#define OCL_TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); startTimer(), next(); stopTimer()) for(int r = 0; r < runsNum; cv::ocl::finish(), ++r)
// Timing loops for OpenCL code (documented here, after the macros, because the
// preceding line ends with a line continuation):
//  - OCL_TEST_CYCLE_N(n): declares n iterations; each iteration calls
//    cv::ocl::finish() before stopTimer().
//  - OCL_TEST_CYCLE(): same loop without declaring an iteration count.
//  - OCL_TEST_CYCLE_MULTIRUN(runsNum): runs the statement runsNum times per
//    timed iteration, calling cv::ocl::finish() after every run.
// TODO: Move to the ts module
namespace cvtest {
namespace ocl {
inline void checkDeviceMaxMemoryAllocSize(const Size& size, int type, int factor = 1)
assert(factor > 0);
if (!(IMPL_OCL == perf::TestBase::getSelectedImpl()))
return; // OpenCL devices are not used
int cn = CV_MAT_CN(type);
int cn_ocl = cn == 3 ? 4 : cn;
int type_ocl = CV_MAKE_TYPE(CV_MAT_DEPTH(type), cn_ocl);
size_t memSize = size.area() * CV_ELEM_SIZE(type_ocl);
const cv::ocl::DeviceInfo& devInfo = cv::ocl::Context::getContext()->getDeviceInfo();
if (memSize * factor >= devInfo.maxMemAllocSize)
throw perf::TestBase::PerfSkipTestException();
struct KeypointIdxCompare
std::vector<cv::KeyPoint>* keypoints;
explicit KeypointIdxCompare(std::vector<cv::KeyPoint>* _keypoints) : keypoints(_keypoints) {}
bool operator ()(size_t i1, size_t i2) const
cv::KeyPoint kp1 = (*keypoints)[i1];
cv::KeyPoint kp2 = (*keypoints)[i2];
if ( !=
return <;
if ( !=
return <;
if (kp1.response != kp2.response)
return kp1.response < kp2.response;
return kp1.octave < kp2.octave;
// Sorts keypoints in place using KeypointIdxCompare and, when a descriptor
// matrix is supplied, applies the same permutation to its rows.
//   keypoints    - reordered in place
//   _descriptors - optional; row i is assumed to belong to keypoints[i]
inline void sortKeyPoints(std::vector<cv::KeyPoint>& keypoints, cv::InputOutputArray _descriptors = cv::noArray())
{
    // Sort a permutation of indices so that keypoints and descriptor rows can
    // be rearranged consistently afterwards.
    std::vector<size_t> indexies(keypoints.size());
    for (size_t i = 0; i < indexies.size(); ++i)
        indexies[i] = i;

    std::sort(indexies.begin(), indexies.end(), KeypointIdxCompare(&keypoints));

    // Size the output up front; the previous code indexed into an empty vector.
    std::vector<cv::KeyPoint> new_keypoints(keypoints.size());
    cv::Mat new_descriptors;

    cv::Mat descriptors;
    if (_descriptors.needed())
    {
        descriptors = _descriptors.getMat();
        new_descriptors.create(descriptors.size(), descriptors.type());
    }

    for (size_t i = 0; i < indexies.size(); ++i)
    {
        const size_t new_idx = indexies[i];
        new_keypoints[i] = keypoints[new_idx];
        if (!new_descriptors.empty())
            descriptors.row((int) new_idx).copyTo(new_descriptors.row((int) i));
    }

    // Publish the result; the previous code never wrote the sorted data back
    // and ended in an `if` without a body.
    keypoints.swap(new_keypoints);
    if (_descriptors.needed())
        new_descriptors.copyTo(_descriptors);
}
} // namespace cvtest::ocl
} // namespace cvtest
using namespace cvtest::ocl;
@ -1,130 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using std::tr1::tuple;
using std::tr1::get;
///////////// pyrDown //////////////////////
typedef Size_MatType pyrDownFixture;
// Benchmarks pyrDown: ocl::pyrDown on the OpenCL path, cv::pyrDown on the plain path.
// NOTE(review): the parameter generator, the braces and the guard in front of
// the OpenCL statements are not visible in this view; only visible statements
// are touched here.
PERF_TEST_P(pyrDownFixture, pyrDown,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
// Size is constructed as (width, height). The previous code passed the halved
// height first, which transposes the destination for non-square inputs.
Size dstSize((srcSize.width + 1) >> 1, (srcSize.height + 1) >> 1);
// Skip the test when either buffer would exceed the device allocation limit.
checkDeviceMaxMemoryAllocSize(srcSize, type);
checkDeviceMaxMemoryAllocSize(dstSize, type);
Mat src(srcSize, type), dst;
dst.create(dstSize, type);
||||||, WARMUP_RNG).out(dst);
ocl::oclMat oclSrc(src), oclDst(dstSize, type);
OCL_TEST_CYCLE() ocl::pyrDown(oclSrc, oclDst);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() pyrDown(src, dst);
///////////// pyrUp ////////////////////////
typedef Size_MatType pyrUpFixture;
// Benchmarks pyrUp: ocl::pyrUp on the OpenCL path, cv::pyrUp on the plain path.
// NOTE(review): the parameter generator, the braces and the guard in front of
// the OpenCL statements are not visible in this view; only visible statements
// are touched here.
PERF_TEST_P(pyrUpFixture, pyrUp,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
// Size is constructed as (width, height). The previous code passed the doubled
// height first, which transposes the destination for non-square inputs.
Size dstSize(srcSize.width << 1, srcSize.height << 1);
// Skip the test when either buffer would exceed the device allocation limit.
checkDeviceMaxMemoryAllocSize(srcSize, type);
checkDeviceMaxMemoryAllocSize(dstSize, type);
Mat src(srcSize, type), dst;
dst.create(dstSize, type);
||||||, WARMUP_RNG).out(dst);
ocl::oclMat oclSrc(src), oclDst(dstSize, type);
// The previous code timed pyrDown in both branches, so pyrUp was never measured.
OCL_TEST_CYCLE() ocl::pyrUp(oclSrc, oclDst);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() pyrUp(src, dst);
@ -1,146 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Fangfang Bai,
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "perf_precomp.hpp"
using namespace perf;
using std::tr1::tuple;
using std::tr1::get;
///////////// Merge////////////////////////
typedef Size_MatType MergeFixture;
// Benchmarks merging three single-channel planes into one 3-channel matrix:
// cv::ocl::merge on the OpenCL path, cv::merge on the plain path.
// NOTE(review): the second generator argument, the braces and the guard in
// front of the OpenCL statements are not visible in this view.
PERF_TEST_P(MergeFixture, Merge,
::testing::Combine(::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000),
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
// The second parameter is used as a depth; the channel count is fixed to 3.
const int depth = get<1>(params), channels = 3;
const int dstType = CV_MAKE_TYPE(depth, channels);
checkDeviceMaxMemoryAllocSize(srcSize, dstType);
Mat dst(srcSize, dstType);
vector<Mat> src(channels);
// Allocate each single-channel source plane.
for (vector<Mat>::iterator i = src.begin(), end = src.end(); i != end; ++i)
i->create(srcSize, CV_MAKE_TYPE(depth, 1));
||||||*i, WARMUP_RNG);
ocl::oclMat oclDst(srcSize, dstType);
vector<ocl::oclMat> oclSrc(src.size());
// Copy every host plane into its oclMat counterpart before timing.
for (vector<ocl::oclMat>::size_type i = 0, end = src.size(); i < end; ++i)
oclSrc[i] = src[i];
OCL_TEST_CYCLE() cv::ocl::merge(oclSrc, oclDst);
else if (RUN_PLAIN_IMPL)
TEST_CYCLE() cv::merge(src, dst);
///////////// Split////////////////////////
typedef Size_MatType SplitFixture;
// Benchmarks splitting a 3-channel matrix into single-channel planes:
// cv::ocl::split on the OpenCL path, cv::split on the plain path.
// NOTE(review): the parameter generator, the braces and the guard in front of
// the OpenCL statements are not visible in this view.
PERF_TEST_P(SplitFixture, Split,
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
// The second parameter is used as a depth; the channel count is fixed to 3.
const int depth = get<1>(params), channels = 3;
const int type = CV_MAKE_TYPE(depth, channels);
checkDeviceMaxMemoryAllocSize(srcSize, type);
Mat src(srcSize, type);
||||||, WARMUP_RNG);
ocl::oclMat oclSrc(src);
vector<ocl::oclMat> oclDst(channels, ocl::oclMat(srcSize, CV_MAKE_TYPE(depth, 1)));
OCL_TEST_CYCLE() cv::ocl::split(oclSrc, oclDst);
// The code below names exactly three output planes.
ASSERT_EQ(3, channels);
Mat dst0, dst1, dst2;
else if (RUN_PLAIN_IMPL)
vector<Mat> dst(channels, Mat(srcSize, CV_MAKE_TYPE(depth, 1)));
TEST_CYCLE() cv::split(src, dst);
ASSERT_EQ(3, channels);
Mat & dst0 = dst[0], & dst1 = dst[1], & dst2 = dst[2];
File diff suppressed because it is too large
Load Diff
@ -1,639 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2013, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2013, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
namespace cv
namespace ocl
// Constants block for the MOG2 kernel. loadConstants() fills one instance and
// hands it to load_constant().
// NOTE(review): the braces and the closing typedef name are not visible in
// this view.
typedef struct _contant_struct
cl_float c_Tb;
cl_float c_TB;
cl_float c_Tg;
cl_float c_varInit;
cl_float c_varMin;
cl_float c_varMax;
cl_float c_tau;
cl_uchar c_shadowVal;
// Buffer returned by load_constant(); passed as the last argument of mog2_kernel.
cl_mem cl_constants = NULL;
// Host copy of TB set by loadConstants(); passed to getBackgroundImage2_kernel.
float c_TB;
#if defined _MSC_VER
#define snprintf sprintf_s
// Forward declarations of the host-side launch helpers defined later in this file.
// NOTE(review): the braces of these namespaces are not visible in this view.
namespace cv { namespace ocl { namespace device
namespace mog
// MOG: update the model and compute the foreground mask for one frame.
void mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma);
// MOG: launch getBackgroundImage_kernel to fill dst.
void getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio);
// MOG2: store the constants used by the MOG2 kernels.
void loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau,
unsigned char shadowVal);
// MOG2: launch mog2_kernel for one frame.
void mog2_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& modesUsed, oclMat& weight, oclMat& variance, oclMat& mean,
float alphaT, float prune, bool detectShadows, int nmixtures);
// MOG2: launch getBackgroundImage2_kernel to fill dst.
void getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures);
// Default parameters for cv::ocl::MOG; the constructor reads the mixture count,
// history, variance threshold, background ratio and noise sigma from here.
namespace mog
const int defaultNMixtures = 5;
const int defaultHistory = 200;
const float defaultBackgroundRatio = 0.7f;
const float defaultVarThreshold = 2.5f * 2.5f;
const float defaultNoiseSigma = 30.0f * 0.5f;
const float defaultInitialWeight = 0.05f;
void cv::ocl::BackgroundSubtractor::operator()(const oclMat&, oclMat&, float)
// Constructs a MOG subtractor with default parameters.
//   nmixtures - number of mixtures; non-positive values select
//               mog::defaultNMixtures, and the result is capped at 8.
cv::ocl::MOG::MOG(int nmixtures) :
frameSize_(0, 0), frameType_(0), nframes_(0)
nmixtures_ = std::min(nmixtures > 0 ? nmixtures : mog::defaultNMixtures, 8);
history = mog::defaultHistory;
varThreshold = mog::defaultVarThreshold;
backgroundRatio = mog::defaultBackgroundRatio;
noiseSigma = mog::defaultNoiseSigma;
// (Re)allocates the model buffers for a given frame geometry and resets the
// frame counter.
//   frameSize - frame dimensions
//   frameType - must be CV_8UC1, CV_8UC3 or CV_8UC4 (asserted)
void cv::ocl::MOG::initialize(cv::Size frameSize, int frameType)
CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);
frameSize_ = frameSize;
frameType_ = frameType;
int ch = CV_MAT_CN(frameType);
int work_ch = ch;
// for each gaussian mixture of each pixel bg model we store
// the mixture sort key (w/sum_of_variances), the mixture weight (w),
// the mean (nchannels values) and
// the diagonal covariance matrix (another nchannels values)
// Every buffer has height * nmixtures_ rows and the frame's width in columns.
weight_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
sortKey_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC1);
mean_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
var_.create(frameSize.height * nmixtures_, frameSize_.width, CV_32FC(work_ch));
nframes_ = 0;
// Processes one frame: reinitializes the model when needed, allocates the
// mask and launches the MOG kernels.
//   frame        - input frame, depth must be CV_8U (asserted)
//   fgmask       - output mask, created as CV_8UC1 with the frame size
//   learningRate - used as given when >= 0 and nframes_ > 1; otherwise
//                  replaced by 1 / min(nframes_, history)
void cv::ocl::MOG::operator()(const cv::ocl::oclMat& frame, cv::ocl::oclMat& fgmask, float learningRate)
using namespace cv::ocl::device::mog;
CV_Assert(frame.depth() == CV_8U);
int ch = frame.oclchannels();
int work_ch = ch;
// Reinitialize on the first frame, on a learning rate >= 1, or when the frame
// size or channel count no longer matches the model.
if (nframes_ == 0 || learningRate >= 1.0 || frame.size() != frameSize_ || work_ch != mean_.oclchannels())
initialize(frame.size(), frame.type());
fgmask.create(frameSize_, CV_8UC1);
// NOTE(review): nframes_ is not incremented anywhere in the visible lines; if
// it is still 0 here the fallback divides by zero. Presumably an increment
// line is missing from this view - confirm.
learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(nframes_, history);
CV_Assert(learningRate >= 0.0f);
mog_ocl(frame, ch, fgmask, weight_, sortKey_, mean_, var_, nmixtures_,
varThreshold, learningRate, backgroundRatio, noiseSigma);
// Allocates backgroundImage with the stored frame size and type and fills it
// through getBackgroundImage_ocl().
void cv::ocl::MOG::getBackgroundImage(oclMat& backgroundImage) const
using namespace cv::ocl::device::mog;
backgroundImage.create(frameSize_, frameType_);
cv::ocl::device::mog::getBackgroundImage_ocl(backgroundImage.oclchannels(), weight_, mean_, backgroundImage, nmixtures_, backgroundRatio);
// Resets the cached frame geometry and the frame counter.
// NOTE(review): no model buffer is released in the visible lines - confirm
// whether release calls are missing from this view.
void cv::ocl::MOG::release()
frameSize_ = Size(0, 0);
frameType_ = 0;
nframes_ = 0;
// Launches mog_withoutLearning_kernel over the frame.
// NOTE(review): the braces, the `else` between the two snprintf calls and the
// operands of the five cl_mem arguments are not visible in this view.
static void mog_withoutLearning(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& mean, oclMat& var,
int nmixtures, float varThreshold, float backgroundRatio)
Context* clCxt = Context::getContext();
// One work item per pixel, in 32x8 work groups.
size_t local_thread[] = {32, 8, 1};
size_t global_thread[] = {frame.cols, frame.rows, 1};
// Row strides expressed in elements rather than bytes.
int frame_step = (int)(frame.step/frame.elemSize());
int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
int weight_step = (int)(weight.step/weight.elemSize());
int mean_step = (int)(mean.step/mean.elemSize());
int var_step = (int)(var.step/var.elemSize());
// Split each byte offset into a row index and a column index in elements.
int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
int frame_offset_y = (int)(frame.offset/frame.step);
int frame_offset_x = (int)(frame.offset%frame.step);
frame_offset_x = frame_offset_x/(int)frame.elemSize();
// Build options: CN1 is defined for single-channel input, NMIXTURES always.
char build_option[50];
if(cn == 1)
snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
String kernel_name = "mog_withoutLearning_kernel";
// Kernel arguments as (size, pointer) pairs, in kernel parameter order.
std::vector<std::pair<size_t, const void*> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));
args.push_back(std::make_pair(sizeof(cl_float), (void*)&varThreshold));
args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));
openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
// Launches mog_withLearning_kernel over the frame. The kernel works on a
// temporary CV_32SC1 mask that is converted to CV_8U afterwards.
// NOTE(review): the braces, the `else` between the two snprintf calls and the
// operands of the six cl_mem arguments are not visible in this view. In the
// visible lines fgmask_raw is never written - confirm that a copy back to it
// exists in the full file.
static void mog_withLearning(const oclMat& frame, int cn, oclMat& fgmask_raw, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
int nmixtures, float varThreshold, float backgroundRatio, float learningRate, float minVar)
Context* clCxt = Context::getContext();
// One work item per pixel, in 32x8 work groups.
size_t local_thread[] = {32, 8, 1};
size_t global_thread[] = {frame.cols, frame.rows, 1};
// Temporary 32-bit mask with the size of the caller's mask.
oclMat fgmask(fgmask_raw.size(), CV_32SC1);
// Row strides expressed in elements rather than bytes.
int frame_step = (int)(frame.step/frame.elemSize());
int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
int weight_step = (int)(weight.step/weight.elemSize());
int sortKey_step = (int)(sortKey.step/sortKey.elemSize());
int mean_step = (int)(mean.step/mean.elemSize());
int var_step = (int)(var.step/var.elemSize());
// Split each byte offset into a row index and a column index in elements.
int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
int frame_offset_y = (int)(frame.offset/frame.step);
int frame_offset_x = (int)(frame.offset%frame.step);
frame_offset_x = frame_offset_x/(int)frame.elemSize();
// Build options: CN1 is defined for single-channel input, NMIXTURES always.
char build_option[50];
if(cn == 1)
snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
String kernel_name = "mog_withLearning_kernel";
// Kernel arguments as (size, pointer) pairs, in kernel parameter order.
std::vector<std::pair<size_t, const void*> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&sortKey_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));
args.push_back(std::make_pair(sizeof(cl_float), (void*)&varThreshold));
args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));
args.push_back(std::make_pair(sizeof(cl_float), (void*)&learningRate));
args.push_back(std::make_pair(sizeof(cl_float), (void*)&minVar));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));
openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
// Convert the temporary mask to 8-bit in place.
fgmask.convertTo(fgmask, CV_8U);
// Dispatches one MOG step to the learning or non-learning kernel launcher.
// The minimum variance passed to the learning path is noiseSigma squared.
// NOTE(review): no `else` is visible between the two calls below; as shown,
// the non-learning call would also run after the learning one - confirm that
// an `else` line is missing from this view.
void cv::ocl::device::mog::mog_ocl(const oclMat& frame, int cn, oclMat& fgmask, oclMat& weight, oclMat& sortKey, oclMat& mean, oclMat& var,
int nmixtures, float varThreshold, float learningRate, float backgroundRatio, float noiseSigma)
const float minVar = noiseSigma * noiseSigma;
if(learningRate > 0.0f)
mog_withLearning(frame, cn, fgmask, weight, sortKey, mean, var, nmixtures,
varThreshold, backgroundRatio, learningRate, minVar);
mog_withoutLearning(frame, cn, fgmask, weight, mean, var, nmixtures, varThreshold, backgroundRatio);
// Launches getBackgroundImage_kernel with one work item per dst pixel.
// NOTE(review): the braces, the `else` between the two snprintf calls and the
// operands of the three cl_mem arguments are not visible in this view.
void cv::ocl::device::mog::getBackgroundImage_ocl(int cn, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures, float backgroundRatio)
Context* clCxt = Context::getContext();
size_t local_thread[] = {32, 8, 1};
size_t global_thread[] = {dst.cols, dst.rows, 1};
// Row strides expressed in elements rather than bytes.
int weight_step = (int)(weight.step/weight.elemSize());
int mean_step = (int)(mean.step/mean.elemSize());
int dst_step = (int)(dst.step/dst.elemSize());
// Build options: CN1 is defined for single-channel output, NMIXTURES always.
char build_option[50];
if(cn == 1)
snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
String kernel_name = "getBackgroundImage_kernel";
// Kernel arguments as (size, pointer) pairs, in kernel parameter order.
std::vector<std::pair<size_t, const void*> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_step));
args.push_back(std::make_pair(sizeof(cl_float), (void*)&backgroundRatio));
openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float varInit, float varMin, float varMax, float tau, unsigned char shadowVal)
varMin = cv::min(varMin, varMax);
varMax = cv::max(varMin, varMax);
c_TB = TB;
_contant_struct *constants = new _contant_struct;
constants->c_Tb = Tb;
constants->c_TB = TB;
constants->c_Tg = Tg;
constants->c_varInit = varInit;
constants->c_varMin = varMin;
constants->c_varMax = varMax;
constants->c_tau = tau;
constants->c_shadowVal = shadowVal;
cl_constants = load_constant(*((cl_context*)getClContextPtr()), *((cl_command_queue*)getClCommandQueuePtr()),
(void *)constants, sizeof(_contant_struct));
// Launches mog2_kernel over the frame. The kernel works on a temporary
// CV_32SC1 mask that is converted to CV_8U afterwards; cl_constants is passed
// as the last kernel argument.
// NOTE(review): the braces, the `else` between the two snprintf calls and the
// operands of the six cl_mem arguments are not visible in this view. In the
// visible lines fgmaskRaw is never written - confirm that a copy back to it
// exists in the full file.
void cv::ocl::device::mog::mog2_ocl(const oclMat& frame, int cn, oclMat& fgmaskRaw, oclMat& modesUsed, oclMat& weight, oclMat& variance,
oclMat& mean, float alphaT, float prune, bool detectShadows, int nmixtures)
oclMat fgmask(fgmaskRaw.size(), CV_32SC1);
Context* clCxt = Context::getContext();
const float alpha1 = 1.0f - alphaT;
// NOTE(review): as shown the flag is set to 1 unconditionally, which would
// ignore detectShadows; presumably an `if (detectShadows)` line is missing
// from this view - confirm.
cl_int detectShadows_flag = 0;
detectShadows_flag = 1;
// One work item per pixel, in 32x8 work groups.
size_t local_thread[] = {32, 8, 1};
size_t global_thread[] = {frame.cols, frame.rows, 1};
// Row strides expressed in elements rather than bytes.
int frame_step = (int)(frame.step/frame.elemSize());
int fgmask_step = (int)(fgmask.step/fgmask.elemSize());
int weight_step = (int)(weight.step/weight.elemSize());
int modesUsed_step = (int)(modesUsed.step/modesUsed.elemSize());
int mean_step = (int)(mean.step/mean.elemSize());
int var_step = (int)(variance.step/variance.elemSize());
// Split each byte offset into a row index and a column index in elements.
int fgmask_offset_y = (int)(fgmask.offset/fgmask.step);
int fgmask_offset_x = (int)(fgmask.offset%fgmask.step);
fgmask_offset_x = fgmask_offset_x/(int)fgmask.elemSize();
int frame_offset_y = (int)(frame.offset/frame.step);
int frame_offset_x = (int)(frame.offset%frame.step);
frame_offset_x = frame_offset_x/(int)frame.elemSize();
String kernel_name = "mog2_kernel";
// Kernel arguments as (size, pointer) pairs, in kernel parameter order.
std::vector<std::pair<size_t, const void*> > args;
// Build options: CN1 is defined for single-channel input, NMIXTURES always.
char build_option[50];
if(cn == 1)
snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&var_step));
args.push_back(std::make_pair(sizeof(cl_float), (void*)&alphaT));
args.push_back(std::make_pair(sizeof(cl_float), (void*)&alpha1));
args.push_back(std::make_pair(sizeof(cl_float), (void*)&prune));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&detectShadows_flag));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_x));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&fgmask_offset_y));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_x));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&frame_offset_y));
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&cl_constants));
openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
// Convert the temporary mask to 8-bit in place.
fgmask.convertTo(fgmask, CV_8U);
// Launches getBackgroundImage2_kernel with one work item per modesUsed
// element; the cached c_TB value is passed as a kernel argument.
// NOTE(review): the braces, the `else` between the two snprintf calls and the
// operands of the four cl_mem arguments are not visible in this view.
void cv::ocl::device::mog::getBackgroundImage2_ocl(int cn, const oclMat& modesUsed, const oclMat& weight, const oclMat& mean, oclMat& dst, int nmixtures)
Context* clCxt = Context::getContext();
size_t local_thread[] = {32, 8, 1};
size_t global_thread[] = {modesUsed.cols, modesUsed.rows, 1};
// Row strides expressed in elements rather than bytes.
int weight_step = (int)(weight.step/weight.elemSize());
int modesUsed_step = (int)(modesUsed.step/modesUsed.elemSize());
int mean_step = (int)(mean.step/mean.elemSize());
int dst_step = (int)(dst.step/dst.elemSize());
// Split the dst byte offset into a row index and a column index in elements.
int dst_y = (int)(dst.offset/dst.step);
int dst_x = (int)(dst.offset%dst.step);
dst_x = dst_x/(int)dst.elemSize();
String kernel_name = "getBackgroundImage2_kernel";
// Kernel arguments as (size, pointer) pairs, in kernel parameter order.
std::vector<std::pair<size_t, const void*> > args;
// Build options: CN1 is defined for single-channel output, NMIXTURES always.
char build_option[50];
if(cn == 1)
snprintf(build_option, 50, "-D CN1 -D NMIXTURES=%d", nmixtures);
snprintf(build_option, 50, "-D NMIXTURES=%d", nmixtures);
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_float), (void*)&c_TB));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&modesUsed_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&weight_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&mean_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_x));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&dst_y));
openCLExecuteKernel(clCxt, &bgfg_mog, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
// MOG2
// Default parameter values for the MOG2 background subtractor. They are read
// through the mog2:: qualifier by the cv::ocl::MOG2 constructor.
namespace mog2
// default parameters of gaussian background detection algorithm
const int defaultHistory = 500; // Learning rate; alpha = 1/defaultHistory
const float defaultVarThreshold = 4.0f * 4.0f;
const int defaultNMixtures = 5; // maximal number of Gaussians in mixture
const float defaultBackgroundRatio = 0.9f; // threshold sum of weights for background test
const float defaultVarThresholdGen = 3.0f * 3.0f;
const float defaultVarInit = 15.0f; // initial variance for new components
const float defaultVarMax = 5.0f * defaultVarInit; // evaluates to 75.0f
const float defaultVarMin = 4.0f;
// additional parameters
const float defaultfCT = 0.05f; // complexity reduction prior constant; 0 means no reduction of the number of components
const unsigned char defaultnShadowDetection = 127; // value to use in the segmentation mask for shadows; set to 0 to disable shadow detection
const float defaultfTau = 0.5f; // Tau - shadow threshold, see the paper for explanation
// Constructs a MOG2 background subtractor with every tunable parameter set to
// its mog2:: default. No frame-sized storage is set up here; the frame size,
// frame type and frame counter all start at zero.
//   nmixtures - requested number of Gaussian components per pixel; any
//               non-positive value selects mog2::defaultNMixtures.
cv::ocl::MOG2::MOG2(int nmixtures) : frameSize_(0, 0), frameType_(0), nframes_(0)
{
    // mixture size: honour the caller's request only when it is positive
    if (nmixtures > 0)
        nmixtures_ = nmixtures;
    else
        nmixtures_ = mog2::defaultNMixtures;

    // learning / classification thresholds
    history = mog2::defaultHistory;
    backgroundRatio = mog2::defaultBackgroundRatio;
    varThreshold = mog2::defaultVarThreshold;
    varThresholdGen = mog2::defaultVarThresholdGen;

    // variance bounds for the Gaussian components
    fVarInit = mog2::defaultVarInit;
    fVarMin = mog2::defaultVarMin;
    fVarMax = mog2::defaultVarMax;

    // complexity reduction and shadow handling
    fCT = mog2::defaultfCT;
    bShadowDetection = true;
    nShadowDetection = mog2::defaultnShadowDetection;
    fTau = mog2::defaultfTau;
}
// (Re)creates the per-pixel model storage for a given frame geometry and
// pushes the current parameter set to the device via loadConstants().
//   frameSize - size of the frames that will be processed
//   frameType - must be CV_8UC1, CV_8UC3 or CV_8UC4 (asserted)
// The frame counter is reset to zero.
void cv::ocl::MOG2::initialize(cv::Size frameSize, int frameType)
{
    using namespace cv::ocl::device::mog;

    CV_Assert(frameType == CV_8UC1 || frameType == CV_8UC3 || frameType == CV_8UC4);

    nframes_ = 0;
    frameType_ = frameType;
    frameSize_ = frameSize;

    // The mean buffer uses as many channels as the input frame type.
    const int channels = CV_MAT_CN(frameType);

    // Weight, variance and mean are stored with the nmixtures_ components
    // stacked vertically: (height * nmixtures_) rows by width columns.
    const int modelRows = frameSize.height * nmixtures_;
    const int modelCols = frameSize_.width;

    weight_.create(modelRows, modelCols, CV_32FC1);
    variance_.create(modelRows, modelCols, CV_32FC1);
    mean_.create(modelRows, modelCols, CV_32FC(channels));

    // One entry per pixel keeping track of how many modes are in use.
    bgmodelUsedModes_.create(frameSize_, CV_32FC1);

    loadConstants(varThreshold, backgroundRatio, varThresholdGen, fVarInit, fVarMin, fVarMax, fTau, nShadowDetection);
}
void cv::ocl::MOG2::operator()(const oclMat& frame, oclMat& fgmask, float learningRate)
using namespace cv::ocl::device::mog;
int ch = frame.oclchannels();
int work_ch = ch;
if (nframes_ == 0 || learningRate >= 1.0f || frame.size() != frameSize_ || work_ch != mean_.oclchannels())
initialize(frame.size(), frame.type());
fgmask.create(frameSize_, CV_8UC1);
learningRate = learningRate >= 0.0f && nframes_ > 1 ? learningRate : 1.0f / std::min(2 * nframes_, history);
CV_Assert(learningRate >= 0.0f);
mog2_ocl(frame, frame.oclchannels(), fgmask, bgmodelUsedModes_, weight_, variance_, mean_, learningRate, -learningRate * fCT, bShadowDetection, nmixtures_);
// Writes the current background estimate into backgroundImage.
// The output is (re)created with the model's frame size and frame type, then
// filled by getBackgroundImage2_ocl from the used-modes, weight and mean buffers.
void cv::ocl::MOG2::getBackgroundImage(oclMat& backgroundImage) const
{
    using namespace cv::ocl::device::mog;

    backgroundImage.create(frameSize_, frameType_);

    // channel count is queried after create() so it reflects frameType_
    const int channels = backgroundImage.oclchannels();
    cv::ocl::device::mog::getBackgroundImage2_ocl(channels, bgmodelUsedModes_, weight_, mean_, backgroundImage, nmixtures_);
}
void cv::ocl::MOG2::release()
frameSize_ = Size(0, 0);
frameType_ = 0;
nframes_ = 0;
@ -1,99 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Nathan,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
// Launches the "blendLinear" kernel of the blend_linear program on src1/src2
// with per-pixel weights weights1/weights2, writing to dst.
// Preconditions (asserted): src1 depth <= CV_32F; src1 and src2 share size and
// type; both weight maps are CV_32FC1 and have the size of src1.
// dst is (re)created with the size and type of src1.
void cv::ocl::blendLinear(const oclMat &src1, const oclMat &src2, const oclMat &weights1, const oclMat &weights2,
oclMat &dst)
CV_Assert(src1.depth() <= CV_32F);
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert(weights1.size() == weights2.size() && weights1.size() == src1.size() &&
weights1.type() == CV_32FC1 && weights2.type() == CV_32FC1);
dst.create(src1.size(), src1.type());
// global range spans dst.cols x dst.rows, launched with 16x16 work-groups
size_t globalSize[] = { dst.cols, dst.rows, 1};
size_t localSize[] = { 16, 16, 1 };
int depth = dst.depth(), ocn = dst.oclchannels();
// steps and offsets converted from bytes to element units
int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize();
int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize();
int weight1_step = weights1.step / weights1.elemSize(), weight1_offset = weights1.offset / weights1.elemSize();
int weight2_step = weights2.step / weights2.elemSize(), weight2_offset = weights2.offset / weights2.elemSize();
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
// vector-width suffix indexed by ocn (valid indices 0..4); index 3 maps to "4"
const char * const channelMap[] = { "", "", "2", "4", "4" };
// OpenCL scalar type name indexed by depth (7 entries)
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
// T / convertToT describe the pixel type, FT / convertToFT the float working type;
// the "_sat_rte" conversion suffix is only added when depth < CV_32S
std::string buildOptions = format("-D T=%s%s -D convertToT=convert_%s%s%s -D FT=float%s -D convertToFT=convert_float%s",
typeMap[depth], channelMap[ocn], typeMap[depth], channelMap[ocn],
depth >= CV_32S ? "" : "_sat_rte", channelMap[ocn], channelMap[ocn]);
std::vector< std::pair<size_t, const void *> > args;
// NOTE(review): the operand after '&' is absent in every cl_mem argument below;
// judging by the offset/step that follows each one they are presumably the
// buffers of src1, src2, weights1, weights2 and dst - confirm.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_offset ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src1_step ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_offset ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src2_step ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&weight1_offset ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&weight1_step ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&weight2_offset ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&weight2_step ));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.rows ));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.cols ));
openCLExecuteKernel(src1.clCxt, &blend_linear, "blendLinear", globalSize, localSize, args,
-1, -1, buildOptions.c_str());
@ -1,91 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Matthias Bady aegirxx ==>
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
// Stores the requested descriptor length in `bytes`; compute() allocates that
// many 8-bit columns per keypoint.
BRIEF_OCL::BRIEF_OCL( int _bytes ) : bytes( _bytes )
// Computes descriptors for the given keypoints by launching the
// "extractBriefDescriptors" kernel of the brief program.
//   image       - must be CV_8UC1 (asserted)
//   keypoints   - one keypoint per column; an empty matrix makes the call a no-op
//   mask        - replaced by an all-ones 1 x keypoints.cols CV_8UC1 matrix when
//                 its column count does not match keypoints.cols
//   descriptors - replaced by a keypoints.cols x bytes CV_8UC1 matrix
void BRIEF_OCL::compute( const oclMat& image, const oclMat& keypoints, oclMat& mask, oclMat& descriptors ) const
CV_Assert( image.type( ) == CV_8UC1 );
if ( keypoints.size( ).area( ) == 0 ) return;
// the host Mat is only used to give the device matrix its size and type
descriptors = oclMat( Mat( keypoints.cols, bytes, CV_8UC1 ) );
if( mask.cols != keypoints.cols )
mask = oclMat( Mat::ones( 1, keypoints.cols, CV_8UC1 ) );
// 32-bit integral image of the input, bound as a texture for the kernel
oclMat sum;
integral( image, sum, CV_32S );
cl_mem sumTexture = bindTexture( sum );
// NOTE(review): the insertion chain below has no left-hand operand as written;
// presumably it starts with build_opt - confirm.
std::stringstream build_opt;
<< " -D BYTES=" << bytes
<< " -D BORDER=" << getBorderSize();
const String kernelname = "extractBriefDescriptors";
// work-groups of `bytes` items; keypoints.cols * bytes items in total
size_t localThreads[3] = {bytes, 1, 1};
size_t globalThreads[3] = {keypoints.cols * bytes, 1, 1};
Context* ctx = Context::getContext( );
std::vector< std::pair<size_t, const void *> > args;
args.push_back( std::make_pair( sizeof (cl_mem), (void *) &sumTexture ) );
// NOTE(review): the operand after '&' is absent in the three cl_mem arguments
// below; from the step arguments they are presumably the keypoints, descriptors
// and mask buffers - confirm.
args.push_back( std::make_pair( sizeof (cl_mem), (void *) & ) );
args.push_back( std::make_pair( sizeof (cl_int), (void *) &keypoints.step ) );
args.push_back( std::make_pair( sizeof (cl_mem), (void *) & ) );
args.push_back( std::make_pair( sizeof (cl_int), (void *) &descriptors.step ) );
args.push_back( std::make_pair( sizeof (cl_mem), (void *) & ) );
openCLExecuteKernel( ctx, &brief, kernelname, globalThreads, localThreads, args, -1, -1, build_opt.str( ).c_str( ) );
// the texture created by bindTexture above is released here
openCLFree( sumTexture );
int BRIEF_OCL::getBorderSize( )
return PATCH_SIZE / 2 + KERNEL_SIZE / 2;
File diff suppressed because it is too large
Load Diff
@ -1,285 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Peng Xiao,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
// buildWarpPlaneMaps
// Builds the x/y lookup maps for a plane warp by launching the
// "buildWarpPlaneMaps" kernel of the build_warps program.
//   dst_roi - destination rectangle; its size defines the map size and its
//             top-left corner is passed to the kernel as (tl_u, tl_v)
//   K, R    - 3x3 CV_32F matrices (asserted)
//   T       - 3-element CV_32F continuous vector, 3x1 or 1x3 (asserted)
//   scale   - forwarded unchanged to the kernel
//   xmap, ymap - outputs, (re)created as CV_32F with the size of dst_roi
void cv::ocl::buildWarpPlaneMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T,
                                 float scale, oclMat &xmap, oclMat &ymap)
{
    CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
    CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
    CV_Assert((T.size() == Size(3, 1) || T.size() == Size(1, 3)) && T.type() == CV_32F && T.isContinuous());

    Mat K_Rinv = K * R.t();

    // Pack K*R^T (9 floats) followed by T (3 floats) into one row so both reach
    // the kernel through a single cl_mem.
    // Assigning a Mat to the temporary returned by KRT_mat(Range, Range) only
    // rebinds that temporary header and copies nothing, so the values are copied
    // explicitly. Range ends are exclusive: [0, 9) holds the 9 matrix entries and
    // [9, 12) the 3 translation entries.
    Mat KRT_mat(1, 12, CV_32FC1); // 9 + 3
    Mat KRT_head = KRT_mat(Range::all(), Range(0, 9));
    Mat KRT_tail = KRT_mat(Range::all(), Range(9, 12));
    K_Rinv.reshape(1, 1).copyTo(KRT_head);
    T.reshape(1, 1).copyTo(KRT_tail);

    oclMat KRT_oclMat(KRT_mat);

    xmap.create(dst_roi.size(), CV_32F);
    ymap.create(dst_roi.size(), CV_32F);

    int tl_u = dst_roi.tl().x;
    int tl_v = dst_roi.tl().y;

    // steps and offsets in element units
    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();

    std::vector< std::pair<size_t, const void *> > args;
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&xmap.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&ymap.data));
    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&KRT_oclMat.data));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
    args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
    args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));

    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
#ifdef ANDROID
    size_t localThreads[3] = {32, 4, 1};
#else
    size_t localThreads[3] = {32, 8, 1};
#endif
    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPlaneMaps", globalThreads, localThreads, args, -1, -1);
}
// buildWarpCylindricalMaps
// Builds the x/y lookup maps for a cylindrical warp by launching the
// "buildWarpCylindricalMaps" kernel of the build_warps program.
//   dst_roi - its size defines the size of both maps
//   K, R    - 3x3 CV_32F matrices (asserted); K * R^T is uploaded as one row
//   scale   - forwarded unchanged to the kernel
//   xmap, ymap - outputs, (re)created as CV_32F
void cv::ocl::buildWarpCylindricalMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, float scale,
oclMat &xmap, oclMat &ymap)
CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
Mat K_Rinv = K * R.t();
// flattened to a single row before upload
oclMat KR_oclMat(K_Rinv.reshape(1, 1));
xmap.create(dst_roi.size(), CV_32F);
ymap.create(dst_roi.size(), CV_32F);
// NOTE(review): the initialisers of tl_u / tl_v are absent as written;
// presumably the top-left corner of dst_roi - confirm.
int tl_u =;
int tl_v =;
// steps and offsets in element units
int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
std::vector< std::pair<size_t, const void *> > args;
// NOTE(review): the operand after '&' is absent in the three cl_mem arguments
// below; presumably the xmap, ymap and KR_oclMat buffers - confirm.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
// NOTE(review): two localThreads definitions follow the #ifdef with no
// #else / #endif visible - confirm the preprocessor structure.
#ifdef ANDROID
size_t localThreads[3] = {32, 1, 1};
size_t localThreads[3] = {32, 8, 1};
openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpCylindricalMaps", globalThreads, localThreads, args, -1, -1);
// buildWarpSphericalMaps
// Builds the x/y lookup maps for a spherical warp by launching the
// "buildWarpSphericalMaps" kernel of the build_warps program.
//   dst_roi - its size defines the size of both maps
//   K, R    - 3x3 CV_32F matrices (asserted); K * R^T is uploaded as one row
//   scale   - forwarded unchanged to the kernel
//   xmap, ymap - outputs, (re)created as CV_32F
void cv::ocl::buildWarpSphericalMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, float scale,
oclMat &xmap, oclMat &ymap)
CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
Mat K_Rinv = K * R.t();
oclMat KR_oclMat(K_Rinv.reshape(1, 1));
// only K_Rinv is uploaded (as the single-row KR_oclMat above); no R_Kinv is computed in this function
xmap.create(dst_roi.size(), CV_32F);
ymap.create(dst_roi.size(), CV_32F);
// NOTE(review): the initialisers of tl_u / tl_v are absent as written;
// presumably the top-left corner of dst_roi - confirm.
int tl_u =;
int tl_v =;
// steps and offsets in element units
int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
std::vector< std::pair<size_t, const void *> > args;
// NOTE(review): the operand after '&' is absent in the three cl_mem arguments
// below; presumably the xmap, ymap and KR_oclMat buffers - confirm.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_u));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&tl_v));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
// NOTE(review): two localThreads definitions follow the #ifdef with no
// #else / #endif visible - confirm the preprocessor structure.
#ifdef ANDROID
size_t localThreads[3] = {32, 4, 1};
size_t localThreads[3] = {32, 8, 1};
openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpSphericalMaps", globalThreads, localThreads, args, -1, -1);
// buildWarpAffineMaps
// Builds the x/y lookup maps for an affine warp by launching the
// "buildWarpAffineMaps" kernel of the build_warps program.
//   M       - 2x3 transform (asserted); converted to CV_32F coefficients
//   inverse - selects whether M is used directly or inverted first (see note below)
//   dsize   - size of both output maps
//   xmap, ymap - outputs, (re)created as CV_32FC1
void cv::ocl::buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
CV_Assert(M.rows == 2 && M.cols == 3);
xmap.create(dsize, CV_32FC1);
ymap.create(dsize, CV_32FC1);
// coeffsMat is a 2x3 float header over the stack array coeffs
float coeffs[2 * 3];
Mat coeffsMat(2, 3, CV_32F, (void *)coeffs);
// NOTE(review): as written the inversion below is not guarded by an else, so it
// would overwrite the direct conversion - confirm that M is used as-is when
// `inverse` is true and inverted otherwise.
if (inverse)
M.convertTo(coeffsMat, coeffsMat.type());
cv::Mat iM;
invertAffineTransform(M, iM);
iM.convertTo(coeffsMat, coeffsMat.type());
// steps and offsets in element units
int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
// coefficients flattened to one row before upload
oclMat coeffsOclMat(coeffsMat.reshape(1, 1));
std::vector< std::pair<size_t, const void *> > args;
// NOTE(review): the operand after '&' is absent in the three cl_mem arguments
// below; presumably the xmap, ymap and coeffsOclMat buffers - confirm.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
// NOTE(review): two localThreads definitions follow the #ifdef with no
// #else / #endif visible - confirm the preprocessor structure.
#ifdef ANDROID
size_t localThreads[3] = {32, 4, 1};
size_t localThreads[3] = {32, 8, 1};
openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpAffineMaps", globalThreads, localThreads, args, -1, -1);
// buildWarpPerspectiveMaps
// Builds the x/y lookup maps for a perspective warp by launching the
// "buildWarpPerspectiveMaps" kernel of the build_warps program.
//   M       - 3x3 transform (asserted); converted to CV_32F coefficients
//   inverse - selects whether M is used directly or inverted first (see note below)
//   dsize   - size of both output maps; must have a positive area (asserted)
//   xmap, ymap - outputs, (re)created as CV_32FC1
void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
CV_Assert(M.rows == 3 && M.cols == 3);
CV_Assert(dsize.area() > 0);
xmap.create(dsize, CV_32FC1);
ymap.create(dsize, CV_32FC1);
// coeffsMat is a 3x3 float header over the stack array coeffs
float coeffs[3 * 3];
Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);
// NOTE(review): as written the inversion below is not guarded by an else, so it
// would overwrite the direct conversion - confirm that M is used as-is when
// `inverse` is true and inverted otherwise.
if (inverse)
M.convertTo(coeffsMat, coeffsMat.type());
cv::Mat iM;
invert(M, iM);
iM.convertTo(coeffsMat, coeffsMat.type());
// coefficients flattened to one row before upload
oclMat coeffsOclMat(coeffsMat.reshape(1, 1));
// steps and offsets in element units
int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
std::vector< std::pair<size_t, const void *> > args;
// NOTE(review): the operand after '&' is absent in the three cl_mem arguments
// below; presumably the xmap, ymap and coeffsOclMat buffers - confirm.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&xmap_offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&ymap_offset));
size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
// unlike the other build_warps launchers, no local work size is supplied (NULL)
openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPerspectiveMaps", globalThreads, NULL, args, -1, -1);
@ -1,387 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Peng Xiao,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
// Constructs a buffer set around caller-supplied gradients.
//   dx_, dy_ - CV_32SC1 gradient images of identical size (asserted); they are
//              stored in the dx / dy members.
// A 1x1 CV_32SC1 counter is allocated and the working buffers are sized through
// create() with an aperture of -1.
cv::ocl::CannyBuf::CannyBuf(const oclMat &dx_, const oclMat &dy_) : dx(dx_), dy(dy_), counter(1, 1, CV_32SC1)
{
    CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());

    const Size gradientSize = dx_.size();
    create(gradientSize, -1);
}
// Makes sure every working buffer is large enough for an image of image_size.
//   apperture_size - 3 sizes the dx_buf / dy_buf intermediates; any other
//                    positive value creates the derivative filters on first use;
//                    callers in this file pass -1 when gradients are supplied.
// NOTE(review): block delimiters are absent as written, so which statements
// belong to each branch cannot be read off this text - confirm the scoping of
// the if / else-if bodies.
void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size)
ensureSizeIsEnough(image_size, CV_32SC1, dx);
ensureSizeIsEnough(image_size, CV_32SC1, dy);
if(apperture_size == 3)
ensureSizeIsEnough(image_size, CV_32SC1, dx_buf);
ensureSizeIsEnough(image_size, CV_32SC1, dy_buf);
else if(apperture_size > 0)
// kx and ky are not referenced again in this function
Mat kx, ky;
// filters are created only once; existing ones are kept
if (!filterDX)
filterDX = createDerivFilter_GPU(CV_8U, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE);
if (!filterDY)
filterDY = createDerivFilter_GPU(CV_8U, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE);
// magnitude and map buffers are two rows and two columns larger than the image
ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, magBuf);
ensureSizeIsEnough(image_size.height + 2, image_size.width + 2, CV_32FC1, mapBuf);
// tracking buffers: a single row with one CV_16UC2 entry per image pixel
ensureSizeIsEnough(1, image_size.area(), CV_16UC2, trackBuf1);
ensureSizeIsEnough(1, image_size.area(), CV_16UC2, trackBuf2);
// CannyBuf::release.
// NOTE(review): no body statements are present after this signature as
// written - confirm which members are expected to be freed here.
void cv::ocl::CannyBuf::release()
// Forward declarations of the kernel-launch helpers in cv::ocl::canny that the
// Canny entry points in this file call.
namespace cv
namespace ocl
namespace canny
// launches the "calcSobelRowPass" kernel
void calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_buf, int rows, int cols);
// launches "calcMagnitude_buf": takes the row-pass buffers plus dx, dy and mag
void calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad);
// launches "calcMagnitude": takes ready-made dx / dy gradients and mag
void calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad);
// the four helpers below are called in this order by CannyCaller
void calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh);
void edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, oclMat& counter, int rows, int cols);
void edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, oclMat& counter, int rows, int cols);
void getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols);
}// cv::ocl
// Runs the stages that follow gradient-magnitude computation, in order:
// edge map, local hysteresis, global hysteresis, final edge extraction.
//   buf  - supplies dx, dy, magBuf, mapBuf, the two tracking buffers and the counter
//   dst  - receives the edges; its rows / cols give the processed extent
//   low_thresh, high_thresh - hysteresis thresholds handed to calcMap_gpu
void CannyCaller(CannyBuf &buf, oclMat &dst, float low_thresh, float high_thresh)
{
    using namespace ::cv::ocl::canny;

    const int rows = dst.rows;
    const int cols = dst.cols;

    calcMap_gpu(buf.dx, buf.dy, buf.magBuf, buf.mapBuf, rows, cols, low_thresh, high_thresh);

    edgesHysteresisLocal_gpu(buf.mapBuf, buf.trackBuf1, buf.counter, rows, cols);
    edgesHysteresisGlobal_gpu(buf.mapBuf, buf.trackBuf1, buf.trackBuf2, buf.counter, rows, cols);

    getEdges_gpu(buf.mapBuf, dst, rows, cols);
}
void cv::ocl::Canny(const oclMat &src, oclMat &dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
CannyBuf buf(src.size(), apperture_size);
Canny(src, buf, dst, low_thresh, high_thresh, apperture_size, L2gradient);
// Canny edge detection on an 8-bit single-channel image using caller-owned buffers.
//   src  - must be CV_8UC1 (asserted)
//   buf  - working buffers; (re)sized here through buf.create()
//   dst  - output, (re)created as CV_8U with the size of src
//   low_thresh, high_thresh - swapped when given in the wrong order
//   apperture_size - 3 selects the calcSobelRowPass / buffered-magnitude kernels
//   L2gradient - forwarded to the magnitude kernels
void cv::ocl::Canny(const oclMat &src, CannyBuf &buf, oclMat &dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
using namespace ::cv::ocl::canny;
CV_Assert(src.type() == CV_8UC1);
if( low_thresh > high_thresh )
std::swap( low_thresh, high_thresh );
dst.create(src.size(), CV_8U);
buf.create(src.size(), apperture_size);
// NOTE(review): no else separates the two gradient paths as written; presumably
// the filterDX / filterDY path is the alternative for apertures other than 3 -
// confirm.
if (apperture_size == 3)
calcSobelRowPass_gpu(src, buf.dx_buf, buf.dy_buf, src.rows, src.cols);
calcMagnitude_gpu(buf.dx_buf, buf.dy_buf, buf.dx, buf.dy, buf.magBuf, src.rows, src.cols, L2gradient);
buf.filterDX->apply(src, buf.dx);
buf.filterDY->apply(src, buf.dy);
calcMagnitude_gpu(buf.dx, buf.dy, buf.magBuf, src.rows, src.cols, L2gradient);
// the kernels take float thresholds
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, oclMat &dst, double low_thresh, double high_thresh, bool L2gradient)
CannyBuf buf(dx, dy);
Canny(dx, dy, buf, dst, low_thresh, high_thresh, L2gradient);
void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &dst, double low_thresh, double high_thresh, bool L2gradient)
using namespace ::cv::ocl::canny;
CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());
if( low_thresh > high_thresh )
std::swap( low_thresh, high_thresh);
dst.create(dx.size(), CV_8U);
buf.dx = dx;
buf.dy = dy;
buf.create(dx.size(), -1);
calcMagnitude_gpu(buf.dx, buf.dy, buf.magBuf, dx.rows, dx.cols, L2gradient);
CannyCaller(buf, dst, static_cast<float>(low_thresh), static_cast<float>(high_thresh));
// Launches the "calcSobelRowPass" kernel of the imgproc_canny program on src's
// context, with a cols x rows global range and 16x16 work-groups.
// step and offset of src, dx_buf and dy_buf are passed exactly as stored in the
// matrices (no conversion to element units is done here).
void canny::calcSobelRowPass_gpu(const oclMat &src, oclMat &dx_buf, oclMat &dy_buf, int rows, int cols)
Context *clCxt = src.clCxt;
String kernelName = "calcSobelRowPass";
std::vector< std::pair<size_t, const void *> > args;
// NOTE(review): the operand after '&' is absent in the three cl_mem arguments
// below; presumably the src, dx_buf and dy_buf buffers - confirm.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
// NOTE(review): step / offset members are passed by address with sizeof(cl_int);
// confirm their declared types are compatible with a cl_int read.
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx_buf.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx_buf.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy_buf.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy_buf.offset));
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
// Launches the "calcMagnitude_buf" kernel of the imgproc_canny program on
// dx_buf's context, with a cols x rows global range and 16x16 work-groups.
// The kernel receives the row-pass buffers together with dx, dy and mag.
//   L2Grad - when true the kernel is built with -D L2GRAD
void canny::calcMagnitude_gpu(const oclMat &dx_buf, const oclMat &dy_buf, oclMat &dx, oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
Context *clCxt = dx_buf.clCxt;
String kernelName = "calcMagnitude_buf";
std::vector< std::pair<size_t, const void *> > args;
// NOTE(review): the operand after '&' is absent in the five cl_mem arguments
// below; from the step/offset order they are presumably the dx_buf, dy_buf, dx,
// dy and mag buffers - confirm.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
// step and offset are passed exactly as stored in each matrix
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx_buf.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx_buf.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy_buf.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy_buf.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&mag.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&mag.offset));
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
// empty option string when L2Grad is false
const char * build_options = L2Grad ? "-D L2GRAD":"";
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
// Launches the "calcMagnitude" kernel of the imgproc_canny program (overload without
// the intermediate dx_buf/dy_buf inputs).
// Kernel arguments, in push order: three cl_mem handles, rows, cols, then the
// (step, offset) pairs of dx, dy and mag.
// The global size is {cols, rows, 1} with 16x16x1 work-groups; when L2Grad is true
// the build option "-D L2GRAD" is passed to openCLExecuteKernel.
// NOTE(review): the cl_mem argument expressions below appear truncated ("&;") in this copy.
void canny::calcMagnitude_gpu(const oclMat &dx, const oclMat &dy, oclMat &mag, int rows, int cols, bool L2Grad)
Context *clCxt = dx.clCxt;
String kernelName = "calcMagnitude";
std::vector< std::pair<size_t, const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&mag.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&mag.offset));
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
const char * build_options = L2Grad ? "-D L2GRAD":"";
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1, build_options);
// Launches the "calcMap" kernel of the imgproc_canny program.
// Kernel arguments, in push order: four cl_mem handles, rows, cols, low_thresh and
// high_thresh (passed as cl_float), then the (step, offset) pairs of dx, dy, mag and map.
// The global size is {cols, rows, 1} with 16x16x1 work-groups.
// NOTE(review): the cl_mem argument expressions below appear truncated ("&;") in this copy.
void canny::calcMap_gpu(oclMat &dx, oclMat &dy, oclMat &mag, oclMat &map, int rows, int cols, float low_thresh, float high_thresh)
Context *clCxt = dx.clCxt;
std::vector< std::pair<size_t, const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&low_thresh));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&high_thresh));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dx.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dy.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&mag.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&mag.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.offset));
size_t globalThreads[3] = {cols, rows, 1};
String kernelName = "calcMap";
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
// Launches the "edgesHysteresisLocal" kernel of the imgproc_canny program.
// A host Mat with the same rows/cols/type as `counter` is created first; its (0, 0) int
// element is apparently reset to 0 before the launch.
// Kernel arguments, in push order: three cl_mem handles, rows, cols, then map.step and
// map.offset copied into cl_int locals.
// The global size is {cols, rows, 1} with 16x16x1 work-groups.
// NOTE(review): the counter-reset statement and the cl_mem argument expressions look
// truncated in this copy of the file - confirm against the upstream source.
void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, oclMat& counter, int rows, int cols)
Context *clCxt = map.clCxt;
std::vector< std::pair<size_t, const void *> > args;
Mat counterMat(counter.rows, counter.cols, counter.type());
||||||<int>(0, 0) = 0;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
// step/offset are copied into cl_int locals so the pushed pointers refer to
// objects whose size matches the sizeof(cl_int) passed alongside them.
cl_int stepBytes = map.step;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&stepBytes));
cl_int offsetBytes = map.offset;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&offsetBytes));
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &imgproc_canny, "edgesHysteresisLocal", globalThreads, localThreads, args, -1, -1);
// Repeatedly launches the "edgesHysteresisGlobal" kernel of the imgproc_canny program.
// Each iteration reads an int `count` from a host-side counter Mat, asserts that it is
// non-negative, sizes the launch from it, runs the kernel and then swaps st1 and st2.
// Kernel arguments, in push order: four cl_mem handles, rows, cols, count, map.step and
// map.offset. Work-groups are 128x1x1.
// NOTE(review): the loop exit for count == 0, the counter download/upload statements and
// the cl_mem argument expressions are not visible in this copy of the file. `args` is
// declared outside the loop, so confirm upstream that it is cleared on every iteration.
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, oclMat& counter, int rows, int cols)
Context *clCxt = map.clCxt;
std::vector< std::pair<size_t, const void *> > args;
size_t localThreads[3] = {128, 1, 1};
while(1 > 0)
Mat counterMat;;
int count =<int>(0, 0);
CV_Assert(count >= 0);
if (count == 0)
||||||<int>(0, 0) = 0;
// First dimension: up to 65535 groups of 128 work-items; the second dimension covers
// the remaining multiples of 65535.
size_t globalThreads[3] = {std::min((unsigned)count, 65535u) * 128, divUp(count, 65535), 1};
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&count));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.offset));
openCLExecuteKernel(clCxt, &imgproc_canny, "edgesHysteresisGlobal", globalThreads, localThreads, args, -1, -1);
// The two stack buffers exchange roles for the next pass.
std::swap(st1, st2);
// Launches the "getEdges" kernel of the imgproc_canny program.
// Kernel arguments, in push order: two cl_mem handles, rows, cols, then the
// (step, offset) pairs of map and dst.
// The global size is {cols, rows, 1} with 16x16x1 work-groups.
// NOTE(review): the cl_mem argument expressions below appear truncated ("&;") in this copy.
void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols)
Context *clCxt = map.clCxt;
String kernelName = "getEdges";
std::vector< std::pair<size_t, const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&map.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.offset));
size_t globalThreads[3] = {cols, rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
@ -1,944 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Guoping Long,
// Niko Li,
// Yao Wang,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include <stdlib.h>
#include <ctype.h>
#include <iomanip>
#include <fstream>
#include "cl_programcache.hpp"
#include "opencv2/ocl/private/opencl_utils.hpp"
namespace cv {
namespace ocl {
using namespace cl_utils;
// On WIN32 builds, `__termination` is set by DllMain when the process is exiting; the
// release paths below consult it before releasing OpenCL objects.
// NOTE(review): identifiers containing a double underscore are reserved for the C++
// implementation; consider renaming.
#if defined(WIN32)
static bool __termination = false;
// Holds the two module-wide mutexes: one guarding initialization, one guarding the
// current-context pointer.
struct __Module
cv::Mutex initializationMutex;
cv::Mutex currentContextMutex;
// Single file-local instance that owns the mutexes.
static __Module __module;
// Returns the module-wide initialization mutex (non-static, so visible to other
// translation units).
cv::Mutex& getInitializationMutex()
return __module.initializationMutex;
// Returns the mutex that guards the current-context pointer (file-local).
static cv::Mutex& getCurrentContextMutex()
return __module.currentContextMutex;
// Extracts the numeric "<major>.<minor>" pair from an OpenCL version string such as
// "OpenCL 1.2 <vendor info>" or "OpenCL C 1.1 <vendor info>".
//
// The version token is the first space-separated token (after the first space) that
// starts with a digit. A '.' must follow it and a space must follow the '.'.
//
// @param versionStr  string to parse
// @param major       receives the major version, or 0 when parsing fails
// @param minor       receives the minor version, or 0 when parsing fails
// @return true when both numbers were located, false otherwise
static bool parseOpenCLVersion(const std::string& versionStr, int& major, int& minor)
{
    // Advance p0 to the space that directly precedes the first token starting with a digit.
    size_t p0 = versionStr.find(' ');
    while (p0 != std::string::npos && p0 + 1 < versionStr.length())
    {
        // Cast to unsigned char: passing a negative char value to isdigit() is undefined.
        if (isdigit(static_cast<unsigned char>(versionStr[p0 + 1])))
            break;
        p0 = versionStr.find(' ', p0 + 1);
    }

    // find() with a start position of npos yields npos, so failures propagate.
    const size_t p1 = versionStr.find('.', p0);
    const size_t p2 = versionStr.find(' ', p1);
    if (p0 == std::string::npos || p1 == std::string::npos || p2 == std::string::npos)
    {
        major = 0;
        minor = 0;
        return false;
    }

    const std::string majorStr = versionStr.substr(p0 + 1, p1 - p0 - 1);
    const std::string minorStr = versionStr.substr(p1 + 1, p2 - p1 - 1);
    major = atoi(majorStr.c_str());
    minor = atoi(minorStr.c_str());
    return true;
}
// PlatformInfo extended with the native cl_platform_id and the indices of the
// platform's devices.
// NOTE(review): the constructor header and the call that consumes the trailing
// platformVersionMajor/platformVersionMinor arguments are not visible in this copy.
struct PlatformInfoImpl : public PlatformInfo
cl_platform_id platform_id;
// Device indices; initializeOpenCLDevices() stores global_devices indices here.
std::vector<int> deviceIDs;
: platform_id(NULL)
// Fills this object from `platform`. May only be called once per object (asserted via
// platform_id == NULL). Queries profile, version, name, vendor and extensions strings.
void init(int id, cl_platform_id platform)
CV_Assert(platform_id == NULL);
this->_id = id;
platform_id = platform;
openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_PROFILE, this->platformProfile));
openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_VERSION, this->platformVersion));
openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_NAME, this->platformName));
openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_VENDOR, this->platformVendor));
openCLSafeCall(getStringInfo(clGetPlatformInfo, platform, CL_PLATFORM_EXTENSIONS, this->platformExtensons));
this->platformVersionMajor, this->platformVersionMinor);
// DeviceInfo extended with the native platform/device handles.
// NOTE(review): the constructor header, several else branches and the bodies of the
// trailing `id < 0` / CL_VERSION_1_2 checks are not visible in this copy of the file.
struct DeviceInfoImpl: public DeviceInfo
cl_platform_id platform_id;
cl_device_id device_id;
: platform_id(NULL), device_id(NULL)
// Fills this object from `device`, which belongs to `platformInfoImpl`. May only be
// called once per object (asserted via device_id == NULL). A pointer to
// platformInfoImpl is stored, so that object must outlive this one.
void init(int id, PlatformInfoImpl& platformInfoImpl, cl_device_id device)
CV_Assert(device_id == NULL);
this->_id = id;
platform_id = platformInfoImpl.platform_id;
device_id = device;
this->platform = &platformInfoImpl;
// Identification strings and device type.
cl_device_type type = cl_device_type(-1);
openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_TYPE, type));
this->deviceType = DeviceType(type);
openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_PROFILE, this->deviceProfile));
openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_VERSION, this->deviceVersion));
openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_NAME, this->deviceName));
openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_VENDOR, this->deviceVendor));
cl_uint vendorID = 0;
openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_VENDOR_ID, vendorID));
this->deviceVendorId = vendorID;
openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DRIVER_VERSION, this->deviceDriverVersion));
openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, this->deviceExtensions));
this->deviceVersionMajor, this->deviceVersionMinor);
// Execution limits.
size_t maxWorkGroupSize = 0;
openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_WORK_GROUP_SIZE, maxWorkGroupSize));
this->maxWorkGroupSize = maxWorkGroupSize;
cl_uint maxDimensions = 0;
openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, maxDimensions));
// NOTE(review): &maxWorkItemSizes[0] is taken below; that is invalid if the query
// above ever yields maxDimensions == 0.
std::vector<size_t> maxWorkItemSizes(maxDimensions);
openCLSafeCall(clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxDimensions,
(void *)&maxWorkItemSizes[0], 0));
this->maxWorkItemSizes = maxWorkItemSizes;
cl_uint maxComputeUnits = 0;
openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_COMPUTE_UNITS, maxComputeUnits));
this->maxComputeUnits = maxComputeUnits;
// Memory sizes are queried as cl_ulong and narrowed to size_t.
cl_ulong localMemorySize = 0;
openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_LOCAL_MEM_SIZE, localMemorySize));
this->localMemorySize = (size_t)localMemorySize;
cl_ulong maxMemAllocSize = 0;
openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, maxMemAllocSize));
this->maxMemAllocSize = (size_t)maxMemAllocSize;
cl_bool unifiedMemory = false;
openCLSafeCall(getScalarInfo(clGetDeviceInfo, device, CL_DEVICE_HOST_UNIFIED_MEMORY, unifiedMemory));
this->isUnifiedMemory = unifiedMemory != 0;
//initialize extra options for compilation. Currently only fp64 is included.
//Assume 4KB is enough to store all possible extensions.
// NOTE(review): deviceExtensions was already queried above; this second query looks redundant.
openCLSafeCall(getStringInfo(clGetDeviceInfo, device, CL_DEVICE_EXTENSIONS, this->deviceExtensions));
// Double support is detected by the presence of the "cl_khr_fp64" extension name.
size_t fp64_khr = this->deviceExtensions.find("cl_khr_fp64");
if(fp64_khr != std::string::npos)
this->compilationExtraOptions += "-D DOUBLE_SUPPORT";
this->haveDoubleSupport = true;
this->haveDoubleSupport = false;
// An Intel device is detected from the platform vendor string, not the device vendor.
size_t intel_platform = platformInfoImpl.platformVendor.find("Intel");
if(intel_platform != std::string::npos)
this->compilationExtraOptions += " -D INTEL_DEVICE";
this->isIntelDevice = true;
this->isIntelDevice = false;
if (id < 0)
#ifdef CL_VERSION_1_2
if (this->deviceVersionMajor > 1 || (this->deviceVersionMajor == 1 && this->deviceVersionMinor >= 2))
// File-local registries of every discovered platform and device. They are filled by
// initializeOpenCLDevices(); other objects keep references and pointers into them.
static std::vector<PlatformInfoImpl> global_platforms;
static std::vector<DeviceInfoImpl> global_devices;
// Splits `s` on `delim` and appends every piece to `elems` (existing elements are kept).
// Follows std::getline semantics: an empty input yields no pieces, empty pieces between
// adjacent delimiters are kept, and a single trailing delimiter does not add an empty piece.
static void split(const std::string &s, char delim, std::vector<std::string> &elems) {
    std::stringstream ss(s);
    std::string item;
    while (std::getline(ss, item, delim)) {
        elems.push_back(item);
    }
}

// Convenience overload: returns the pieces of `s` split on `delim` as a new vector.
static std::vector<std::string> split(const std::string &s, char delim) {
    std::vector<std::string> elems;
    split(s, delim, elems);
    return elems;
}
// Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
// Sample: AMD:GPU:
// Sample: AMD:GPU:Tahiti
// Sample: :GPU|CPU: = '' = ':' = '::'
static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
std::string deviceTypesStr;
size_t p0 = configurationStr.find(':');
if (p0 != std::string::npos)
size_t p1 = configurationStr.find(':', p0 + 1);
if (p1 != std::string::npos)
size_t p2 = configurationStr.find(':', p1 + 1);
if (p2 != std::string::npos)
std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
return false;
// assume platform + device types + device name/id
platform = configurationStr.substr(0, p0);
deviceTypesStr = configurationStr.substr(p0 + 1, p1 - (p0 + 1));
deviceNameOrID = configurationStr.substr(p1 + 1, configurationStr.length() - (p1 + 1));
// assume platform + device types
platform = configurationStr.substr(0, p0);
deviceTypesStr = configurationStr.substr(p0 + 1, configurationStr.length() - (p0 + 1));
// assume only platform
platform = configurationStr;
deviceTypes = split(deviceTypesStr, '|');
return true;
// Chooses the OpenCL device to use, driven by the OPENCV_OPENCL_DEVICE environment
// variable (parsed with parseOpenCLDeviceConfiguration).
//  - A one-character, all-digit device name is treated as a device index.
//  - A non-empty platform string is matched as a substring of each platform name.
//  - Recognised device type strings are "GPU", "CPU", "ACCELERATOR" and "ALL".
//  - Devices reporting an OpenCL version below 1.1 are skipped with a message.
// Returns true when a device was selected. On failure a diagnostic describing the
// requested platform, device types and device name is written to std::cerr and false
// is returned.
// NOTE(review): the statements that map type strings to CL type flags, the default
// device-type list, the actual device selection call and the `not_found:` label are not
// visible in this copy of the file.
static bool selectOpenCLDevice()
std::string platform;
std::vector<std::string> deviceTypes;
std::string deviceName;
const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
if (configuration)
if (!parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName))
return false;
bool isID = false;
int deviceID = -1;
if (deviceName.length() == 1)
// We limit ID range to 0..9, because we want to write:
// - '2500' to mean i5-2500
// - '8350' to mean AMD FX-8350
// - '650' to mean GeForce 650
// To extend ID range change condition to '> 0'
isID = true;
for (size_t i = 0; i < deviceName.length(); i++)
// NOTE(review): isdigit() on a plain char is undefined for negative values; a cast
// to unsigned char would be safer.
if (!isdigit(deviceName[i]))
isID = false;
if (isID)
deviceID = atoi(deviceName.c_str());
CV_Assert(deviceID >= 0);
// Resolve the requested platform, if any, by substring match on its name.
const PlatformInfo* platformInfo = NULL;
if (platform.length() > 0)
PlatformsInfo platforms;
for (size_t i = 0; i < platforms.size(); i++)
if (platforms[i]->platformName.find(platform) != std::string::npos)
platformInfo = platforms[i];
if (platformInfo == NULL)
std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
goto not_found;
if (deviceTypes.size() == 0)
if (!isID)
// Try each requested device type in order.
for (size_t t = 0; t < deviceTypes.size(); t++)
int deviceType = 0;
if (deviceTypes[t] == "GPU")
else if (deviceTypes[t] == "CPU")
else if (deviceTypes[t] == "ACCELERATOR")
else if (deviceTypes[t] == "ALL")
std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
goto not_found;
DevicesInfo devices;
getOpenCLDevices(devices, deviceType, platformInfo);
// With an ID only index deviceID is examined; otherwise every device is scanned and
// matched by name substring.
for (size_t i = (isID ? deviceID : 0);
(isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
if (isID || devices[i]->deviceName.find(deviceName) != std::string::npos)
// check for OpenCL 1.1
if (devices[i]->deviceVersionMajor < 1 ||
(devices[i]->deviceVersionMajor == 1 && devices[i]->deviceVersionMinor < 1))
std::cerr << "Skip unsupported version of OpenCL device: " << devices[i]->deviceName
<< "(" << devices[i]->platform->platformName << ")" << std::endl;
continue; // unsupported version of device, skip it
catch (...)
std::cerr << "ERROR: Can't select OpenCL device: " << devices[i]->deviceName
<< "(" << devices[i]->platform->platformName << ")" << std::endl;
goto not_found;
return true;
// Failure path: report what was requested.
std::cerr << "ERROR: Required OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl
<< " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
<< " Device types: ";
for (size_t t = 0; t < deviceTypes.size(); t++)
std::cerr << deviceTypes[t] << " ";
std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
return false;
// Set to true as soon as initializeOpenCLDevices() has been entered.
static bool __initialized = false;
// Enumerates all OpenCL platforms and their devices into global_platforms and
// global_devices and returns the number of devices found (0 when OpenCL is unavailable).
// Expects global_devices to be empty on entry (checked with assert).
// NOTE(review): the platform query, the resizing of global_platforms / deviceIDs /
// devices and the try block matching the catch are not visible in this copy.
static int initializeOpenCLDevices()
__initialized = true;
assert(global_devices.size() == 0);
std::vector<cl_platform_id> platforms;
catch (cv::Exception&)
return 0; // OpenCL not found
// First pass: initialise each platform and append its devices to global_devices,
// recording their global indices in platformInfo.deviceIDs.
for (size_t i = 0; i < platforms.size(); ++i)
PlatformInfoImpl& platformInfo = global_platforms[i];
cl_platform_id platform = platforms[i];
platformInfo.init(i, platform);
std::vector<cl_device_id> devices;
cl_int status = getDevices(platform, CL_DEVICE_TYPE_ALL, devices);
if(status != CL_DEVICE_NOT_FOUND)
if(devices.size() > 0)
int baseIndx = global_devices.size();
global_devices.resize(baseIndx + devices.size());
for(size_t j = 0; j < devices.size(); ++j)
cl_device_id device = devices[j];
DeviceInfoImpl& deviceInfo = global_devices[baseIndx + j];
platformInfo.deviceIDs[j] = baseIndx + j;
deviceInfo.init(baseIndx + j, platformInfo, device);
// Second pass: device pointers are taken only after global_devices has stopped
// growing, because resize() may relocate its elements.
for (size_t i = 0; i < platforms.size(); ++i)
PlatformInfoImpl& platformInfo = global_platforms[i];
for(size_t j = 0; j < platformInfo.deviceIDs.size(); ++j)
DeviceInfoImpl& deviceInfo = global_devices[platformInfo.deviceIDs[j]];
platformInfo.devices[j] = &deviceInfo;
return global_devices.size();
// DeviceInfo default construction: id -1, device type 0, all numeric limits and version
// numbers 0, all feature flags false.
// NOTE(review): the constructor header line and the end of the initializer list are not
// visible in this copy of the file.
: _id(-1), deviceType(DeviceType(0)),
maxWorkGroupSize(0), maxComputeUnits(0), localMemorySize(0), maxMemAllocSize(0),
deviceVersionMajor(0), deviceVersionMinor(0),
haveDoubleSupport(false), isUnifiedMemory(false),isIntelDevice(false),
// nothing
// Destructor has nothing to release.
DeviceInfo::~DeviceInfo() { }
// PlatformInfo default construction: id -1 and version 0.0.
// NOTE(review): the constructor header line is not visible in this copy of the file.
: _id(-1),
platformVersionMajor(0), platformVersionMinor(0)
// nothing
// Destructor has nothing to release.
PlatformInfo::~PlatformInfo() { }
class ContextImpl;
// Pairs an OpenCL command queue with the context it was created for. The destructor
// calls release().
struct CommandQueue
ContextImpl* context_;
cl_command_queue clQueue_;
CommandQueue() : context_(NULL), clQueue_(NULL) { }
~CommandQueue() { release(); }
// Creates a queue for the given context (defined out of line).
void create(ContextImpl* context_);
// Releases the queue handle and clears both members.
// NOTE(review): any guard against clQueue_ being NULL is not visible in this copy.
void release()
#ifdef WIN32
// if process is on termination stage (ExitProcess was called and other threads were terminated)
// then disable command queue release because it may cause program hang
if (!__termination)
openCLSafeCall(clReleaseCommandQueue(clQueue_)); // some cleanup problems are here
clQueue_ = NULL;
context_ = NULL;
// One CommandQueue per thread, obtained through cv::TLSData.
cv::TLSData<CommandQueue> commandQueueTLSData;
//////////////////////////////// OpenCL context ////////////////////////
// This is a global singleton class used to represent an OpenCL context.
// It stores the native device and context handles plus a reference to the DeviceInfoImpl
// it was created for; `currentContext` points at the active instance.
// NOTE(review): access specifiers, the destructor header and the retain/release calls
// are not visible in this copy of the file.
class ContextImpl : public Context
cl_device_id clDeviceID;
cl_context clContext;
const DeviceInfoImpl& deviceInfoImpl;
// Wraps `context` for the device described by _deviceInfoImpl.
ContextImpl(const DeviceInfoImpl& _deviceInfoImpl, cl_context context)
: clDeviceID(_deviceInfoImpl.device_id), clContext(context), deviceInfoImpl(_deviceInfoImpl)
#ifdef CL_VERSION_1_2
if (supportsFeature(FEATURE_CL_VER_1_2))
// Install this object as the current context under the mutex, then delete the
// previous one after the pointer has been swapped.
ContextImpl* old = NULL;
cv::AutoLock lock(getCurrentContextMutex());
old = currentContext;
currentContext = this;
if (old != NULL)
delete old;
// The active context must not be destroyed while it is still installed.
CV_Assert(this != currentContext);
#ifdef CL_VERSION_1_2
if (supportsFeature(FEATURE_CL_VER_1_2))
if (deviceInfoImpl._id < 0) // not in the global registry, so we should cleanup it
#ifdef CL_VERSION_1_2
if (supportsFeature(FEATURE_CL_VER_1_2))
// Infos with a negative id were heap-allocated by initializeContext(); free them here.
PlatformInfoImpl* platformImpl = (PlatformInfoImpl*)(deviceInfoImpl.platform);
delete platformImpl;
delete const_cast<DeviceInfoImpl*>(&deviceInfoImpl);
clDeviceID = NULL;
#ifdef WIN32
// if process is on termination stage (ExitProcess was called and other threads were terminated)
// then disable command queue release because it may cause program hang
if (!__termination)
clContext = NULL;
static void setContext(const DeviceInfo* deviceInfo);
static void initializeContext(void* pClPlatform, void* pClContext, void* pClDevice);
bool supportsFeature(FEATURE_TYPE featureType) const;
static void cleanupContext(void);
static ContextImpl* getContext();
ContextImpl(const ContextImpl&); // disabled
ContextImpl& operator=(const ContextImpl&); // disabled
static ContextImpl* currentContext;
// No context exists until one is installed.
ContextImpl* ContextImpl::currentContext = NULL;
// Set to true once setDevice() or initializeContext() has run (even if it threw);
// ContextImpl::getContext() then skips automatic device selection.
static bool __deviceSelected = false;
// Public entry point; forwards to ContextImpl::getContext().
Context* Context::getContext()
return ContextImpl::getContext();
// Returns the current context, performing one-time default initialization when none
// exists yet: device enumeration (if not already done) followed by automatic device
// selection (if no device was chosen explicitly). Raises Error::OpenCLInitError when no
// OpenCL device is available or none can be selected, and asserts that a context
// exists before returning.
// NOTE(review): `defaultInitiaization` is set in both the normal and the catch path, so
// a failed initialization is not retried. The try block and any rethrow are not visible
// in this copy of the file.
ContextImpl* ContextImpl::getContext()
if (currentContext == NULL)
static bool defaultInitiaization = false;
if (!defaultInitiaization)
cv::AutoLock lock(getInitializationMutex());
if (!__initialized)
if (initializeOpenCLDevices() == 0)
CV_Error(Error::OpenCLInitError, "OpenCL not available");
if (!__deviceSelected)
if (!selectOpenCLDevice())
CV_Error(Error::OpenCLInitError, "Can't select OpenCL device");
defaultInitiaization = true;
catch (...)
defaultInitiaization = true;
CV_Assert(currentContext != NULL);
return currentContext;
// Forwards to ContextImpl::supportsFeature; the cast assumes every Context object is
// actually a ContextImpl.
bool Context::supportsFeature(FEATURE_TYPE featureType) const
return ((ContextImpl*)this)->supportsFeature(featureType);
// Returns the device description this context was created for.
const DeviceInfo& Context::getDeviceInfo() const
return ((ContextImpl*)this)->deviceInfoImpl;
// Returns a pointer to the stored cl_context handle (not the handle itself).
const void* Context::getOpenCLContextPtr() const
return &(((ContextImpl*)this)->clContext);
// Returns a pointer to the calling thread's cl_command_queue handle, taken from
// commandQueueTLSData.
// NOTE(review): the statement executed when the thread's queue belongs to a different
// context is not visible in this copy; presumably the queue is (re)created for this one.
const void* Context::getOpenCLCommandQueuePtr() const
ContextImpl* pThis = (ContextImpl*)this;
CommandQueue* commandQueue = commandQueueTLSData.get();
if (commandQueue->context_ != pThis)
return &commandQueue->clQueue_;
// Returns a pointer to the stored cl_device_id handle (not the handle itself).
const void* Context::getOpenCLDeviceIDPtr() const
return &(((ContextImpl*)this)->clDeviceID);
// Answers feature queries from the cached device info: Intel device, double support,
// unified memory, and OpenCL version >= 1.2. Any other value raises CV_StsBadArg.
// NOTE(review): the first three `case` labels are not visible in this copy of the file.
bool ContextImpl::supportsFeature(FEATURE_TYPE featureType) const
switch (featureType)
return deviceInfoImpl.isIntelDevice;
return deviceInfoImpl.haveDoubleSupport;
return deviceInfoImpl.isUnifiedMemory;
case FEATURE_CL_VER_1_2:
return deviceInfoImpl.deviceVersionMajor > 1 || (deviceInfoImpl.deviceVersionMajor == 1 && deviceInfoImpl.deviceVersionMinor >= 2);
CV_Error(CV_StsBadArg, "Invalid feature type");
return false;
// Teardown hooks defined elsewhere.
// NOTE(review): their call sites are not visible in this copy of the file.
void fft_teardown();
void clBlasTeardown();
// Destroys the current context, if any. The pointer is cleared before the delete so the
// destructor's `this != currentContext` assertion holds.
void ContextImpl::cleanupContext(void)
cv::AutoLock lock(getCurrentContextMutex());
if (currentContext)
ContextImpl* ctx = currentContext;
currentContext = NULL;
delete ctx;
// Creates an OpenCL context for a device from the global registry and wraps it in a new
// ContextImpl. `deviceInfo` must be the registry's own entry: its id must be a valid
// index into global_devices and its address must match that element.
// NOTE(review): the early return taken when the current context already targets this
// device, and the check of `status`, are not visible in this copy of the file.
void ContextImpl::setContext(const DeviceInfo* deviceInfo)
CV_Assert(deviceInfo->_id >= 0); // we can't specify custom devices
CV_Assert(deviceInfo->_id < (int)global_devices.size());
cv::AutoLock lock(getCurrentContextMutex());
if (currentContext)
if (currentContext->deviceInfoImpl._id == deviceInfo->_id)
DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id];
CV_Assert(deviceInfo == &infoImpl);
cl_int status = 0;
// Zero-terminated property list binding the context to the device's platform.
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(infoImpl.platform_id), 0 };
cl_context clContext = clCreateContext(cps, 1, &infoImpl.device_id, NULL, NULL, &status);
ContextImpl* ctx = new ContextImpl(infoImpl, clContext);
// Builds a ContextImpl around externally created OpenCL objects. Each argument is a
// non-NULL pointer to the corresponding native handle (cl_platform_id, cl_context,
// cl_device_id). The platform and device infos are heap-allocated and initialised with
// id -1, which marks them as not belonging to the global registry.
void ContextImpl::initializeContext(void* pClPlatform, void* pClContext, void* pClDevice)
CV_Assert(pClPlatform != NULL);
CV_Assert(pClContext != NULL);
CV_Assert(pClDevice != NULL);
cl_platform_id platform = *(cl_platform_id*)pClPlatform;
cl_context context = *(cl_context*)pClContext;
cl_device_id device = *(cl_device_id*)pClDevice;
PlatformInfoImpl* platformInfoImpl = new PlatformInfoImpl();
platformInfoImpl->init(-1, platform);
DeviceInfoImpl* deviceInfoImpl = new DeviceInfoImpl();
deviceInfoImpl->init(-1, *platformInfoImpl, device);
ContextImpl* ctx = new ContextImpl(*deviceInfoImpl, context);
// Creates a command queue with default properties (0) on the context's device and
// stores both the queue and the owning context.
// NOTE(review): the release of a previous queue and the check of `status` are not
// visible in this copy of the file.
void CommandQueue::create(ContextImpl* context)
cl_int status = 0;
cl_command_queue clCmdQueue = clCreateCommandQueue(context->clContext, context->clDeviceID, 0, &status);
context_ = context;
clQueue_ = clCmdQueue;
// Fills `platforms` from global_platforms and returns the resulting element count.
// NOTE(review): the action taken when enumeration has not run yet and the statement that
// appends each platform are not visible in this copy of the file.
int getOpenCLPlatforms(PlatformsInfo& platforms)
if (!__initialized)
for (size_t id = 0; id < global_platforms.size(); ++id)
PlatformInfoImpl& impl = global_platforms[id];
return platforms.size();
// Collects the devices matching `deviceType` into `devices` and returns the resulting
// count. With platform == NULL all devices in global_devices are considered; otherwise
// only the devices of the given platform.
// NOTE(review): the two paths filter differently. The global path accepts a device when
// any requested type bit is set (!= 0); the per-platform path requires all requested
// bits (== deviceType). Confirm that this is intended.
// NOTE(review): the statements that clear and append to `devices` are not visible here.
int getOpenCLDevices(std::vector<const DeviceInfo*> &devices, int deviceType, const PlatformInfo* platform)
if (!__initialized)
return 0;
if (platform == NULL)
for (size_t id = 0; id < global_devices.size(); ++id)
DeviceInfoImpl& deviceInfo = global_devices[id];
if (((int)deviceInfo.deviceType & deviceType) != 0)
for (size_t id = 0; id < platform->devices.size(); ++id)
const DeviceInfo* deviceInfo = platform->devices[id];
if (((int)deviceInfo->deviceType & deviceType) == deviceType)
return (int)devices.size();
// Selects `info` as the active device. __deviceSelected is set on both the normal and
// the exception path, so automatic selection is skipped afterwards either way.
// NOTE(review): the try block and the call that installs the context are not visible in
// this copy of the file.
void setDevice(const DeviceInfo* info)
__deviceSelected = true;
catch (...)
__deviceSelected = true;
// Public wrapper around ContextImpl::initializeContext for externally created OpenCL
// objects. __deviceSelected is set on both the normal and the exception path.
// NOTE(review): the try block and any rethrow are not visible in this copy of the file.
void initializeContext(void* pClPlatform, void* pClContext, void* pClDevice)
ContextImpl::initializeContext(pClPlatform, pClContext, pClDevice);
__deviceSelected = true;
catch (...)
__deviceSelected = true;
// Queries a feature on the current context; obtaining the context may trigger default
// initialization (see ContextImpl::getContext).
bool supportsFeature(FEATURE_TYPE featureType)
return Context::getContext()->supportsFeature(featureType);
/* moved to Context::getContext(): initializeOpenCLDevices(); */
#if defined(WIN32) && defined(CVAPI_EXPORTS)
// nothing, see DllMain
} // namespace ocl
} // namespace cv
// DLL entry point, compiled only for WIN32 builds that export the CV API.
// On DLL_PROCESS_DETACH with a non-NULL lpReserved (process termination) it sets
// cv::ocl::__termination so that the release paths skip OpenCL release calls.
// Always returns TRUE.
#if defined(WIN32) && defined(CVAPI_EXPORTS)
extern "C"
BOOL WINAPI DllMain(HINSTANCE /*hInst*/, DWORD fdwReason, LPVOID lpReserved);
extern "C"
BOOL WINAPI DllMain(HINSTANCE /*hInst*/, DWORD fdwReason, LPVOID lpReserved)
if (fdwReason == DLL_PROCESS_DETACH)
if (lpReserved != NULL) // called after ExitProcess() call
cv::ocl::__termination = true;
return TRUE;
@ -1,549 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Guoping Long,
// Niko Li,
// Yao Wang,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include <iomanip>
#include <fstream>
#include "cl_programcache.hpp"
// NOTE(review): RUN_TIMES is not referenced in the visible part of this file.
#define RUN_TIMES 100
namespace cv {
namespace ocl {
// Defaults used by openCLMallocPitch() for the memory type and read/write mode of new buffers.
DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT;
DevMemRW gDeviceMemRW = DEVICE_MEM_R_W;
// Lookup table indexed by DevMemType; its value is OR-ed into the clCreateBuffer flags.
// NOTE(review): the initializer is truncated in this copy of the file.
int gDevMemTypeValueMap[5] = {0,
// NOTE(review): the body of finish() is not visible in this copy of the file.
void finish()
// Returns true when the current context's device type is exactly CVCL_DEVICE_TYPE_CPU.
bool isCpuDevice()
const DeviceInfo& info = Context::getContext()->getDeviceInfo();
return (info.deviceType == CVCL_DEVICE_TYPE_CPU);
// Returns the wavefront size to assume for `kernel` on the current device: always 1 for
// CPU devices, otherwise a value obtained from clGetKernelWorkGroupInfo.
// `kernel` must not be NULL on the non-CPU path (asserted).
// NOTE(review): the queried parameter name is truncated in this copy of the file.
size_t queryWaveFrontSize(cl_kernel kernel)
const DeviceInfo& info = Context::getContext()->getDeviceInfo();
if (info.deviceType == CVCL_DEVICE_TYPE_CPU)
return 1;
size_t wavefront = 0;
CV_Assert(kernel != NULL);
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(Context::getContext()),
return wavefront;
// Blocking read (CL_TRUE) of `size` bytes from offset 0 of the device buffer into
// host_buffer, using the command queue of `ctx`.
// NOTE(review): despite its name, `dst_buffer` is the source of the copy. The check of
// `status` is not visible in this copy of the file.
void openCLReadBuffer(Context *ctx, cl_mem dst_buffer, void *host_buffer, size_t size)
cl_int status;
status = clEnqueueReadBuffer(getClCommandQueue(ctx), dst_buffer, CL_TRUE, 0,
size, host_buffer, 0, NULL, NULL);
// Creates a device buffer of `size` bytes in the context of `ctx`; `flag` is cast to
// cl_mem_flags and no host pointer is supplied. Returns the new buffer handle.
// NOTE(review): the check of `status` is not visible in this copy of the file.
cl_mem openCLCreateBuffer(Context *ctx, size_t flag , size_t size)
cl_int status;
cl_mem buffer = clCreateBuffer(getClContext(ctx), (cl_mem_flags)flag, size, NULL, &status);
return buffer;
// Size in bytes of each guard region placed before and after a guarded allocation.
static const int __memory_corruption_guard_bytes = 64*1024;
// Guard regions are filled with this int pattern.
static const int __memory_corruption_check_pattern = 0x14326547; // change pattern for sizeof(int)==8
// Bookkeeping for one guarded allocation: the enclosing main buffer plus the size and
// 2D shape of the user-visible region.
// NOTE(review): the default constructor header is not visible in this copy of the file.
struct CheckBuffers
cl_mem mainBuffer;
size_t size;
size_t widthInBytes, height;
: mainBuffer(NULL), size(0), widthInBytes(0), height(0)
// nothing
CheckBuffers(cl_mem _mainBuffer, size_t _size, size_t _widthInBytes, size_t _height)
: mainBuffer(_mainBuffer), size(_size), widthInBytes(_widthInBytes), height(_height)
// nothing
// Maps each user-visible (sub-)buffer handle to its bookkeeping record; accessed under
// getInitializationMutex().
static std::map<cl_mem, CheckBuffers> __check_buffers;
// Allocates a 2D device buffer using the global default read/write mode and memory type
// (gDeviceMemRW, gDeviceMemType). See openCLMallocPitchEx for the parameters.
void openCLMallocPitch(Context *ctx, void **dev_ptr, size_t *pitch,
size_t widthInBytes, size_t height)
openCLMallocPitchEx(ctx, dev_ptr, pitch, widthInBytes, height, gDeviceMemRW, gDeviceMemType);
// Allocates widthInBytes * height bytes of device memory and stores the handle in
// *dev_ptr; *pitch is always set to widthInBytes (rows are not padded).
// Two strategies are visible:
//  - plain: a single clCreateBuffer of exactly the requested size;
//  - guarded: a larger main buffer with a guard region of
//    __memory_corruption_guard_bytes on each side, the user region exposed through
//    clCreateSubBuffer, both guards written from a pattern-filled host vector, and the
//    allocation recorded in __check_buffers.
// The guarded strategy is disabled on Intel devices.
// NOTE(review): the expression that initialises useSubBuffers, the else keyword, the
// sub-buffer creation arguments and the enqueue-write calls are truncated in this copy.
void openCLMallocPitchEx(Context *ctx, void **dev_ptr, size_t *pitch,
size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type)
cl_int status;
size_t size = widthInBytes * height;
bool useSubBuffers =
const DeviceInfo& devInfo = ctx->getDeviceInfo();
if (useSubBuffers && devInfo.isIntelDevice)
useSubBuffers = false; // TODO FIXIT We observe memory leaks when working with sub-buffers
// on the CPU device of Intel OpenCL SDK (Linux). We will investigate this later.
if (!useSubBuffers)
*dev_ptr = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
size, 0, &status);
// Guarded path: allocate room for a guard region on both sides of the user data.
size_t allocSize = size + __memory_corruption_guard_bytes * 2;
cl_mem mainBuffer = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
allocSize, 0, &status);
// The user-visible region starts right after the leading guard.
cl_buffer_region r = {__memory_corruption_guard_bytes, size};
*dev_ptr = clCreateSubBuffer(mainBuffer,
std::vector<int> tmp(__memory_corruption_guard_bytes / sizeof(int),
CV_Assert(tmp.size() * sizeof(int) == __memory_corruption_guard_bytes);
// NOTE(review): both writes are non-blocking (CL_FALSE) and read from `tmp`; confirm
// upstream that the queue is finished before `tmp` goes out of scope.
mainBuffer, CL_FALSE, 0, __memory_corruption_guard_bytes, &tmp[0],
0, NULL, NULL));
mainBuffer, CL_FALSE, __memory_corruption_guard_bytes + size, __memory_corruption_guard_bytes, &tmp[0],
0, NULL, NULL));
CheckBuffers data(mainBuffer, size, widthInBytes, height);
cv::AutoLock lock(getInitializationMutex());
__check_buffers.insert(std::pair<cl_mem, CheckBuffers>((cl_mem)*dev_ptr, data));
*pitch = widthInBytes;
// Copies a width x height byte region between host memory and a device buffer using
// blocking transfers (CL_TRUE).
//   kind == clMemcpyHostToDevice: src is host memory, dst is cast to cl_mem.
//   kind == clMemcpyDeviceToHost: src is cast to cl_mem, dst is host memory.
// A single linear transfer of width * height bytes is issued when the device-side pitch
// equals width, or channels == 3, or height == 1; the *Rect call, which takes both pitches,
// is the other path.
// NOTE(review): the `else` lines separating the linear and Rect paths are not visible in
// this listing; the description above assumes they are alternatives.
void openCLMemcpy2D(Context *ctx, void *dst, size_t dpitch,
const void *src, size_t spitch,
size_t width, size_t height, openCLMemcpyKind kind, int channels)
size_t buffer_origin[3] = {0, 0, 0};
size_t host_origin[3] = {0, 0, 0};
size_t region[3] = {width, height, 1};
if(kind == clMemcpyHostToDevice)
if(dpitch == width || channels == 3 || height == 1)
openCLSafeCall(clEnqueueWriteBuffer(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE,
0, width * height, src, 0, NULL, NULL));
openCLSafeCall(clEnqueueWriteBufferRect(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE,
buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0));
else if(kind == clMemcpyDeviceToHost)
if(spitch == width || channels == 3 || height == 1)
openCLSafeCall(clEnqueueReadBuffer(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE,
0, width * height, dst, 0, NULL, NULL));
openCLSafeCall(clEnqueueReadBufferRect(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE,
buffer_origin, host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0));
// Device-to-device rectangular copy of a width x height byte region.
// src and dst are cl_mem handles passed as untyped pointers. src_offset and dst_offset are
// linear byte offsets, converted to an (x, y) origin using the buffer's own row pitch
// (spitch for the source, dpitch for the destination). The copy is enqueued on the command
// queue of ctx without an event.
void openCLCopyBuffer2D(Context *ctx, void *dst, size_t dpitch, int dst_offset,
                        const void *src, size_t spitch,
                        size_t width, size_t height, int src_offset)
{
    // x = offset within a row, y = row index.
    const size_t srcX = src_offset % spitch;
    const size_t srcY = src_offset / spitch;
    const size_t dstX = dst_offset % dpitch;
    const size_t dstY = dst_offset / dpitch;

    size_t srcStart[3] = { srcX, srcY, 0 };
    size_t dstStart[3] = { dstX, dstY, 0 };
    size_t extent[3] = { width, height, 1 };

    openCLSafeCall(clEnqueueCopyBufferRect(getClCommandQueue(ctx),
                                           (cl_mem)src, (cl_mem)dst,
                                           srcStart, dstStart, extent,
                                           spitch, 0, dpitch, 0,
                                           0, 0, 0));
}
// Tear-down path for a device buffer handle.
// If devPtr is registered in __check_buffers, the recorded parent buffer is used to read
// back the guard regions before and after the payload; each is compared with the expected
// pattern held in `tmp`. A mismatch is printed on std::cerr together with the recorded
// widthInBytes/height and then raised through CV_Error(CV_StsInternal, ...).
// NOTE(review): the read-back call heads, the fill value of `tmp`, the registry erase and
// the actual release calls are not visible in this listing.
void openCLFree(void *devPtr)
bool failBefore = false, failAfter = false;
CheckBuffers data;
cv::AutoLock lock(getInitializationMutex());
std::map<cl_mem, CheckBuffers>::iterator i = __check_buffers.find((cl_mem)devPtr);
if (i != __check_buffers.end())
data = i->second;
// A default-constructed CheckBuffers has mainBuffer == NULL, i.e. "not a guarded allocation".
if (data.mainBuffer != NULL)
Context* ctx = Context::getContext();
std::vector<uchar> checkBefore(__memory_corruption_guard_bytes);
std::vector<uchar> checkAfter(__memory_corruption_guard_bytes);
data.mainBuffer, CL_FALSE, 0, __memory_corruption_guard_bytes, &checkBefore[0],
0, NULL, NULL));
data.mainBuffer, CL_FALSE, __memory_corruption_guard_bytes + data.size, __memory_corruption_guard_bytes, &checkAfter[0],
0, NULL, NULL));
std::vector<int> tmp(__memory_corruption_guard_bytes / sizeof(int),
if (memcmp(&checkBefore[0], &tmp[0], __memory_corruption_guard_bytes) != 0)
failBefore = true;
if (memcmp(&checkAfter[0], &tmp[0], __memory_corruption_guard_bytes) != 0)
failAfter = true;
// TODO FIXIT Attach clReleaseMemObject call to event completion callback
// TODO 2013/12/04 Disable workaround
// Context* ctx = Context::getContext();
// clFinish(getClCommandQueue(ctx));
if (failBefore)
std::cerr << "ERROR: Memory corruption detected: before buffer: " << cv::format("widthInBytes=%d height=%d", (int)data.widthInBytes, (int)data.height) << std::endl;
CV_Error(CV_StsInternal, "Memory corruption detected: before buffer");
if (failAfter)
std::cerr << "ERROR: Memory corruption detected: after buffer: " << cv::format("widthInBytes=%d height=%d", (int)data.widthInBytes, (int)data.height) << std::endl;
CV_Error(CV_StsInternal, "Memory corruption detected: after buffer");
// Overload without build options: forwards to the four-argument overload with
// build_options == NULL.
cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName)
return openCLGetKernelFromSource(ctx, source, kernelName, NULL);
// Obtains the program for (ctx, source, build_options) from ProgramCache and creates the
// kernel named kernelName from it.
// Asserts that the program cache singleton and the returned program are non-NULL.
// NOTE(review): the `status` written by clCreateKernel() is not checked in the visible lines.
cl_kernel openCLGetKernelFromSource(const Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName,
const char *build_options)
cl_kernel kernel;
cl_int status = 0;
CV_Assert(ProgramCache::getProgramCache() != NULL);
cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, build_options);
CV_Assert(program != NULL);
kernel = clCreateKernel(program, kernelName.c_str(), &status);
return kernel;
// Validates a local work-group size before a kernel launch.
// localThreads must point to three values (indices 0..2 are read unconditionally).
// Checks, each via CV_Assert:
//   - every dimension is within the device's maxWorkItemSizes for that dimension;
//   - the product of the three dimensions is within the kernel's
//     CL_KERNEL_WORK_GROUP_SIZE (queried for the context's device);
//   - the same product is within the device's maxWorkGroupSize.
void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThreads)
size_t kernelWorkGroupSize;
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(ctx),
CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
CV_Assert( localThreads[0] <= ctx->getDeviceInfo().maxWorkItemSizes[0] );
CV_Assert( localThreads[1] <= ctx->getDeviceInfo().maxWorkItemSizes[1] );
CV_Assert( localThreads[2] <= ctx->getDeviceInfo().maxWorkItemSizes[2] );
CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= kernelWorkGroupSize );
CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= ctx->getDeviceInfo().maxWorkGroupSize );
// Running sums filled in by the profiling variant of openCLExecuteKernel():
// total_execute_time accumulates (end - start) and total_kernel_time accumulates
// (end - queued), both divided by 1000 * 1000 before being added.
static double total_execute_time = 0;
static double total_kernel_time = 0;
// Normalizes a build-option string so that equivalent option sets compare equal:
// leading blanks are dropped and every run of consecutive ' ' characters is collapsed
// into a single ' '. Only the space character is treated as whitespace.
//
// buildOptions may be NULL, in which case an empty string is returned.
// The result contains exactly the normalized characters, with no padding.
static std::string removeDuplicatedWhiteSpaces(const char * buildOptions)
{
    if (buildOptions == NULL)
        return "";

    const size_t length = strlen(buildOptions);

    // Skip leading blanks. The previous loop compared against 0, which can never match
    // inside [0, length), so nothing was ever skipped.
    size_t sidx = 0;
    while (sidx < length && buildOptions[sidx] == ' ')
        ++sidx;

    // Append instead of indexing: writing through operator[] into an empty string
    // is out of bounds.
    std::string opt;
    opt.reserve(length - sidx);
    for ( ; sidx < length; ++sidx)
    {
        const char c = buildOptions[sidx];
        const bool repeatedBlank = (c == ' ') && !opt.empty() && opt[opt.size() - 1] == ' ';
        if (!repeatedBlank)
            opt.push_back(c);
    }
    return opt;
}
// Builds the specialised kernel name and fetches that kernel.
// Naming rule: functionName[_C<channels>][_D<depth>]. A suffix is appended only when the
// corresponding argument is not -1. Example: split_C2_D3 is the split kernel for
// channels = 2 and depth = 3 (short).
// build_options is passed through removeDuplicatedWhiteSpaces() before the lookup.
cl_kernel openCLGetKernelFromSource(Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName, int channels,
                                    int depth, const char *build_options)
{
    std::stringstream suffix;
    if (channels != -1)
    {
        suffix << "_C" << channels;
    }
    if (depth != -1)
    {
        suffix << "_D" << depth;
    }
    kernelName += suffix.str();

    const std::string normalizedOptions = removeDuplicatedWhiteSpaces(build_options);
    return openCLGetKernelFromSource(ctx, source, kernelName, normalizedOptions.c_str());
}
// Launches an already-created kernel as a 3-D NDRange on the command queue of ctx.
// args holds (size, pointer) pairs that are bound in order with clSetKernelArg().
// When localThreads is non-NULL, globalThreads is rewritten in place through roundUp()
// (presumably to a multiple of the local size - confirm against roundUp) and the local
// size is validated with openCLVerifyKernel().
// Two enqueue variants are visible: a plain one without an event, and a profiling one that
// waits for the event and adds the measured times to total_execute_time and
// total_kernel_time.
// NOTE(review): the preprocessor lines selecting between the two variants are not visible
// in this listing, and neither is any release of `kernel` or `event`.
void openCLExecuteKernel(Context *ctx, cl_kernel kernel, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args)
if ( localThreads != NULL)
globalThreads[0] = roundUp(globalThreads[0], localThreads[0]);
globalThreads[1] = roundUp(globalThreads[1], localThreads[1]);
globalThreads[2] = roundUp(globalThreads[2], localThreads[2]);
cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
for(size_t i = 0; i < args.size(); i ++)
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
localThreads, 0, NULL, NULL));
cl_event event = NULL;
openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
localThreads, 0, NULL, &event));
cl_ulong start_time, end_time, queue_time;
double execute_time = 0;
double total_time = 0;
openCLSafeCall(clWaitForEvents(1, &event));
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
sizeof(cl_ulong), &start_time, 0));
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
sizeof(cl_ulong), &end_time, 0));
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
sizeof(cl_ulong), &queue_time, 0));
// Raw counter differences are scaled by 1 / (1000 * 1000) before accumulation.
execute_time = (double)(end_time - start_time) / (1000 * 1000);
total_time = (double)(end_time - queue_time) / (1000 * 1000);
total_execute_time += execute_time;
total_kernel_time += total_time;
// Resolves the kernel (name specialised by channels/depth, built with build_options) and
// launches it with the given thread configuration and arguments.
void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, const char *build_options)
cl_kernel kernel = openCLGetKernelFromSource(ctx, source, kernelName, channels, depth, build_options);
openCLExecuteKernel(ctx, kernel, globalThreads, localThreads, args);
// Overload without build options: forwards to the overload taking build_options, passing NULL.
void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName,
size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
openCLExecuteKernel(ctx, source, kernelName, globalThreads, localThreads, args,
channels, depth, NULL);
// Main entry point for running a kernel from a ProgramEntry with explicit build options.
// Two variants are visible:
//   - a direct forward to openCLExecuteKernel_();
//   - a benchmarking variant that prints a header (function name, data type, channels),
//     resets the timing accumulators, runs the kernel RUN_TIMES times and prints the
//     averaged execute/total times.
// NOTE(review): the preprocessor lines choosing between the variants and the tails of both
// openCLExecuteKernel_() calls are missing from this listing.
// NOTE(review): data_type has 7 entries and is indexed by depth after only a `depth >= 0`
// check - confirm depth cannot exceed 6 here.
void openCLExecuteKernel(Context *ctx, const cv::ocl::ProgramEntry* source, String kernelName,
size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth,
String data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"};
std::cout << std::endl;
std::cout << "Function Name: " << kernelName;
if(depth >= 0)
std::cout << " |data type: " << data_type[depth];
std::cout << " |channels: " << channels;
std::cout << " |Time Unit: " << "ms" << std::endl;
total_execute_time = 0;
total_kernel_time = 0;
std::cout << "-------------------------------------" << std::endl;
std::cout << std::setiosflags(std::ios::left) << std::setw(15) << "execute time";
std::cout << std::setiosflags(std::ios::left) << std::setw(15) << "launch time";
std::cout << std::setiosflags(std::ios::left) << std::setw(15) << "kernel time" << std::endl;
int i = 0;
for(i = 0; i < RUN_TIMES; i++)
openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth,
std::cout << "average kernel execute time: " << total_execute_time / RUN_TIMES << std::endl; // "ms" << std::endl;
std::cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << std::endl; // "ms" << std::endl;
// Runs a kernel from a caller-supplied ProgramSource rather than a built-in ProgramEntry.
// The source is wrapped in a temporary ProgramEntry whose name starts with "custom_", the
// kernel name gets the usual _C<channels>/_D<depth> suffixes (each only when the value is
// not -1), and the kernel is launched as a 3-D NDRange without an event.
// globalThreads must be non-NULL (asserted) and is rewritten in place through roundUp()
// when localThreads is given.
// NOTE(review): the right-hand side of the `name` initializer is truncated in this listing.
void openCLExecuteKernelInterop(Context *ctx, const cv::ocl::ProgramSource& source, String kernelName,
size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
//construct kernel name
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
//for example split_C2_D2, represent the split kernel with channels = 2 and dataType Depth = 2 (Data type is char)
std::stringstream idxStr;
if(channels != -1)
idxStr << "_C" << channels;
if(depth != -1)
idxStr << "_D" << depth;
kernelName += idxStr.str();
std::string name = std::string("custom_") +;
// `program` borrows name.c_str(); `name` must stay alive while `program` is in use.
ProgramEntry program = { name.c_str(), source.programStr, source.programHash };
cl_kernel kernel = openCLGetKernelFromSource(ctx, &program, kernelName, build_options);
CV_Assert(globalThreads != NULL);
if ( localThreads != NULL)
globalThreads[0] = roundUp(globalThreads[0], localThreads[0]);
globalThreads[1] = roundUp(globalThreads[1], localThreads[1]);
globalThreads[2] = roundUp(globalThreads[2], localThreads[2]);
cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
for(size_t i = 0; i < args.size(); i ++)
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
localThreads, 0, NULL, NULL));
// Creates a read-only device buffer of `size` bytes and fills it from `value` with a
// blocking write (blocking flag = 1). Returns the new buffer handle.
// No release of the buffer is visible in this function.
// NOTE(review): `status` is declared as int rather than cl_int and is not checked in the
// visible lines.
cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
const size_t size)
int status;
cl_mem con_struct;
con_struct = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &status);
openCLSafeCall(clEnqueueWriteBuffer(command_queue, con_struct, 1, 0, size,
value, 0, 0, 0));
return con_struct;
}//namespace ocl
}//namespace cv
@ -1,514 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Guoping Long,
// Niko Li,
// Yao Wang,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include <iomanip>
#include <fstream>
#include "cl_programcache.hpp"
namespace cv { namespace ocl {
* The binary caching system to eliminate redundant program source compilation.
* Strictly, this is not a cache because we do not implement evictions right now.
* We shall add such features to trade-off memory consumption and performance when necessary.
// Definitions of ProgramCache's static mutexes.
cv::Mutex ProgramCache::mutexFiles;
cv::Mutex ProgramCache::mutexCache;
// Process-wide cache instance; created on first use by ProgramCache::getProgramCache().
ProgramCache* _programCache = NULL;
// Returns the shared ProgramCache, creating it on first use.
// _programCache is tested once without the lock and once more after taking the
// initialization mutex before the instance is allocated.
// NOTE(review): _programCache is a plain pointer; confirm the unlocked first read is
// acceptable on the supported platforms.
ProgramCache* ProgramCache::getProgramCache()
if (NULL == _programCache)
cv::AutoLock lock(getInitializationMutex());
if (NULL == _programCache)
_programCache = new ProgramCache();
return _programCache;
// NOTE(review): the function headers for these statements are missing from this listing.
// Presumably the first line is the constructor body and the rest belongs to the
// destructor - confirm against the full file.
// Visible behaviour: cacheSize is reset to 0; if `this` is the shared instance, the global
// pointer is cleared under the initialization mutex (tested before and after locking).
cacheSize = 0;
if (this == _programCache)
cv::AutoLock lock(getInitializationMutex());
if (this == _programCache)
_programCache = NULL;
// Finds a cached program by its signature string.
// Returns the stored cl_program, or NULL when codeCache has no entry for srcsign.
cl_program ProgramCache::progLookup(const String& srcsign)
{
    const std::map<String, cl_program>::iterator hit = codeCache.find(srcsign);
    if (hit == codeCache.end())
    {
        return NULL;
    }
    return hit->second;
}
void ProgramCache::addProgram(const String& srcsign, cl_program program)
if (!progLookup(srcsign))
codeCache.insert(std::map<String, cl_program>::value_type(srcsign, program));
// Walks every entry of codeCache and resets cacheSize to 0.
// NOTE(review): the loop body and any clearing of codeCache are not visible in this
// listing; presumably each cached cl_program is released there - confirm.
void ProgramCache::releaseProgram()
std::map<String, cl_program>::iterator iter;
for(iter = codeCache.begin(); iter != codeCache.end(); iter++)
cacheSize = 0;
// Disk-cache switch consulted by ProgramFileCache::getOrBuildProgram(); enabled by default.
static bool enable_disk_cache = true;
// Prefix prepended to cache file names in ProgramCache::getProgram(); empty by default.
static String binpath = "";
// Configures the on-disk program cache.
// The switch and the path prefix are first reset (disabled, empty path). The prefix is then
// set to `path` only when the cache ends up enabled and `path` is non-empty.
// NOTE(review): the statements that derive enable_disk_cache from `mode` (including the
// debug/release preprocessor split) are truncated in this listing.
void setBinaryDiskCache(int mode, String path)
enable_disk_cache = false;
binpath = "";
if(mode == CACHE_NONE)
enable_disk_cache =
#if defined(_DEBUG) || defined(DEBUG)
if(enable_disk_cache && !path.empty())
binpath = path;
// Sets the prefix used for cache file names. It is concatenated verbatim in front of the
// file name, so a directory path needs its own trailing separator.
void setBinaryPath(const char *path)
binpath = path;
// Number of slots in a cache file's offset table. getHash() masks with (MAX_ENTRIES - 1),
// so this value must remain a power of two.
static const int MAX_ENTRIES = 64;
// Reader/writer for one on-disk cache file holding compiled program binaries.
// File layout as written by writeConfigurationToFile():
//   int hashLength; char hash[hashLength];
//   int numberOfEntries; int firstEntryOffset[numberOfEntries];
//   then records of { nextEntry, dataSize, optionsLength }, each followed by the option
//   string and the binary data.
// The nested structs below describe those pieces; the commented-out members are the
// variable-length parts that follow each fixed header.
struct ProgramFileCache
struct CV_DECL_ALIGNED(1) ProgramFileHeader
int hashLength;
//char hash[];
struct CV_DECL_ALIGNED(1) ProgramFileTable
int numberOfEntries;
//int firstEntryOffset[];
struct CV_DECL_ALIGNED(1) ProgramFileConfigurationEntry
int nextEntry;    // file offset of the next record in the same slot; <= 0 ends the chain
int dataSize;     // size in bytes of the binary that follows the options
int optionsLength; // length in bytes of the option string that follows this header
//char options[];
// char data[];
String fileName_; // path of the cache file
const char* hash_; // program source hash; NULL disables reading and writing
std::fstream f;   // open stream on the cache file, when available
// Stores the file name and source hash. When a hash is given, the visible lines read the
// stored hash length and hash from the file and compare the stored hash with hash_.
// NOTE(review): the open/read call heads and the action taken on a mismatch are truncated
// in this listing; presumably a stale file is discarded - confirm.
// NOTE(review): hashLength comes from the file and is used to size `fhash` without a
// range check in the visible lines.
ProgramFileCache(const String& fileName, const char* hash)
: fileName_(fileName), hash_(hash)
if (hash_ != NULL)
||||||, std::ios::in|std::ios::out|std::ios::binary);
int hashLength = 0;
||||||*)&hashLength, sizeof(int));
std::vector<char> fhash(hashLength + 1);
||||||[0], hashLength);
if (f.eof() || strncmp(hash_, &fhash[0], hashLength) != 0)
int getHash(const String& options)
int hash = 0;
for (size_t i = 0; i < options.length(); i++)
hash = (hash << 2) ^ (hash >> 17) ^ options[i];
return (hash + (hash >> 16)) & (MAX_ENTRIES - 1);
// Looks for a cached binary built with exactly `options` and loads it into `buf`.
// Returns false when there is no hash, the file is not open, the file is empty, the table
// size is not MAX_ENTRIES, or no matching record is found in the slot chosen by
// getHash(options). Returns true after a record whose option string matches byte for byte
// has been read.
// NOTE(review): the read call heads (and the resize of `buf`, if any) are truncated in
// this listing, and the brace structure around the match test is not visible.
// NOTE(review): values read from the file (entry offsets, optionsLength, dataSize) are
// used without range checks in the visible lines.
bool readConfigurationFromFile(const String& options, std::vector<char>& buf)
if (hash_ == NULL)
return false;
if (!f.is_open())
return false;
f.seekg(0, std::fstream::end);
size_t fileSize = (size_t)f.tellg();
if (fileSize == 0)
std::cerr << "Invalid file (empty): " << fileName_ << std::endl;
return false;
f.seekg(0, std::fstream::beg);
int hashLength = 0;
||||||*)&hashLength, sizeof(int));
CV_Assert(hashLength > 0);
// Skip the header (length field + hash bytes) to reach the offset table.
f.seekg(sizeof(hashLength) + hashLength, std::fstream::beg);
int numberOfEntries = 0;
||||||*)&numberOfEntries, sizeof(int));
CV_Assert(numberOfEntries > 0);
if (numberOfEntries != MAX_ENTRIES)
std::cerr << "Invalid file: " << fileName_ << std::endl;
return false;
std::vector<int> firstEntryOffset(numberOfEntries);
||||||*)&firstEntryOffset[0], sizeof(int)*numberOfEntries);
int entryNum = getHash(options);
int entryOffset = firstEntryOffset[entryNum];
ProgramFileConfigurationEntry entry;
// Follow the chain of records for this slot; an offset <= 0 ends it.
while (entryOffset > 0)
f.seekg(entryOffset, std::fstream::beg);
assert(sizeof(entry) == sizeof(int)*3);
||||||*)&entry, sizeof(entry));
std::vector<char> foptions(entry.optionsLength);
if ((int)options.length() == entry.optionsLength)
if (entry.optionsLength > 0)
||||||[0], entry.optionsLength);
if (memcmp(&foptions[0], options.c_str(), entry.optionsLength) == 0)
||||||[0], entry.dataSize);
f.seekg(0, std::fstream::beg);
return true;
if (entry.nextEntry <= 0)
entryOffset = entry.nextEntry;
return false;
// Appends the binary in `buf`, built with `options`, to the cache file.
// Returns true without writing when there is no hash; returns false when the file cannot
// be opened or its table size is not MAX_ENTRIES.
// For an empty file the header (hash length, hash) and a zeroed offset table of
// MAX_ENTRIES slots are written first. The new record is placed at the current end of the
// file and linked in, either from the last record of its slot's chain or from the slot's
// table entry when the chain was empty.
// NOTE(review): open/read call heads, `else` markers and loop breaks are truncated in this
// listing, so the exact control flow cannot be confirmed from here.
// NOTE(review): the CV_Assert below passes `&foptions` (address of the vector object) to
// memcmp, whereas the reader uses `&foptions[0]`; this looks like a bug - confirm and fix
// in the full source.
bool writeConfigurationToFile(const String& options, std::vector<char>& buf)
if (hash_ == NULL)
return true; // don't save programs without hash
if (!f.is_open())
||||||, std::ios::in|std::ios::out|std::ios::binary);
if (!f.is_open())
||||||, std::ios::out|std::ios::binary);
if (!f.is_open())
return false;
f.seekg(0, std::fstream::end);
size_t fileSize = (size_t)f.tellg();
if (fileSize == 0)
// Fresh file: write header and an all-zero offset table.
f.seekp(0, std::fstream::beg);
int hashLength = strlen(hash_);
f.write((char*)&hashLength, sizeof(int));
f.write(hash_, hashLength);
int numberOfEntries = MAX_ENTRIES;
f.write((char*)&numberOfEntries, sizeof(int));
std::vector<int> firstEntryOffset(MAX_ENTRIES, 0);
f.write((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries);
||||||, std::ios::in|std::ios::out|std::ios::binary);
f.seekg(0, std::fstream::end);
fileSize = (size_t)f.tellg();
f.seekg(0, std::fstream::beg);
int hashLength = 0;
||||||*)&hashLength, sizeof(int));
CV_Assert(hashLength > 0);
f.seekg(sizeof(hashLength) + hashLength, std::fstream::beg);
int numberOfEntries = 0;
||||||*)&numberOfEntries, sizeof(int));
CV_Assert(numberOfEntries > 0);
if (numberOfEntries != MAX_ENTRIES)
std::cerr << "Invalid file: " << fileName_ << std::endl;
return false;
// Remember where the offset table starts so it can be rewritten later.
size_t tableEntriesOffset = (size_t)f.tellg();
std::vector<int> firstEntryOffset(numberOfEntries);
||||||*)&firstEntryOffset[0], sizeof(int)*numberOfEntries);
int entryNum = getHash(options);
int entryOffset = firstEntryOffset[entryNum];
ProgramFileConfigurationEntry entry;
// Walk to the end of this slot's chain, asserting the options are not stored already.
while (entryOffset > 0)
f.seekg(entryOffset, std::fstream::beg);
assert(sizeof(entry) == sizeof(int)*3);
||||||*)&entry, sizeof(entry));
std::vector<char> foptions(entry.optionsLength);
if ((int)options.length() == entry.optionsLength)
if (entry.optionsLength > 0)
||||||[0], entry.optionsLength);
CV_Assert(memcmp(&foptions, options.c_str(), entry.optionsLength) != 0);
if (entry.nextEntry <= 0)
entryOffset = entry.nextEntry;
if (entryOffset > 0)
// Chain exists: patch the last record so it points at the new record (end of file).
f.seekp(entryOffset, std::fstream::beg);
entry.nextEntry = fileSize;
f.write((char*)&entry, sizeof(entry));
// Empty chain: the table slot itself points at the new record.
firstEntryOffset[entryNum] = fileSize;
f.seekp(tableEntriesOffset, std::fstream::beg);
f.write((char*)&firstEntryOffset[0], sizeof(int)*numberOfEntries);
// Append the new record: fixed header, option string, binary data.
f.seekp(fileSize, std::fstream::beg);
entry.nextEntry = 0;
entry.dataSize = buf.size();
entry.optionsLength = options.length();
f.write((char*)&entry, sizeof(entry));
f.write(options.c_str(), entry.optionsLength);
f.write(&buf[0], entry.dataSize);
return true;
// Produces a built cl_program for `source` with `options`.
// When the disk cache is disabled or holds no matching binary, the program is created from
// source text and built; on success with the disk cache enabled, the binary is fetched and
// written back to the cache file. Otherwise the program is created from the cached binary
// and built. A failed build prints the build log to std::cout.
// NOTE(review): the `else` markers, the clGetProgramInfo call heads and any status checks
// are truncated in this listing, so the branch structure described above is inferred from
// statement order - confirm against the full file.
// NOTE(review): `&buildLog[0]` is taken even if buildLogSize is 0 in the visible lines.
cl_program getOrBuildProgram(const Context* ctx, const cv::ocl::ProgramEntry* source, const String& options)
cl_int status = 0;
cl_program program = NULL;
std::vector<char> binary;
if (!enable_disk_cache || !readConfigurationFromFile(options, binary))
// Cache miss (or cache disabled): compile from source.
program = clCreateProgramWithSource(getClContext(ctx), 1, (const char**)&source->programStr, NULL, &status);
cl_device_id device = getClDeviceID(ctx);
status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL);
if(status == CL_SUCCESS)
if (enable_disk_cache)
size_t binarySize;
&binarySize, NULL));
std::vector<char> binary(binarySize);
char* ptr = &binary[0];
if (!writeConfigurationToFile(options, binary))
std::cerr << "Can't write data to file: " << fileName_ << std::endl;
// Cache hit: create the program from the stored binary.
cl_device_id device = getClDeviceID(ctx);
size_t size = binary.size();
const char* ptr = &binary[0];
program = clCreateProgramWithBinary(getClContext(ctx),
1, &device,
(const size_t *)&size, (const unsigned char **)&ptr,
NULL, &status);
status = clBuildProgram(program, 1, &device, options.c_str(), NULL, NULL);
if(status != CL_SUCCESS)
size_t buildLogSize = 0;
openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx),
CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize));
std::vector<char> buildLog; buildLog.resize(buildLogSize);
memset(&buildLog[0], 0, buildLogSize);
openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx),
CL_PROGRAM_BUILD_LOG, buildLogSize, &buildLog[0], NULL));
std::cout << std::endl << "BUILD LOG: "
<< (source->name ? source->name : "dynamic program") << ": "
<< options << "\n";
std::cout << &buildLog[0] << std::endl;
return program;
// Returns a built program for (ctx, source, build_options), using the in-memory cache
// first and ProgramFileCache (disk cache or a fresh build) otherwise.
// The in-memory key is source->name (when set) + the cl_context value + "_" + build_options
// (when given). The memory cache is consulted under mutexCache, then again after
// mutexFiles has been taken, before the program is built and added.
// The options actually used for building are the device's compilationExtraOptions followed
// by build_options.
// NOTE(review): the braces delimiting the lock scopes and the `if (source->name)` bodies
// are not visible in this listing; the locking order follows the header comment
// (mutexFiles first, then mutexCache) only if the first mutexCache lock is released
// before mutexFiles is taken - confirm.
cl_program ProgramCache::getProgram(const Context *ctx, const cv::ocl::ProgramEntry* source,
const char *build_options)
std::stringstream src_sign;
if (source->name)
src_sign << source->name;
src_sign << getClContext(ctx);
if (NULL != build_options)
src_sign << "_" << build_options;
cv::AutoLock lockCache(mutexCache);
cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str());
if (!!program)
return program;
cv::AutoLock lockCache(mutexFiles);
// second check
if (source->name)
cv::AutoLock lockCache(mutexCache);
cl_program program = ProgramCache::getProgramCache()->progLookup(src_sign.str());
if (!!program)
return program;
String all_build_options;
if (!ctx->getDeviceInfo().compilationExtraOptions.empty())
all_build_options += ctx->getDeviceInfo().compilationExtraOptions;
if (build_options != NULL)
all_build_options += " ";
all_build_options += build_options;
const DeviceInfo& devInfo = ctx->getDeviceInfo();
// Cache file name: <binpath><source name or "NULL">_<platform name>_<device name>.clb
String filename = binpath + (source->name ? source->name : "NULL") + "_" + devInfo.platform->platformName + "_" + devInfo.deviceName + ".clb";
ProgramFileCache programFileCache(filename, source->programHash);
cl_program program = programFileCache.getOrBuildProgram(ctx, source, all_build_options);
// Keep the program in the in-memory cache (the guard visible here is source->name).
if (source->name)
cv::AutoLock lockCache(mutexCache);
this->addProgram(src_sign.str(), program);
return program;
} // namespace ocl
} // namespace cv
@ -1,85 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
namespace cv {
namespace ocl {
// In-memory cache of built OpenCL programs, keyed by a signature string, with a shared
// instance obtained through getProgramCache().
class ProgramCache
// Returns the shared cache instance.
static ProgramCache *getProgramCache();
// Returns a built program for the given context, source and build options.
cl_program getProgram(const Context *ctx, const cv::ocl::ProgramEntry* source,
const char *build_options);
void releaseProgram();
// look up a program by its signature string
// (with acquired mutexCache)
cl_program progLookup(const String& srcsign);
// add a program to the cache
// (with acquired mutexCache)
void addProgram(const String& srcsign, cl_program program);
std::map <String, cl_program> codeCache;
unsigned int cacheSize;
// Presumed watermark for the cache volume.
// NOTE(review): an earlier remark here said "256MB", which does not match the value 1024
// below; the unit of this constant is not established by the code in view.
// More delicate algorithms may be needed later; for now it is left alone.
static const unsigned MAX_PROG_CACHE_SIZE = 1024;
// acquire both mutexes in this order: 1) mutexFiles 2) mutexCache
static cv::Mutex mutexFiles;
static cv::Mutex mutexCache;
}//namespace ocl
}//namespace cv
@ -1,656 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Wang Weiyan,
// Peng Xiao,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
// Launches the cvt_color kernel `kernelName` for conversions whose build options carry the
// source channel count (-D scn).
// Kernel arguments, in order: dst.cols, dst.rows, src_step, dst_step, two cl_mem
// arguments, src_offset, dst_offset, then one extra cl_mem for each of data1/data2 that
// is non-empty. Steps come from step1() and offsets are divided by elemSize1().
// On devices reporting FEATURE_CL_INTEL_DEVICE, pixels_per_work_item is 4 when src.cols is
// a multiple of 4 and the depth is CV_8U, otherwise 2 when src.cols is even; the global
// width is dst.cols / pixels_per_work_item.
// additionalOptions is appended to the build options verbatim, with no separator added.
// NOTE(review): the operands of the cl_mem arguments, the `else` lines and the
// #else/#endif around the local size are truncated in this listing.
static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(),
const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
int pixels_per_work_item = 1;
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
pixels_per_work_item = 4;
else if (src.cols % 2 == 0)
pixels_per_work_item = 2;
pixels_per_work_item = 1;
String build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), src.oclchannels(), bidx, pixels_per_work_item);
if (!additionalOptions.empty())
build_options = build_options + additionalOptions;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data1.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
if (!data2.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
size_t lt[3] = { 16, 16, 1 };
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
// Launches the cvt_color kernel `kernelName` with one work item per destination pixel
// (global size dst.cols x dst.rows) and build options carrying the source channel count
// (-D scn).
// Kernel arguments, in order: dst.cols, dst.rows, src_step, dst_step, two cl_mem
// arguments, src_offset, dst_offset, then one extra cl_mem for each of data1/data2 that
// is non-empty.
// additionalOptions is appended to the build options verbatim, with no separator added.
// NOTE(review): the operands of the cl_mem arguments and the #else/#endif around the local
// size are truncated in this listing.
static void toHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(),
const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d", src.depth(), src.oclchannels(), bidx);
if (!additionalOptions.empty())
build_options += additionalOptions;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data1.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
if (!data2.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
size_t lt[3] = { 16, 16, 1 };
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
// Launches the cvt_color kernel `kernelName` with one work item per destination pixel and
// build options carrying the destination channel count (-D dcn).
// Kernel arguments, in order: dst.cols, dst.rows, src_step, dst_step, two cl_mem
// arguments, src_offset, dst_offset, then one extra cl_mem when `data` is non-empty.
// additionalOptions is appended to the build options verbatim, with no separator added.
// NOTE(review): the operands of the cl_mem arguments and the #else/#endif around the local
// size are truncated in this listing.
static void fromGray_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
if (!additionalOptions.empty())
build_options += additionalOptions;
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
size_t lt[3] = { 16, 16, 1 };
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
// Launches the cvt_color kernel `kernelName` for a conversion towards RGB/BGR.
// Differs from the other launchers in that each work-item may process several
// pixels of a row: the count is passed as the `pixels_per_work_item` build define
// and the global width is divided by it.
//   bidx              - passed to the kernel as the `bidx` build define.
//   additionalOptions - extra text appended verbatim to the build options.
//   data              - optional extra buffer; a kernel argument is appended only when it is non-empty.
// NOTE(review): braces, `else` lines, #else/#endif and the cl_mem operands appear to be
// missing from this copy — confirm against the original source.
static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
// On devices reporting FEATURE_CL_INTEL_DEVICE: 4 pixels per work-item when the width is a
// multiple of 4 and the depth is CV_8U, otherwise 2 when the width is even.
int pixels_per_work_item = 1;
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
pixels_per_work_item = 4;
else if (src.cols % 2 == 0)
pixels_per_work_item = 2;
// presumably the trailing `else` branch (its `else` line is not visible here)
pixels_per_work_item = 1;
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), dst.channels(), bidx, pixels_per_work_item);
if (!additionalOptions.empty())
build_options += additionalOptions;
// Argument order: cols, rows, src_step, dst_step, two cl_mem buffers, src_offset, dst_offset[, data].
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
// presumably the src and dst device buffers, in that order — TODO confirm
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
// Global width shrinks by the per-work-item pixel count chosen above.
size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
size_t lt[3] = { 16, 16, 1 };
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
// Launches the cvt_color kernel `kernelName` for the NV12 -> RGB family of conversions.
// Same argument layout as the other launchers, but the global work size is taken
// from `src` (not `dst`), while the cols/rows kernel arguments still come from `dst`.
//   bidx              - passed to the kernel as the `bidx` build define.
//   additionalOptions - extra text appended verbatim to the build options.
//   data              - optional extra buffer; a kernel argument is appended only when it is non-empty.
// NOTE(review): braces, #else/#endif and the cl_mem operands appear to be missing from
// this copy — confirm against the original source.
static void toRGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
String build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
if (!additionalOptions.empty())
build_options = build_options + additionalOptions;
// Offsets are divided by the per-channel element size; steps are taken from step1().
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
// presumably the src and dst device buffers, in that order — TODO confirm
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
// Global size follows the source dimensions here.
size_t gt[3] = {src.cols, src.rows, 1};
#ifdef ANDROID
size_t lt[3] = {16, 10, 1};
size_t lt[3] = {16, 16, 1};
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
// Launches the cvt_color kernel `kernelName` for HSV/HLS -> RGB style conversions.
//   bidx              - passed to the kernel as the `bidx` build define.
//   additionalOptions - extra text appended verbatim to the build options
//                       (cvtColor_caller uses it to pass hrange/hscale defines).
//   data              - optional extra buffer; a kernel argument is appended only when it is non-empty.
// NOTE(review): braces, #else/#endif and the cl_mem operands appear to be missing from
// this copy — confirm against the original source.
static void fromHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
if (!additionalOptions.empty())
build_options += additionalOptions;
// Offsets are divided by the per-channel element size; steps are taken from step1().
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
// Argument order: cols, rows, src_step, dst_step, two cl_mem buffers, src_offset, dst_offset[, data].
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
// presumably the src and dst device buffers, in that order — TODO confirm
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
// Global size is dst.cols x dst.rows; the local size is 16x10 under ANDROID, 16x16 otherwise (presumably via #else).
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
size_t lt[3] = { 16, 16, 1 };
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
// Launches the fixed "RGB" kernel from cvt_color, used for conversions between
// 3/4-channel RGB-family layouts.
//   reverse - selects the `REVERSE` build define when true, `ORDER` otherwise.
// Both source and destination channel counts are passed as build defines (scn/dcn).
// NOTE(review): braces, #else/#endif and the cl_mem operands appear to be missing from
// this copy — confirm against the original source.
static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
// Offsets are divided by the per-channel element size; steps are taken from step1().
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s",
src.depth(), dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
// Argument order: cols, rows, src_step, dst_step, two cl_mem buffers, src_offset, dst_offset.
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
// presumably the src and dst device buffers, in that order — TODO confirm
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
// Global size is dst.cols x dst.rows; the local size is 16x10 under ANDROID, 16x16 otherwise (presumably via #else).
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
size_t lt[3] = { 16, 16, 1 };
openCLExecuteKernel(src.clCxt, &cvt_color, "RGB", gt, lt, args, -1, -1, build_options.c_str());
// Launches the cvt_color kernel `kernelName` for conversions whose source is a packed
// 5x5 format (cvtColor_caller calls this with a 2-channel CV_8U source).
//   bidx      - passed to the kernel as the `bidx` build define.
//   greenbits - passed as the `greenbits` build define (callers pass 6 or 5).
// NOTE(review): braces, #else/#endif and the cl_mem operands appear to be missing from
// this copy — confirm against the original source.
static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d -D bidx=%d",
src.depth(), greenbits, dst.channels(), bidx);
// Source offset/step are halved (>> 1); destination values are divided by the per-channel element size.
int src_offset = src.offset >> 1, src_step = src.step >> 1;
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step / dst.elemSize1();
// Argument order: cols, rows, src_step, dst_step, two cl_mem buffers, src_offset, dst_offset.
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
// presumably the src and dst device buffers, in that order — TODO confirm
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
// Global size is dst.cols x dst.rows; the local size is 16x10 under ANDROID, 16x16 otherwise (presumably via #else).
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
size_t lt[3] = { 16, 16, 1 };
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
// Launches the cvt_color kernel `kernelName` for conversions whose destination is a packed
// 5x5 format (cvtColor_caller creates a CV_8UC2 destination before calling this).
//   bidx      - passed to the kernel as the `bidx` build define.
//   greenbits - passed as the `greenbits` build define (callers pass 6 or 5).
// NOTE(review): braces, #else/#endif and the cl_mem operands appear to be missing from
// this copy — confirm against the original source.
static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d -D bidx=%d",
src.depth(), greenbits, src.channels(), bidx);
// Source offset/step are passed through unchanged (only narrowed to int);
// destination offset/step are halved (>> 1).
int src_offset = (int)src.offset, src_step = (int)src.step;
int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1;
// Argument order: cols, rows, src_step, dst_step, two cl_mem buffers, src_offset, dst_offset.
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
// presumably the src and dst device buffers, in that order — TODO confirm
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
// Global size is dst.cols x dst.rows; the local size is 16x10 under ANDROID, 16x16 otherwise (presumably via #else).
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
size_t lt[3] = { 16, 16, 1 };
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
// Dispatches the colour conversion `code` to the matching cvt_color kernel launcher.
// For each supported family it checks the source channel count/depth with CV_Assert,
// allocates `dst` with the required type, and calls one of the *_caller helpers.
//   src  - input image; only CV_8U, CV_16U and CV_32F depths pass the initial assert.
//   dst  - output image, (re)created here.
//   code - COLOR_* conversion code.
//   dcn  - requested destination channel count; values <= 0 fall back to 3 in the
//          families that accept a caller-chosen count.
// NOTE(review): most `case` labels, braces, `else` and `break` lines are missing from this
// copy, so the section comments below are inferred from the codes each section tests —
// confirm against the original source.
static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
Size sz = src.size();
int scn = src.channels(), depth = src.depth(), bidx;
CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32F);
switch (code)
// --- RGB-family channel reordering / alpha add-or-drop: uses the generic "RGB" kernel ---
CV_Assert(scn == 3 || scn == 4);
dcn = code == COLOR_BGR2BGRA || code == COLOR_RGB2BGRA || code == COLOR_BGRA2RGBA ? 4 : 3;
bool reverse = !(code == COLOR_BGR2BGRA || code == COLOR_BGRA2BGR);
dst.create(sz, CV_MAKE_TYPE(depth, dcn));
RGB_caller(src, dst, reverse);
// --- 3/4-channel 8-bit -> packed 565/555 (greenbits is 6 for the 565 codes, 5 otherwise) ---
case COLOR_BGR2BGR565: case COLOR_BGR2BGR555: case COLOR_RGB2BGR565: case COLOR_RGB2BGR555:
CV_Assert((scn == 3 || scn == 4) && depth == CV_8U );
bidx = code == COLOR_BGR2BGR565 || code == COLOR_BGR2BGR555 ||
code == COLOR_BGRA2BGR565 || code == COLOR_BGRA2BGR555 ? 0 : 2;
int greenbits = code == COLOR_BGR2BGR565 || code == COLOR_RGB2BGR565 ||
code == COLOR_BGRA2BGR565 || code == COLOR_RGBA2BGR565 ? 6 : 5;
dst.create(sz, CV_8UC2);
toRGB5x5_caller(src, dst, bidx, greenbits, "RGB2RGB5x5");
// --- packed 565/555 -> 3/4-channel 8-bit ---
case COLOR_BGR5652BGR: case COLOR_BGR5552BGR: case COLOR_BGR5652RGB: case COLOR_BGR5552RGB:
dcn = code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA || code == COLOR_BGR5652RGBA || code == COLOR_BGR5552RGBA ? 4 : 3;
CV_Assert((dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U);
bidx = code == COLOR_BGR5652BGR || code == COLOR_BGR5552BGR ||
code == COLOR_BGR5652BGRA || code == COLOR_BGR5552BGRA ? 0 : 2;
int greenbits = code == COLOR_BGR5652BGR || code == COLOR_BGR5652RGB ||
code == COLOR_BGR5652BGRA || code == COLOR_BGR5652RGBA ? 6 : 5;
dst.create(sz, CV_MAKETYPE(depth, dcn));
fromRGB5x5_caller(src, dst, bidx, greenbits, "RGB5x52RGB");
// --- packed 565/555 -> single-channel gray (bidx is passed as -1) ---
CV_Assert(scn == 2 && depth == CV_8U);
dst.create(sz, CV_8UC1);
int greenbits = code == COLOR_BGR5652GRAY ? 6 : 5;
fromRGB5x5_caller(src, dst, -1, greenbits, "BGR5x52Gray");
// --- single-channel gray -> packed 565/555 (bidx is passed as -1) ---
CV_Assert(scn == 1 && depth == CV_8U);
dst.create(sz, CV_8UC2);
int greenbits = code == COLOR_GRAY2BGR565 ? 6 : 5;
toRGB5x5_caller(src, dst, -1, greenbits, "Gray2BGR5x5");
// --- 3/4-channel -> gray ---
CV_Assert(scn == 3 || scn == 4);
bidx = code == COLOR_BGR2GRAY || code == COLOR_BGRA2GRAY ? 0 : 2;
dst.create(sz, CV_MAKETYPE(depth, 1));
fromRGB_caller(src, dst, bidx, "RGB2Gray");
// --- gray -> 3/4-channel ---
CV_Assert(scn == 1);
dcn = code == COLOR_GRAY2BGRA ? 4 : 3;
dst.create(sz, CV_MAKETYPE(depth, dcn));
fromGray_caller(src, dst, 0, "Gray2RGB");
// --- 3/4-channel -> YUV (always 3 output channels) ---
CV_Assert(scn == 3 || scn == 4);
bidx = code == COLOR_BGR2YUV ? 0 : 2;
dst.create(sz, CV_MAKETYPE(depth, 3));
fromRGB_caller(src, dst, bidx, "RGB2YUV");
// --- YUV -> 3/4-channel ---
if( dcn <= 0 )
dcn = 3;
CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
bidx = code == COLOR_YUV2BGR ? 0 : 2;
dst.create(sz, CV_MAKETYPE(depth, dcn));
toRGB_caller(src, dst, bidx, "YUV2RGB");
// --- NV12 -> 3/4-channel: single-channel 8-bit input whose width is even and whose
//     height is a multiple of 3; the output height is 2/3 of the input height ---
CV_Assert(scn == 1);
CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
dcn = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2RGBA_NV12 ? 4 : 3;
bidx = code == COLOR_YUV2BGRA_NV12 || code == COLOR_YUV2BGR_NV12 ? 0 : 2;
Size dstSz(sz.width, sz.height * 2 / 3);
dst.create(dstSz, CV_MAKETYPE(depth, dcn));
toRGB_NV12_caller(src, dst, bidx, "YUV2RGBA_NV12");
// --- 3/4-channel -> YCrCb (always 3 output channels) ---
CV_Assert(scn == 3 || scn == 4);
bidx = code == COLOR_BGR2YCrCb ? 0 : 2;
dst.create(sz, CV_MAKETYPE(depth, 3));
fromRGB_caller(src, dst, bidx, "RGB2YCrCb");
// --- YCrCb -> 3/4-channel ---
if( dcn <= 0 )
dcn = 3;
CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
bidx = code == COLOR_YCrCb2BGR ? 0 : 2;
dst.create(sz, CV_MAKETYPE(depth, dcn));
toRGB_caller(src, dst, bidx, "YCrCb2RGB");
// --- 3/4-channel -> XYZ: a 1x9 coefficient table is built on the host and passed to the
//     kernel as the extra `data` buffer. Float coefficients are used for CV_32F; the
//     integer table (presumably the `else` branch) looks like the float values scaled
//     by 4096, e.g. 0.412453 * 4096 ~= 1689. When bidx == 0 the first and third entries
//     of each row are swapped. ---
CV_Assert(scn == 3 || scn == 4);
bidx = code == COLOR_BGR2XYZ ? 0 : 2;
dst.create(sz, CV_MAKE_TYPE(depth, 3));
Mat c;
if (depth == CV_32F)
float coeffs[] =
0.412453f, 0.357580f, 0.180423f,
0.212671f, 0.715160f, 0.072169f,
0.019334f, 0.119193f, 0.950227f
if (bidx == 0)
std::swap(coeffs[0], coeffs[2]);
std::swap(coeffs[3], coeffs[5]);
std::swap(coeffs[6], coeffs[8]);
// copyTo makes `c` own its data, since `coeffs` is a local array
Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
int coeffs[] =
1689, 1465, 739,
871, 2929, 296,
79, 488, 3892
if (bidx == 0)
std::swap(coeffs[0], coeffs[2]);
std::swap(coeffs[3], coeffs[5]);
std::swap(coeffs[6], coeffs[8]);
Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
oclMat oclCoeffs(c);
fromRGB_caller(src, dst, bidx, "RGB2XYZ", "", oclCoeffs);
// --- XYZ -> 3/4-channel: same scheme as above with the inverse coefficients; when
//     bidx == 0 the first and third rows are swapped (entries 0..2 with 6..8). ---
if (dcn <= 0)
dcn = 3;
CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
bidx = code == COLOR_XYZ2BGR ? 0 : 2;
dst.create(sz, CV_MAKE_TYPE(depth, dcn));
Mat c;
if (depth == CV_32F)
float coeffs[] =
3.240479f, -1.53715f, -0.498535f,
-0.969256f, 1.875991f, 0.041556f,
0.055648f, -0.204043f, 1.057311f
if (bidx == 0)
std::swap(coeffs[0], coeffs[6]);
std::swap(coeffs[1], coeffs[7]);
std::swap(coeffs[2], coeffs[8]);
Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
int coeffs[] =
13273, -6296, -2042,
-3970, 7684, 170,
228, -836, 4331
if (bidx == 0)
std::swap(coeffs[0], coeffs[6]);
std::swap(coeffs[1], coeffs[7]);
std::swap(coeffs[2], coeffs[8]);
Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
oclMat oclCoeffs(c);
toRGB_caller(src, dst, bidx, "XYZ2RGB", "", oclCoeffs);
// --- 3/4-channel -> HSV/HLS (CV_8U or CV_32F only). hrange is 360 for float input,
//     180 for the non-_FULL 8-bit codes and 256 for the remaining (_FULL) codes. ---
CV_Assert((scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F));
bidx = code == COLOR_BGR2HSV || code == COLOR_BGR2HLS ||
code == COLOR_BGR2HSV_FULL || code == COLOR_BGR2HLS_FULL ? 0 : 2;
int hrange = depth == CV_32F ? 360 : code == COLOR_BGR2HSV || code == COLOR_RGB2HSV ||
code == COLOR_BGR2HLS || code == COLOR_RGB2HLS ? 180 : 256;
bool is_hsv = code == COLOR_BGR2HSV || code == COLOR_RGB2HSV || code == COLOR_BGR2HSV_FULL || code == COLOR_RGB2HSV_FULL;
dst.create(sz, CV_MAKETYPE(depth, 3));
std::string kernelName = std::string("RGB2") + (is_hsv ? "HSV" : "HLS");
// 8-bit HSV uses 256-entry integer division tables (one for S, one for H per hrange),
// built on first use, uploaded to the device and kept in function-local statics.
// NOTE(review): the init flags are plain `volatile bool`s and no lock is visible here —
// confirm whether concurrent first calls are possible.
if (is_hsv && depth == CV_8U)
static oclMat sdiv_data;
static oclMat hdiv_data180;
static oclMat hdiv_data256;
static int sdiv_table[256];
static int hdiv_table180[256];
static int hdiv_table256[256];
static volatile bool initialized180 = false, initialized256 = false;
volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;
if (!initialized)
int * const hdiv_table = hrange == 180 ? hdiv_table180 : hdiv_table256, hsv_shift = 12;
oclMat & hdiv_data = hrange == 180 ? hdiv_data180 : hdiv_data256;
// entry 0 is fixed to 0 (the fill loops below start at i = 1)
sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
int v = 255 << hsv_shift;
// the S table does not depend on hrange, so it is built only when neither H table exists yet
if (!initialized180 && !initialized256)
for(int i = 1; i < 256; i++ )
sdiv_table[i] = saturate_cast<int>(v/(1.*i));
sdiv_data.upload(Mat(1, 256, CV_32SC1, sdiv_table));
v = hrange << hsv_shift;
for (int i = 1; i < 256; i++ )
hdiv_table[i] = saturate_cast<int>(v/(6.*i));
hdiv_data.upload(Mat(1, 256, CV_32SC1, hdiv_table));
initialized = true;
toHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
// presumably the `else` path (HLS, or float HSV): no tables, hue scale passed as a define
toHSV_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
// --- HSV/HLS -> 3/4-channel (CV_8U or CV_32F only).
//     NOTE(review): the 8-bit fallback hrange here is 255, whereas the forward
//     conversion above uses 256 — confirm this asymmetry is intended. ---
if (dcn <= 0)
dcn = 3;
CV_Assert(scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F));
bidx = code == COLOR_HSV2BGR || code == COLOR_HLS2BGR ||
code == COLOR_HSV2BGR_FULL || code == COLOR_HLS2BGR_FULL ? 0 : 2;
int hrange = depth == CV_32F ? 360 : code == COLOR_HSV2BGR || code == COLOR_HSV2RGB ||
code == COLOR_HLS2BGR || code == COLOR_HLS2RGB ? 180 : 255;
// NOTE(review): the continuation of this expression is not visible in this copy
bool is_hsv = code == COLOR_HSV2BGR || code == COLOR_HSV2RGB ||
dst.create(sz, CV_MAKETYPE(depth, dcn));
std::string kernelName = std::string(is_hsv ? "HSV" : "HLS") + "2RGB";
fromHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
// --- RGBA <-> mRGBA: 4-channel 8-bit in and out ---
CV_Assert(scn == 4 && depth == CV_8U);
dst.create(sz, CV_MAKETYPE(depth, 4));
std::string kernelName = code == COLOR_RGBA2mRGBA ? "RGBA2mRGBA" : "mRGBA2RGBA";
fromRGB_caller(src, dst, 0, kernelName);
// presumably the `default:` branch — any other code is rejected
CV_Error(Error::StsBadFlag, "Unknown/unsupported color conversion code" );
// Public entry point for OpenCL colour conversion: forwards all arguments unchanged
// to the file-local cvtColor_caller.
void cv::ocl::cvtColor(const oclMat &src, oclMat &dst, int code, int dcn)
cvtColor_caller(src, dst, code, dcn);
@ -1,75 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Chunpeng Zhang,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
// Runs the "columnSum" kernel from imgproc_columnsum on `src`, writing into `dst`.
//   src - input; must be CV_32FC1 (asserted).
//   dst - output; (re)created with the same size and type as `src`.
// NOTE(review): braces and the operand after `&` in the two cl_mem arguments appear to
// be missing from this copy — confirm against the original source.
void cv::ocl::columnSum(const oclMat &src, oclMat &dst)
CV_Assert(src.type() == CV_32FC1);
dst.create(src.size(), src.type());
// Steps and offsets are divided by the element size before being passed to the kernel.
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
// Argument order: two cl_mem buffers, cols, rows, src_step, dst_step, src_offset, dst_offset.
std::vector< std::pair<size_t, const void *> > args;
// presumably the src and dst device buffers, in that order — TODO confirm
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src_offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset));
// One-dimensional launch: global width is dst.cols, local size is 256.
size_t globalThreads[3] = {dst.cols, 1, 1};
size_t localThreads[3] = {256, 1, 1};
openCLExecuteKernel(src.clCxt, &imgproc_columnsum, "columnSum", globalThreads, localThreads, args, src.oclchannels(), src.depth());
@ -1,174 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
using namespace cv;
using namespace cv::ocl;
namespace cv
namespace ocl
// Returns a textual form of the OpenCL error code `err`.
// NOTE(review): only one named return ("CL_MAP_FAILURE") and the fallback are visible in
// this copy; the switch/case lines that select between them appear to be missing.
// In the fallback the numeric code is formatted into a function-local static buffer,
// so the returned pointer refers to storage that is overwritten by the next such call.
const char *getOpenCLErrorString( int err )
return "CL_MAP_FAILURE";
// return "unknown error code";
static char buf[256];
sprintf(buf, "%d", err);
return buf;
@ -1,229 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
// Authors:
// * Peter Andreas Entschev,
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
// Constructs the detector by storing the three settings in the corresponding members
// and resetting the internal keypoint counter (count_) to 0.
//   _threshold        - stored in `threshold`.
//   _nonmaxSupression - stored in `nonmaxSupression`.
//   _keypointsRatio   - stored in `keypointsRatio`.
cv::ocl::FAST_OCL::FAST_OCL(int _threshold, bool _nonmaxSupression, double _keypointsRatio) :
nonmaxSupression(_nonmaxSupression), threshold(_threshold), keypointsRatio(_keypointsRatio), count_(0)
// Detects keypoints in `image` (optionally restricted by `mask`) and returns them as a
// host-side vector: runs the oclMat overload into the member d_keypoints_, then
// downloads and converts the result into `keypoints`.
void cv::ocl::FAST_OCL::operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints)
// NOTE(review): the statement guarded by this check is not visible in this copy
// (presumably an early return for empty input) — confirm.
if (image.empty())
(*this)(image, mask, d_keypoints_);
downloadKeypoints(d_keypoints_, keypoints);
// Copies the device keypoint matrix `d_keypoints` into a host Mat and converts it
// into `keypoints` via convertKeypoints.
void cv::ocl::FAST_OCL::downloadKeypoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints)
// NOTE(review): the statement guarded by this check is not visible in this copy
// (presumably an early return) — confirm.
if (d_keypoints.empty())
Mat h_keypoints(d_keypoints);
convertKeypoints(h_keypoints, keypoints);
// Converts a host keypoint matrix into KeyPoint objects.
// `h_keypoints` must have ROWS_COUNT rows of 4-byte elements (asserted); each column
// is one keypoint whose x, y and response are read as floats from the X_ROW, Y_ROW
// and RESPONSE_ROW rows. Size is set to FEATURE_SIZE and angle to -1.
void cv::ocl::FAST_OCL::convertKeypoints(const Mat& h_keypoints, std::vector<KeyPoint>& keypoints)
// NOTE(review): the statement guarded by this check is not visible in this copy
// (presumably an early return) — confirm.
if (h_keypoints.empty())
CV_Assert(h_keypoints.rows == ROWS_COUNT && h_keypoints.elemSize() == 4);
int npoints = h_keypoints.cols;
const float* loc_x = h_keypoints.ptr<float>(X_ROW);
const float* loc_y = h_keypoints.ptr<float>(Y_ROW);
const float* response_row = h_keypoints.ptr<float>(RESPONSE_ROW);
// NOTE(review): elements are assigned by index; no resize of `keypoints` to `npoints`
// is visible in this copy — confirm one precedes this loop in the original.
for (int i = 0; i < npoints; ++i)
KeyPoint kp(loc_x[i], loc_y[i], static_cast<float>(FEATURE_SIZE), -1, response_row[i]);
keypoints[i] = kp;
// Device-side detection: computes candidate keypoint locations for `img`/`mask`, then
// fills `keypoints` via getKeyPoints and sets its column count to the number returned.
void cv::ocl::FAST_OCL::operator ()(const oclMat& img, const oclMat& mask, oclMat& keypoints)
calcKeyPointsLocation(img, mask);
// `cols` is overwritten directly with the keypoint count returned by getKeyPoints
keypoints.cols = getKeyPoints(keypoints);
// Finds candidate keypoint locations and stores their count in count_.
//   img  - input image; must be CV_8UC1 (asserted).
//   mask - either empty, or CV_8UC1 with the same size as `img` (asserted).
// Returns count_, clamped to the buffer capacity maxKeypoints.
int cv::ocl::FAST_OCL::calcKeyPointsLocation(const oclMat& img, const oclMat& mask)
CV_Assert(img.type() == CV_8UC1);
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == img.size()));
// Capacity of the location buffer is a fixed fraction (keypointsRatio) of the pixel count.
int maxKeypoints = static_cast<int>(keypointsRatio * img.size().area());
ensureSizeIsEnough(ROWS_COUNT, maxKeypoints, CV_32SC1, kpLoc_);
// The per-pixel score buffer is only sized when non-max suppression is enabled.
if (nonmaxSupression)
ensureSizeIsEnough(img.size(), CV_32SC1, score_);
count_ = calcKeypointsOCL(img, mask, maxKeypoints);
// The value returned by the kernel counter is clamped to the capacity.
count_ = std::min(count_, maxKeypoints);
return count_;
// Launches the featdetect_fast kernel "calcKeypoints" (or "calcKeypointsWithMask" when a
// mask is given) and returns the value of a device-side int counter after the run.
//   img          - input image.
//   mask         - optional mask; when non-empty its buffer and step are added as arguments.
//   maxKeypoints - passed to the kernel as an argument.
// NOTE(review): this copy is missing lines/tokens — the third element and closing of
// `globalThreads`, the operands of several cl_mem arguments, the beginning of the call
// that ends in "...&counter, 0, NULL, NULL));" (presumably a blocking read of counterCL
// into `counter`), and any release of counterCL. Confirm against the original source.
int cv::ocl::FAST_OCL::calcKeypointsOCL(const oclMat& img, const oclMat& mask, int maxKeypoints)
size_t localThreads[3] = {16, 16, 1};
// Global size covers (cols - 6) x (rows - 6), rounded up to a multiple of the local size.
size_t globalThreads[3] = {divUp(img.cols - 6, localThreads[0]) * localThreads[0],
divUp(img.rows - 6, localThreads[1]) * localThreads[1],
Context *clCxt = Context::getContext();
String kernelName = (mask.empty()) ? "calcKeypoints" : "calcKeypointsWithMask";
std::vector< std::pair<size_t, const void *> > args;
// A one-int device buffer, initialised from the host value 0, is passed to the kernel as a counter.
int counter = 0;
int err = CL_SUCCESS;
cl_mem counterCL = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(),
CL_MEM_COPY_HOST_PTR, sizeof(int),
&counter, &err);
// Steps are divided by the element size; nonmaxSupression is passed as an int flag.
int kpLocStep = kpLoc_.step / kpLoc_.elemSize();
int scoreStep = score_.step / score_.elemSize();
int nms = (nonmaxSupression) ? 1 : 0;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
if (!mask.empty()) args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counterCL));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&nms));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&maxKeypoints));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&threshold));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.cols));
if (!mask.empty()) args.push_back( std::make_pair( sizeof(cl_int), (void *)&mask.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&kpLocStep));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&scoreStep));
openCLExecuteKernel(clCxt, &featdetect_fast, kernelName, globalThreads, localThreads, args, -1, -1);
counterCL, CL_TRUE, 0, sizeof(int), &counter, 0, NULL, NULL));
return counter;
// Launches the featdetect_fast kernel "nonmaxSupression" over the count_ candidates and
// returns the value of a device-side int counter after the run.
//   keypoints - matrix whose step (in elements) is passed to the kernel.
// NOTE(review): this copy is missing lines/tokens — the operands of three cl_mem
// arguments, the beginning of the call that ends in "...&counter, 0, NULL, NULL));"
// (presumably a blocking read of counterCL into `counter`), and any release of
// counterCL. Confirm against the original source.
int cv::ocl::FAST_OCL::nonmaxSupressionOCL(oclMat& keypoints)
// One-dimensional launch: global size is count_, local size is 256.
// NOTE(review): count_ is not rounded up to a multiple of 256 in the visible lines.
size_t localThreads[3] = {256, 1, 1};
size_t globalThreads[3] = {count_, 1, 1};
Context *clCxt = Context::getContext();
String kernelName = "nonmaxSupression";
std::vector< std::pair<size_t, const void *> > args;
// A one-int device buffer, initialised from the host value 0, is passed to the kernel as a counter.
int counter = 0;
int err = CL_SUCCESS;
cl_mem counterCL = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(),
CL_MEM_COPY_HOST_PTR, sizeof(int),
&counter, &err);
// Steps are divided by the element size before being passed to the kernel.
int kpLocStep = kpLoc_.step / kpLoc_.elemSize();
int sStep = score_.step / score_.elemSize();
int kStep = keypoints.step / keypoints.elemSize();
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&counterCL));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&count_));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&kpLocStep));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&sStep));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&kStep));
openCLExecuteKernel(clCxt, &featdetect_fast, kernelName, globalThreads, localThreads, args, -1, -1);
counterCL, CL_TRUE, 0, sizeof(int), &counter, 0, NULL, NULL));
return counter;
// Produces the final keypoint matrix from the candidates found by calcKeyPointsLocation.
// Returns 0 when there are no candidates. With non-max suppression enabled, `keypoints`
// is sized to ROWS_COUNT x count_ (CV_32FC1) and the suppression kernel's count is
// returned; otherwise kpLoc_ is converted to CV_32FC1 into `keypoints` and count_ is returned.
int cv::ocl::FAST_OCL::getKeyPoints(oclMat& keypoints)
if (count_ == 0)
return 0;
if (nonmaxSupression)
ensureSizeIsEnough(ROWS_COUNT, count_, CV_32FC1, keypoints);
return nonmaxSupressionOCL(keypoints);
kpLoc_.convertTo(keypoints, CV_32FC1);
// NOTE(review): `k` is not used in the visible lines — this conversion to a host Mat
// looks like leftover debugging code; confirm.
Mat k = keypoints;
return count_;
void cv::ocl::FAST_OCL::release()
@ -1,382 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Peng Xiao,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
using namespace cv;
using namespace cv::ocl;
#if !defined HAVE_CLAMDFFT
// Stub used when HAVE_CLAMDFFT is not defined: always raises
// Error::OpenCLNoAMDBlasFft; all arguments are ignored.
void cv::ocl::dft(const oclMat&, oclMat&, Size, int)
CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented");
// Stub used when HAVE_CLAMDFFT is not defined: fft_teardown is declared and defined
// as a no-op so that code calling it still links.
namespace cv { namespace ocl {
void fft_teardown();
} }
void cv::ocl::fft_teardown() { }
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
namespace cv
namespace ocl
void fft_setup();
void fft_teardown();
// Kind of transform a plan performs. (The FftPlan constructor uses a real input
// layout for R2C and a real output layout for C2R.)
enum FftType
C2R = 1, // opencl HERMITIAN_INTERLEAVED to real
R2C = 2, // real to opencl HERMITIAN_INTERLEAVED
C2C = 3 // complex to complex
// One clAmdFft plan together with the parameters it was created for.
// The parameter members are const; a copy-assignment operator is declared but no
// definition is visible here (presumably to forbid assignment — confirm).
// NOTE(review): access specifiers and braces are not visible in this copy.
struct FftPlan
clAmdFftPlanHandle plHandle;
FftPlan& operator=(const FftPlan&);
// Creates ("bakes") a plan for the given size, steps, depth, flags and transform type.
FftPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type);
// Returns the underlying clAmdFft plan handle.
inline clAmdFftPlanHandle getPlanHandle() { return plHandle; }
// Parameters the plan was built with.
const Size dft_size;
const int src_step, dst_step;
const int depth;
const int flags;
const FftType type;
// Process-wide store of FftPlan objects, reached through getPlanCache().
// The single instance is created with `new` on first use; no lock is visible around
// that creation. fft_setup() and fft_teardown() are friends and manipulate
// `started`, `planStore` and `setupData` directly.
// NOTE(review): access specifiers, braces and the constructor/destructor are not visible
// in this copy.
class PlanCache
static PlanCache* planCache;
bool started;
// Plans are stored as raw pointers; fft_teardown() deletes them.
std::vector<FftPlan *> planStore;
clAmdFftSetupData *setupData;
friend void fft_setup();
friend void fft_teardown();
// Returns the shared instance, allocating it on the first call.
static PlanCache* getPlanCache()
if (NULL == planCache)
planCache = new PlanCache();
return planCache;
// return a baked plan.
// if there is one matched plan, return it
// if not, bake a new one, put it into the planStore and return it.
static FftPlan* getPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type);
// remove a single plan from the store
// return true if the plan is successfully removed
// else presumably return false (the original comment is cut off here — confirm)
static bool removePlan(clAmdFftPlanHandle );
// Definition of the instance pointer; NULL until getPlanCache() is first called.
PlanCache* PlanCache::planCache = NULL;
// Prepares the shared PlanCache for clAmdFft use: allocates the setup-data
// structure if it does not exist yet, initializes it through
// clAmdFftInitSetupData and marks the cache as started.
void cv::ocl::fft_setup()
PlanCache& pCache = *PlanCache::getPlanCache();
// setupData is reset to NULL by fft_teardown(), so it is re-created after a teardown.
if (pCache.setupData == NULL)
pCache.setupData = new clAmdFftSetupData;
openCLSafeCall(clAmdFftInitSetupData( pCache.setupData ));
pCache.started = true;
// Releases everything owned by the PlanCache: deletes each cached FftPlan,
// calls clAmdFftTeardown, frees the setup data and clears the started flag.
void cv::ocl::fft_teardown()
PlanCache& pCache = *PlanCache::getPlanCache();
for(size_t i = 0; i < pCache.planStore.size(); i ++)
delete pCache.planStore[i];
// NOTE(review): planStore is not cleared in the lines visible here, so it would
// keep the deleted pointers — confirm against the full file.
openCLSafeCall( clAmdFftTeardown( ) );
// std::bad_alloc is deliberately swallowed here.
catch (const std::bad_alloc &)
{ }
delete pCache.setupData; pCache.setupData = NULL;
pCache.started = false;
// bake a new plan
// Creates a clAmdFft plan for the given DFT size, row steps (in bytes), depth,
// DFT flags and transform type, configures it and bakes it for the current
// command queue. Throws std::exception for an unsupported transform type.
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type)
: plHandle(0), dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), depth(_depth), flags(_flags), type(_type)
bool is_1d_input = (_dft_size.height == 1);
int is_row_dft = flags & DFT_ROWS;
int is_scaled_dft = flags & DFT_SCALE;
int is_inverse = flags & DFT_INVERSE;
//clAmdFftResultLocation place;
clAmdFftLayout inLayout;
clAmdFftLayout outLayout;
// A single-row input or a row-wise DFT is executed as (batched) 1D transforms.
clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D;
// With DFT_ROWS every row is an independent transform, so the batch is the row count.
size_t batchSize = is_row_dft ? dft_size.height : 1;
size_t clLengthsIn[ 3 ] = {1, 1, 1};
size_t clStridesIn[ 3 ] = {1, 1, 1};
//size_t clLengthsOut[ 3 ] = {1, 1, 1};
size_t clStridesOut[ 3 ] = {1, 1, 1};
clLengthsIn[0] = dft_size.width;
clLengthsIn[1] = is_row_dft ? 1 : dft_size.height;
clStridesIn[0] = 1;
clStridesOut[0] = 1;
// Row strides are converted from bytes to elements: a complex element occupies
// two values of the given depth, a real element one.
case C2C:
clStridesIn[1] = src_step / (2*CV_ELEM_SIZE(_depth));
clStridesOut[1] = dst_step / (2*CV_ELEM_SIZE(_depth));
case R2C:
inLayout = CLFFT_REAL;
clStridesIn[1] = src_step / CV_ELEM_SIZE(_depth);
clStridesOut[1] = dst_step / (2*CV_ELEM_SIZE(_depth));
case C2R:
outLayout = CLFFT_REAL;
clStridesIn[1] = src_step / (2*CV_ELEM_SIZE(_depth));
clStridesOut[1] = dst_step / CV_ELEM_SIZE(_depth);
//std::runtime_error("does not support this convertion!");
std::cout << "Does not support this convertion!" << std::endl;
throw std::exception();
clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1];
clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];
openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getClContextPtr(), dim, clLengthsIn ) );
openCLSafeCall( clAmdFftSetPlanPrecision( plHandle, depth == CV_64F ? CLFFT_DOUBLE : CLFFT_SINGLE ) );
openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
openCLSafeCall( clAmdFftSetPlanBatchSize( plHandle, batchSize ) );
openCLSafeCall( clAmdFftSetPlanInStride ( plHandle, dim, clStridesIn ) );
openCLSafeCall( clAmdFftSetPlanOutStride ( plHandle, dim, clStridesOut ) );
// The distance between consecutive batch items is the stride entry indexed by dim.
openCLSafeCall( clAmdFftSetPlanDistance ( plHandle, clStridesIn[ dim ], clStridesOut[ dim ]) );
// DFT_SCALE divides the result by the number of elements; otherwise no scaling.
float scale_ = is_scaled_dft ? 1.f / _dft_size.area() : 1.f;
openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) );
//ready to bake
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getClCommandQueuePtr(), NULL, NULL ) );
// NOTE(review): the signature line for this statement is not present in this view;
// it destroys the plan handle, so presumably it is the FftPlan destructor body — confirm.
openCLSafeCall( clAmdFftDestroyPlan( &plHandle ) );
// NOTE(review): constructor initializer list whose signature line is not present in
// this view; presumably the PlanCache constructor — confirm.
: started(false),
planStore(std::vector<cv::ocl::FftPlan *>()),
// Returns a baked plan for the requested configuration.
//
// The cache is searched for a plan whose size, flags, source/destination steps,
// depth and transform type all match. If none is found, a new plan is baked,
// stored in the cache (so it can be reused and is released by fft_teardown()
// or removePlan()) and returned. The cache keeps ownership of the returned plan.
FftPlan* cv::ocl::PlanCache::getPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type)
{
    PlanCache& pCache = *PlanCache::getPlanCache();
    std::vector<FftPlan *>& pStore = pCache.planStore;

    // look for an already baked plan with identical parameters
    for (size_t i = 0; i < pStore.size(); i++)
    {
        FftPlan *plan = pStore[i];
        if (plan->dft_size.width == _dft_size.width &&
            plan->dft_size.height == _dft_size.height &&
            plan->flags == _flags &&
            plan->src_step == _src_step &&
            plan->dst_step == _dst_step &&
            plan->depth == _depth &&
            plan->type == _type)
        {
            return plan;
        }
    }

    // no baked plan is found: bake one and remember it; without the push_back
    // every call would bake a fresh plan that is never reused nor freed
    FftPlan *newPlan = new FftPlan(_dft_size, _src_step, _dst_step, _depth, _flags, _type);
    pStore.push_back(newPlan);
    return newPlan;
}
// Removes the plan with the given clAmdFft handle from the cache and destroys it.
// Returns true if a matching plan was found and removed, false otherwise.
bool cv::ocl::PlanCache::removePlan(clAmdFftPlanHandle plHandle)
{
    PlanCache& pCache = *PlanCache::getPlanCache();
    std::vector<FftPlan *>& pStore = pCache.planStore;

    for (size_t i = 0; i < pStore.size(); i++)
    {
        if (pStore[i]->getPlanHandle() == plHandle)
        {
            // Keep the pointer before erasing: after erase() index i refers to the
            // next element (or is past the end), so deleting pStore[i] afterwards
            // would destroy the wrong plan and leak the removed one.
            FftPlan *plan = pStore[i];
            pStore.erase(pStore.begin() + i);
            delete plan;
            return true;
        }
    }
    return false;
}
// Computes a DFT of src into dst using a cached clAmdFft plan.
//  src      - input matrix; two channels are treated as complex input
//  dst      - output matrix, (re)created here according to the transform type
//  dft_size - transform size; Size(0, 0) means "use src.size()"
//  flags    - DFT_* flags (DFT_INVERSE, DFT_REAL_OUTPUT, DFT_SCALE, DFT_ROWS)
// Real-to-real transforms are rejected, as is DFT_SCALE combined with DFT_ROWS.
void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
if(dft_size == Size(0, 0))
dft_size = src.size();
// check if the given dft size is of optimal dft size
CV_Assert(dft_size.area() == getOptimalDFTSize(dft_size.area()));
// the two flags are not compatible
CV_Assert( !((flags & DFT_SCALE) && (flags & DFT_ROWS)) );
//bool is_1d_input = (src.rows == 1);
//int is_row_dft = flags & DFT_ROWS;
//int is_scaled_dft = flags & DFT_SCALE;
int is_inverse = flags & DFT_INVERSE;
bool is_complex_input = src.channels() == 2;
bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
int depth = src.depth();
// We don't support real-to-real transform
CV_Assert(is_complex_input || is_complex_output);
// Encode the transform kind: bit 0 = complex input, bit 1 = complex output.
FftType type = (FftType)(is_complex_input << 0 | is_complex_output << 1);
case C2C:
dst.create(src.rows, src.cols, CV_MAKE_TYPE(depth, 2));
case R2C:
// real input produces cols / 2 + 1 complex columns
dst.create(src.rows, src.cols / 2 + 1, CV_MAKE_TYPE(depth, 2));
case C2R:
// the complex input must hold exactly dft_size.width / 2 + 1 columns
CV_Assert(dft_size.width / 2 + 1 == src.cols && dft_size.height == src.rows);
dst.create(src.rows, dft_size.width, CV_MAKE_TYPE(depth, 1));
//std::runtime_error("does not support this convertion!");
std::cout << "Does not support this convertion!" << std::endl;
throw std::exception();
// Fetch (or bake) the plan matching this size / step / depth / flags / type.
clAmdFftPlanHandle plHandle = PlanCache::getPlan(dft_size, src.step, dst.step, depth, flags, type)->getPlanHandle();
//get the buffersize
size_t buffersize = 0;
openCLSafeCall( clAmdFftGetTmpBufSize(plHandle, &buffersize ) );
//allocate the intermediate buffer
// TODO, bind this with the current FftPlan
cl_mem clMedBuffer = NULL;
if (buffersize)
cl_int medstatus;
clMedBuffer = clCreateBuffer ( *(cl_context*)(src.clCxt->getOpenCLContextPtr()), CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
openCLSafeCall( medstatus );
cl_command_queue clq = *(cl_command_queue*)(src.clCxt->getOpenCLCommandQueuePtr());
openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
(cl_mem *)&, (cl_mem *)&, clMedBuffer ) );
// wait for the transform to finish before returning
openCLSafeCall( clFinish(clq) );
File diff suppressed because it is too large
Load Diff
@ -1,205 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Peng Xiao,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
namespace cv { namespace ocl {
// used for clAmdBlas library to avoid redundant setup/teardown
void clBlasSetup();
void clBlasTeardown();
}} /* namespace cv { namespace ocl */
// Fallback definitions used when HAVE_CLAMDBLAS is not defined.
#if !defined HAVE_CLAMDBLAS
// gemm for this configuration: raises Error::OpenCLNoAMDBlasFft through CV_Error.
void cv::ocl::gemm(const oclMat&, const oclMat&, double,
const oclMat&, double, oclMat&, int)
CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL BLAS is not implemented");
// Setup raises the same error, since there is no BLAS backend to initialize.
void cv::ocl::clBlasSetup()
CV_Error(Error::OpenCLNoAMDBlasFft, "OpenCL BLAS is not implemented");
void cv::ocl::clBlasTeardown()
//intentionally do nothing
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
using namespace cv;
// Tracks whether the clAmdBlas library is currently set up; it is only written
// while holding the initialization mutex in the two functions below.
static bool clBlasInitialized = false;
// Marks clAmdBlas as initialized under the initialization mutex.
// NOTE(review): the actual library setup call is not visible in this view — confirm.
void cv::ocl::clBlasSetup()
AutoLock lock(getInitializationMutex());
clBlasInitialized = true;
// Marks clAmdBlas as not initialized under the initialization mutex.
// NOTE(review): the actual library teardown call is not visible in this view — confirm.
void cv::ocl::clBlasTeardown()
AutoLock lock(getInitializationMutex());
clBlasInitialized = false;
// Generalized matrix multiplication through clAmdBlas.
//  src1, src2 - input matrices; src1.cols must equal src2.rows
//  alpha      - scale factor passed to the BLAS call
//  src3       - optional third matrix; when non-empty it must be src1.rows x src2.cols
//  beta       - scale factor passed to the BLAS call
//  dst        - output, created as src1.rows x src2.cols with the type of src1
//  flags      - GEMM_1_T / GEMM_2_T select transposition of src1 / src2;
//               GEMM_3_T is not supported
// Handles CV_32FC1, CV_64FC1, CV_32FC2 and CV_64FC2 via the S/D/C/Z gemm variants.
void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
const oclMat &src3, double beta, oclMat &dst, int flags)
CV_Assert(src1.cols == src2.rows &&
(src3.empty() || (src1.rows == src3.rows && src2.cols == src3.cols)));
CV_Assert(!(cv::GEMM_3_T & flags)); // cv::GEMM_3_T is not supported
dst.create(src1.rows, src2.cols, src1.type());
const clAmdBlasTranspose transA = (cv::GEMM_1_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
const clAmdBlasTranspose transB = (cv::GEMM_2_T & flags) ? clAmdBlasTrans : clAmdBlasNoTrans;
const clAmdBlasOrder order = clAmdBlasRowMajor;
const int M = src1.rows;
const int N = src2.cols;
const int K = src1.cols;
// Leading dimensions and offsets start out in bytes and are divided by the
// element size in each case below before being passed to the BLAS call.
int lda = src1.step;
int ldb = src2.step;
int ldc = dst.step;
int offa = src1.offset;
int offb = src2.offset;
int offc = dst.offset;
cl_command_queue clq = *(cl_command_queue*)src1.clCxt->getOpenCLCommandQueuePtr();
// single-precision real
case CV_32FC1:
lda /= sizeof(float);
ldb /= sizeof(float);
ldc /= sizeof(float);
offa /= sizeof(float);
offb /= sizeof(float);
offc /= sizeof(float);
clAmdBlasSgemmEx(order, transA, transB, M, N, K,
alpha, (const cl_mem), offa, lda, (const cl_mem), offb, ldb,
beta, (cl_mem), offc, ldc, 1, &clq, 0, NULL, NULL)
// double-precision real
case CV_64FC1:
lda /= sizeof(double);
ldb /= sizeof(double);
ldc /= sizeof(double);
offa /= sizeof(double);
offb /= sizeof(double);
offc /= sizeof(double);
clAmdBlasDgemmEx(order, transA, transB, M, N, K,
alpha, (const cl_mem), offa, lda, (const cl_mem), offb, ldb,
beta, (cl_mem), offc, ldc, 1, &clq, 0, NULL, NULL)
// single-precision complex: an element is two floats, alpha/beta get a zero imaginary part
case CV_32FC2:
lda /= (2*sizeof(float));
ldb /= (2*sizeof(float));
ldc /= (2*sizeof(float));
offa /= (2*sizeof(float));
offb /= (2*sizeof(float));
offc /= (2*sizeof(float));
cl_float2 alpha_2 = {{alpha, 0}};
cl_float2 beta_2 = {{beta, 0}};
clAmdBlasCgemmEx(order, transA, transB, M, N, K,
alpha_2, (const cl_mem), offa, lda, (const cl_mem), offb, ldb,
beta_2, (cl_mem), offc, ldc, 1, &clq, 0, NULL, NULL)
// double-precision complex: an element is two doubles, alpha/beta get a zero imaginary part
case CV_64FC2:
lda /= (2*sizeof(double));
ldb /= (2*sizeof(double));
ldc /= (2*sizeof(double));
offa /= (2*sizeof(double));
offb /= (2*sizeof(double));
offc /= (2*sizeof(double));
cl_double2 alpha_2 = {{alpha, 0}};
cl_double2 beta_2 = {{beta, 0}};
clAmdBlasZgemmEx(order, transA, transB, M, N, K,
alpha_2, (const cl_mem), offa, lda, (const cl_mem), offb, ldb,
beta_2, (cl_mem), offc, ldc, 1, &clq, 0, NULL, NULL)
@ -1,300 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Peng Xiao,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
// compact structure for corners
struct DefCorner
{
    float eig;  // eigenvalue of corner
    short x;    // x coordinate of corner point
    short y;    // y coordinate of corner point
};

// compare procedure for corners; it is used for sorting on the host side.
// Orders corners by descending eigenvalue, so the strongest corner comes first.
// A plain functor is sufficient for std::sort: the former std::binary_function
// base was deprecated in C++11 and removed in C++17.
struct DefCornerCompare
{
    bool operator()(const DefCorner& a, const DefCorner& b) const
    {
        return a.eig > b.eig;
    }
};
// find corners on matrix and put it into array
// Builds the argument list for the "findCorners" kernel of imgproc_gftt and
// launches it with one work-item per element of eig_mat in 16x16 work-groups.
static void findCorners_caller(
const oclMat& eig_mat, //input matrix with eigenvalues
oclMat& eigMinMax, //input with min and max values of eigenvalues
const float qualityLevel,
const oclMat& mask,
oclMat& corners, //output array with detected corners
oclMat& counter) //output value with number of detected corners, have to be 0 before call
String opt;
std::vector<int> k;
Context * cxt = Context::getContext();
std::vector< std::pair<size_t, const void*> > args;
// mask row step expressed in elements rather than bytes
const int mask_strip = mask.step / mask.elemSize1();
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&(;
int src_pitch = (int)eig_mat.step;
args.push_back(std::make_pair( sizeof(cl_int), (void*)&src_pitch ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)& ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)& ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&mask_strip));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)& ));
args.push_back(std::make_pair( sizeof(cl_float), (void*)&qualityLevel ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig_mat.rows ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig_mat.cols ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&corners.cols ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)& ));
size_t globalThreads[3] = {eig_mat.cols, eig_mat.rows, 1};
size_t localThreads[3] = {16, 16, 1};
// NOTE(review): presumably only added when a mask is supplied; the guarding
// condition is not visible in this view — confirm.
opt += " -D WITH_MASK=1";
openCLExecuteKernel(cxt, &imgproc_gftt, "findCorners", globalThreads, localThreads, args, -1, -1, opt.c_str());
// Runs a two-pass min/max reduction over src: a parallel "arithm_op_minMax"
// pass with one 256-thread work-group per compute unit, followed by a single
// work-item "arithm_op_minMax_final" pass. dst is sized here to hold the
// per-group partial results; tozero is handed to the final pass.
static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
CV_Assert(groupnum != 0);
// two values (min and max) per work-group
int dbsize = groupnum * 2 * src.elemSize();
ensureSizeIsEnough(1, dbsize, CV_8UC1, dst);
cl_mem dst_data = reinterpret_cast<cl_mem>(;
// Describe the ROI inside the parent buffer in element units: full row width,
// columns before the ROI, columns after it, and the resulting valid column count.
int all_cols = src.step / src.elemSize();
int pre_cols = (src.offset % src.step) / src.elemSize();
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
int invalid_cols = pre_cols + sec_cols;
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;
int offset = src.offset / src.elemSize();
// first parallel pass
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
size_t globalThreads[3] = {groupnum * 256, 1, 1};
size_t localThreads[3] = {256, 1, 1};
openCLExecuteKernel(src.clCxt, &arithm_minMax, "arithm_op_minMax", globalThreads, localThreads,
args, -1, -1, "-D T=float -D DEPTH_5");
// run the final "serial" kernel to accumulate results from the work-groups and reset the corner counter
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
size_t globalThreads[3] = {1, 1, 1};
size_t localThreads[3] = {1, 1, 1};
openCLExecuteKernel(src.clCxt, &imgproc_gftt, "arithm_op_minMax_final", globalThreads, localThreads,
args, -1, -1);
// Detects strong corners in image and uploads them to corners as a 1xN CV_32FC2 row.
//  image   - input image
//  corners - output corner coordinates
//  mask    - optional CV_8UC1 mask of the same size as image
// Steps: compute the corner response (Harris or minimal eigenvalue), find its
// min/max on the device, collect candidates on the device, then sort them and
// apply the maxCorners / minDistance limits on the host.
void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
ensureSizeIsEnough(image.size(), CV_32F, eig_);
if (useHarrisDetector)
cornerHarris_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
ensureSizeIsEnough(1,1, CV_32SC1, counter_);
// find max eigenvalue and reset detected counters
minMaxEig_caller(eig_, eig_minmax_, counter_);
// allocate buffer for kernels: room for 5% of the pixels, but at least 1024 candidates
int corner_array_size = std::max(1024, static_cast<int>(image.size().area() * 0.05));
ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
int total = tmpCorners_.cols; // by default the number of corner is full array
std::vector<DefCorner> tmp(tmpCorners_.cols); // input buffer with corner for HOST part of algorithm
// find points with high eigenvalue and put it into the output array
findCorners_caller(eig_, eig_minmax_, static_cast<float>(qualityLevel), mask, tmpCorners_, counter_);
// send non-blocking request to read real non-zero number of corners to sort it on the HOST side
openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem), CL_FALSE, 0, sizeof(int), &total, 0, NULL, NULL));
// NOTE(review): the read above is enqueued with CL_FALSE, so total may not have
// been written yet when it is tested on the next line — confirm this is intended.
if (total == 0)
// check for trivial case
// blocking read whole corners array (sorted or not sorted)
openCLReadBuffer(tmpCorners_.clCxt, (cl_mem), &tmp[0], tmpCorners_.cols * sizeof(DefCorner));
// sort detected corners on cpu side.
std::sort(tmp.begin(), tmp.end(), DefCornerCompare());
// estimate maximal size of final output array
int total_max = maxCorners > 0 ? std::min(maxCorners, total) : total;
// squared minimal distance, compared against squared pixel distances below
int D2 = (int)ceil(minDistance * minDistance);
// allocate output buffer
std::vector<Point2f> tmp2;
if (minDistance < 1)
// we have not distance restriction. then just copy with conversion maximal allowed points into output array
for (int i = 0; i < total_max; ++i)
tmp2.push_back(Point2f(tmp[i].x, tmp[i].y));
// we have distance restriction. then start coping to output array from the first element and check distance for each next one
// Accepted corners are bucketed into a grid of cell_size x cell_size cells so that
// each candidate is only compared with corners in its own and the adjacent cells.
const int cell_size = cvRound(minDistance);
const int grid_width = (image.cols + cell_size - 1) / cell_size;
const int grid_height = (image.rows + cell_size - 1) / cell_size;
std::vector< std::vector<Point2i> > grid(grid_width * grid_height);
for (int i = 0; i < total ; ++i)
DefCorner p = tmp[i];
bool good = true;
int x_cell = static_cast<int>(p.x / cell_size);
int y_cell = static_cast<int>(p.y / cell_size);
int x1 = x_cell - 1;
int y1 = y_cell - 1;
int x2 = x_cell + 1;
int y2 = y_cell + 1;
// boundary check
x1 = std::max(0, x1);
y1 = std::max(0, y1);
x2 = std::min(grid_width - 1, x2);
y2 = std::min(grid_height - 1, y2);
for (int yy = y1; yy <= y2; yy++)
for (int xx = x1; xx <= x2; xx++)
std::vector<Point2i>& m = grid[yy * grid_width + xx];
if (m.empty())
for(size_t j = 0; j < m.size(); j++)
int dx = p.x - m[j].x;
int dy = p.y - m[j].y;
// reject the candidate as soon as one accepted corner is closer than minDistance
if (dx * dx + dy * dy < D2)
good = false;
goto break_out_;
grid[y_cell * grid_width + x_cell].push_back(Point2i(p.x, p.y));
tmp2.push_back(Point2f(p.x, p.y));
if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
int final_size = static_cast<int>(tmp2.size());
if (final_size > 0)
corners.upload(Mat(1, final_size, CV_32FC2, &tmp2[0]));
// Copies device-side corner points (CV_32FC2) into the host vector points_v.
// NOTE(review): most of the body is not present in this view; only the type
// check and the byte-count argument of the copy remain — confirm against the full file.
void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v)
CV_DbgAssert(points.type() == CV_32FC2);
points.cols * sizeof(Point2f),
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,398 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
#if !defined (HAVE_OPENCL)
// Stubs compiled when HAVE_OPENCL is not defined: every entry point of the
// Hough circle API simply reports the missing backend through throw_nogpu().

// HoughCircles overload that uses an internal temporary buffer.
void cv::ocl::HoughCircles(const oclMat&, oclMat&, int, float, float, int, int, int, int, int)
{
    throw_nogpu();
}

// HoughCircles overload that takes a caller-provided HoughCirclesBuf.
void cv::ocl::HoughCircles(const oclMat&, oclMat&, HoughCirclesBuf&, int, float, float, int, int, int, int, int)
{
    throw_nogpu();
}

// Download helper for detected circles.
void cv::ocl::HoughCirclesDownload(const oclMat&, OutputArray)
{
    throw_nogpu();
}
#else /* !defined (HAVE_OPENCL) */
// Rounds a up to the next multiple of b that is strictly greater than a
// (an exact multiple is still bumped by b, so callers test a % b == 0 first).
// The whole expansion is parenthesized so the macro stays correct when it is
// used inside a larger expression such as x / MUL_UP(a, b).
#define MUL_UP(a, b) (((a)/(b)+1)*(b))
// common functions
// Launches the "buildPointList" kernel of imgproc_hough on src, which fills
// list, and returns the point count read back (blocking) from a device counter.
int buildPointList_gpu(const oclMat& src, oclMat& list)
const int PIXELS_PER_THREAD = 16;
int totalCount = 0;
int err = CL_SUCCESS;
cl_mem counter = clCreateBuffer(*(cl_context*)src.clCxt->getOpenCLContextPtr(),
const size_t blkSizeX = 32;
const size_t blkSizeY = 4;
size_t localThreads[3] = { blkSizeX, blkSizeY, 1 };
// global sizes are rounded up to a whole number of blocks in each dimension
const size_t glbSizeX = src.cols % (PIXELS_PER_BLOCK) == 0 ? src.cols : MUL_UP(src.cols, PIXELS_PER_BLOCK);
const size_t glbSizeY = src.rows % blkSizeY == 0 ? src.rows : MUL_UP(src.rows, blkSizeY);
size_t globalThreads[3] = { glbSizeX, glbSizeY, 1 };
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.step ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&counter ));
// WARNING: disabled until
openCLExecuteKernel(src.clCxt, &imgproc_hough, "buildPointList", globalThreads, localThreads, args, -1, -1);
// blocking read (CL_TRUE) of the counter written by the kernel
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)src.clCxt->getOpenCLCommandQueuePtr(), counter, CL_TRUE, 0, sizeof(int), &totalCount, 0, NULL, NULL));
return totalCount;
// HoughCircles
// Launches the "circlesAccumCenters" kernel of imgproc_hough with 256-thread
// work-groups covering count points. The kernel receives the point list, the
// dx / dy matrices with their steps, the accumulator with its step, the
// accumulator size without a one-cell border on each side, the radius range and idp.
void circlesAccumCenters_gpu(const oclMat& list, int count, const oclMat& dx, const oclMat& dy, oclMat& accum, int minRadius, int maxRadius, float idp)
const size_t blkSizeX = 256;
size_t localThreads[3] = { 256, 1, 1 };
// global size rounded up to a whole number of work-groups
const size_t glbSizeX = count % blkSizeX == 0 ? count : MUL_UP(count, blkSizeX);
size_t globalThreads[3] = { glbSizeX, 1, 1 };
const int width = accum.cols - 2;
const int height = accum.rows - 2;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&count ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dx.step ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dy.step ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&accum.step ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&width ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&height ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&minRadius));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&maxRadius));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&idp));
openCLExecuteKernel(accum.clCxt, &imgproc_hough, "circlesAccumCenters", globalThreads, localThreads, args, -1, -1);
// Launches the "buildCentersList" kernel of imgproc_hough over the accumulator
// interior (accum without a one-cell border), passing threshold, and returns
// the center count read back (blocking) from a device counter.
int buildCentersList_gpu(const oclMat& accum, oclMat& centers, int threshold)
int totalCount = 0;
int err = CL_SUCCESS;
cl_mem counter = clCreateBuffer(*(cl_context*)accum.clCxt->getOpenCLContextPtr(),
const size_t blkSizeX = 32;
const size_t blkSizeY = 8;
size_t localThreads[3] = { blkSizeX, blkSizeY, 1 };
// global sizes are rounded up to a whole number of blocks in each dimension
const size_t glbSizeX = (accum.cols - 2) % blkSizeX == 0 ? accum.cols - 2 : MUL_UP(accum.cols - 2, blkSizeX);
const size_t glbSizeY = (accum.rows - 2) % blkSizeY == 0 ? accum.rows - 2 : MUL_UP(accum.rows - 2, blkSizeY);
size_t globalThreads[3] = { glbSizeX, glbSizeY, 1 };
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&accum.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&accum.rows ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&accum.step ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)¢ ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&threshold ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&counter ));
openCLExecuteKernel(accum.clCxt, &imgproc_hough, "buildCentersList", globalThreads, localThreads, args, -1, -1);
// blocking read (CL_TRUE) of the counter written by the kernel
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)accum.clCxt->getOpenCLCommandQueuePtr(), counter, CL_TRUE, 0, sizeof(int), &totalCount, 0, NULL, NULL));
return totalCount;
// Launches the "circlesAccumRadius" kernel of imgproc_hough with one work-group
// of the device's maximum size per candidate center, and returns the number of
// circles read back from a device counter, capped at maxCircles.
// circles must have a zero offset.
int circlesAccumRadius_gpu(const oclMat& centers, int centersCount,
const oclMat& list, int count,
oclMat& circles, int maxCircles,
float dp, int minRadius, int maxRadius, int threshold)
int totalCount = 0;
int err = CL_SUCCESS;
cl_mem counter = clCreateBuffer(*(cl_context*)circles.clCxt->getOpenCLContextPtr(),
const size_t blkSizeX = circles.clCxt->getDeviceInfo().maxWorkGroupSize;
size_t localThreads[3] = { blkSizeX, 1, 1 };
const size_t glbSizeX = centersCount * blkSizeX;
size_t globalThreads[3] = { glbSizeX, 1, 1 };
// one histogram bin per radius in [minRadius, maxRadius]
const int histSize = maxRadius - minRadius + 1;
// byte size of the kernel argument passed with a NULL pointer below: histSize + 2 ints
size_t smemSize = (histSize + 2) * sizeof(int);
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)¢ ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&count ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&maxCircles ));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&dp ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&minRadius ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&maxRadius ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&histSize ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&threshold ));
args.push_back( std::make_pair( smemSize , (void *)NULL ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&counter ));
CV_Assert(circles.offset == 0);
openCLExecuteKernel(circles.clCxt, &imgproc_hough, "circlesAccumRadius", globalThreads, localThreads, args, -1, -1);
// blocking read (CL_TRUE) of the counter written by the kernel
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)circles.clCxt->getOpenCLCommandQueuePtr(), counter, CL_TRUE, 0, sizeof(int), &totalCount, 0, NULL, NULL));
// never report more circles than the output buffer was sized for
totalCount = std::min(totalCount, maxCircles);
return totalCount;
} // namespace
// Convenience overload: runs the buffer-taking HoughCircles with a temporary
// HoughCirclesBuf that lives only for the duration of this call.
void cv::ocl::HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
HoughCirclesBuf buf;
HoughCircles(src, circles, buf, method, dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius, maxCircles);
// Detects circles in an 8-bit single-channel image with the gradient Hough method.
//  src            - CV_8UC1 input; both dimensions must be below the unsigned short maximum
//  circles        - output, sized 1 x maxCircles CV_32FC3; cols is set to the detected
//                   count when that count is positive
//  buf            - reusable intermediate buffers (edges, gradients, point/center lists, accumulator)
//  method         - must be HOUGH_GRADIENT
//  dp             - must be positive; the accumulator is sized with 1/dp of the image size
//  minDist        - when > 1, centers closer than this to an already kept center are dropped
//  cannyThreshold - upper Canny threshold; the lower one is max(cannyThreshold / 2, 1)
//  votesThreshold - threshold passed to center and radius accumulation
//  minRadius, maxRadius - radius range, with 0 < minRadius < maxRadius
//  maxCircles     - capacity of the output
void cv::ocl::HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method,
float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
CV_Assert(src.type() == CV_8UC1);
CV_Assert(src.cols < std::numeric_limits<unsigned short>::max());
CV_Assert(src.rows < std::numeric_limits<unsigned short>::max());
CV_Assert(method == HOUGH_GRADIENT);
CV_Assert(dp > 0);
CV_Assert(minRadius > 0 && maxRadius > minRadius);
CV_Assert(cannyThreshold > 0);
CV_Assert(votesThreshold > 0);
CV_Assert(maxCircles > 0);
const float idp = 1.0f / dp;
// step 1: edge detection, then gather the edge pixels into a point list
cv::ocl::Canny(src, buf.cannyBuf, buf.edges, std::max(cannyThreshold / 2, 1), cannyThreshold);
ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.srcPoints);
const int pointsCount = buildPointList_gpu(buf.edges, buf.srcPoints);
if (pointsCount == 0)
// step 2: vote for centers in an accumulator with a one-cell border on each side
ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, buf.accum);
circlesAccumCenters_gpu(buf.srcPoints, pointsCount, buf.cannyBuf.dx, buf.cannyBuf.dy, buf.accum, minRadius, maxRadius, idp);
ensureSizeIsEnough(1, src.size().area(), CV_32SC1, buf.centers);
int centersCount = buildCentersList_gpu(buf.accum, buf.centers, votesThreshold);
if (centersCount == 0)
// step 3 (optional): thin out centers that are closer than minDist, on the host
if (minDist > 1)
cv::AutoBuffer<unsigned int> oldBuf_(centersCount);
cv::AutoBuffer<unsigned int> newBuf_(centersCount);
int newCount = 0;
unsigned int* oldBuf = oldBuf_;
unsigned int* newBuf = newBuf_;
centersCount * sizeof(unsigned int),
// Kept centers are bucketed into cellSize x cellSize cells so that each
// candidate is only compared with centers in its own and the adjacent cells.
const int cellSize = cvRound(minDist);
const int gridWidth = (src.cols + cellSize - 1) / cellSize;
const int gridHeight = (src.rows + cellSize - 1) / cellSize;
std::vector< std::vector<unsigned int> > grid(gridWidth * gridHeight);
const float minDist2 = minDist * minDist;
for (int i = 0; i < centersCount; ++i)
// a center is packed into 32 bits: x in the low 16 bits, y in the high 16 bits
unsigned int p = oldBuf[i];
const int px = p & 0xFFFF;
const int py = (p >> 16) & 0xFFFF;
bool good = true;
int xCell = static_cast<int>(px / cellSize);
int yCell = static_cast<int>(py / cellSize);
int x1 = xCell - 1;
int y1 = yCell - 1;
int x2 = xCell + 1;
int y2 = yCell + 1;
// boundary check
x1 = std::max(0, x1);
y1 = std::max(0, y1);
x2 = std::min(gridWidth - 1, x2);
y2 = std::min(gridHeight - 1, y2);
for (int yy = y1; yy <= y2; ++yy)
for (int xx = x1; xx <= x2; ++xx)
std::vector<unsigned int>& m = grid[yy * gridWidth + xx];
for(size_t j = 0; j < m.size(); ++j)
const int val = m[j];
const int jx = val & 0xFFFF;
const int jy = (val >> 16) & 0xFFFF;
float dx = (float)(px - jx);
float dy = (float)(py - jy);
// reject the candidate as soon as one kept center is closer than minDist
if (dx * dx + dy * dy < minDist2)
good = false;
goto break_out;
grid[yCell * gridWidth + xCell].push_back(p);
newBuf[newCount++] = p;
newCount * sizeof(unsigned int),
centersCount = newCount;
// step 4: estimate a radius for every remaining center
ensureSizeIsEnough(1, maxCircles, CV_32FC3, circles);
const int circlesCount = circlesAccumRadius_gpu(buf.centers, centersCount,
buf.srcPoints, pointsCount,
circles, maxCircles,
dp, minRadius, maxRadius, votesThreshold);
if (circlesCount > 0)
// shrink the visible width of the output to the number of detected circles
circles.cols = circlesCount;
// Intended to copy detected circles from the device matrix `d_circles` into the
// host output array `h_circles_` (1 x N, CV_32FC3).
// CV_Error is invoked first with StsNotImplemented, so the statements after it
// are presumably never reached; the FIX ME below gives the reason.
void cv::ocl::HoughCirclesDownload(const oclMat& d_circles, cv::OutputArray h_circles_)
// FIX ME: garbage values are copied!
CV_Error(Error::StsNotImplemented, "HoughCirclesDownload is not implemented");
// NOTE(review): whatever this check guards is not visible in this text.
if (d_circles.empty())
CV_Assert(d_circles.rows == 1 && d_circles.type() == CV_32FC3);
h_circles_.create(1, d_circles.cols, CV_32FC3);
Mat h_circles = h_circles_.getMat();
#endif /* !defined (HAVE_OPENCL) */
File diff suppressed because it is too large
Load Diff
@ -1,235 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Peng Xiao,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
// Forward declarations of the helper routines used by cv::ocl::interpolateFrames.
// All offsets taken by these helpers are plane indices into a buffer made of
// stacked planes; each helper converts them to element offsets itself.
namespace cv
namespace ocl
namespace interpolate
//The following are ported from
// As it is not valid to do pointer offset operations on host for default oclMat's native cl_mem pointer,
// we may have to do this on kernel
// Launches the "memsetKernel" kernel on plane `offset` of `img` with value `val`.
void memsetKernel(float val, oclMat &img, int height, int offset);
// Launches the "normalizeKernel" kernel with a factor plane and a destination plane of `buffer`.
void normalizeKernel(oclMat &buffer, int height, int factor_offset, int dst_offset);
// Launches the "forwardWarpKernel" kernel for `src` using flow (u, v) scaled by time_scale.
void forwardWarpKernel(const oclMat &src, oclMat &buffer, const oclMat &u, const oclMat &v, const float time_scale,
int b_offset, int d_offset); // buffer, dst offset
//OpenCL conversion of nppiStVectorWarp_PSF2x2_32f_C1
void vectorWarp(const oclMat &src, const oclMat &u, const oclMat &v,
oclMat &buffer, int buf_offset, float timeScale, int dst_offset);
//OpenCL conversion of BlendFrames
void blendFrames(const oclMat &frame0, const oclMat &frame1, const oclMat &buffer,
float pos, oclMat &newFrame, cl_mem &, cl_mem &);
// bind a buffer to an image
void bindImgTex(const oclMat &img, cl_mem &tex);
// Produces an intermediate frame `newFrame` between frame0 and frame1 at
// position `pos`, from the forward flow (fu, fv) and the backward flow (bu, bv).
// All inputs must be CV_32FC1, of identical size and identical row step.
// `buf` is (re)created as six stacked planes of the frame size and used as
// scratch space for the warped flow fields.
void cv::ocl::interpolateFrames(const oclMat &frame0, const oclMat &frame1,
const oclMat &fu, const oclMat &fv,
const oclMat &bu, const oclMat &bv,
float pos, oclMat &newFrame, oclMat &buf)
CV_Assert(frame0.type() == CV_32FC1);
CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());
CV_Assert(fu.size() == frame0.size() && fu.type() == frame0.type());
CV_Assert(fv.size() == frame0.size() && fv.type() == frame0.type());
CV_Assert(bu.size() == frame0.size() && bu.type() == frame0.type());
CV_Assert(bv.size() == frame0.size() && bv.type() == frame0.type());
newFrame.create(frame0.size(), frame0.type());
// Six planes of frame0.rows rows each, stacked vertically.
buf.create(6 * frame0.rows, frame0.cols, CV_32FC1);
// The kernels index every matrix with one shared stride, so all steps must match.
size_t step = frame0.step;
CV_Assert(frame1.step == step && fu.step == step && fv.step == step && bu.step == step && bv.step == step && newFrame.step == step && buf.step == step);
cl_mem tex_src0 = 0, tex_src1 = 0;
// warp flow
using namespace interpolate;
bindImgTex(frame0, tex_src0);
bindImgTex(frame1, tex_src1);
// CUDA Offsets
// NOTE(review): only the first enumerator of the plane-index enum is visible in
// this text; cov1, fwdU, fwdV and bwdU used below are presumably its remaining
// members — confirm against the full source.
cov0 = 0,
// vectorWarp(src, u, v, buffer, buf_offset, timeScale, dst_offset):
// the forward flow components are warped by `pos` using scratch plane cov0.
vectorWarp(fu, fu, fv, buf, cov0, pos, fwdU);
vectorWarp(fv, fu, fv, buf, cov0, pos, fwdV);
// The backward flow components are warped by (1 - pos) using scratch plane cov1.
vectorWarp(bu, bu, bv, buf, cov1, 1.0f - pos, bwdU);
// NOTE(review): this call warps `bv` into the same destination plane (bwdU) as
// the previous call, overwriting the warped `bu`. Since buf has six planes,
// a separate plane for the warped `bv` was possibly intended — confirm.
vectorWarp(bv, bu, bv, buf, cov1, 1.0f - pos, bwdU);
blendFrames(frame0, frame1, buf, pos, newFrame, tex_src0, tex_src1);
// Launches the "memsetKernel" OpenCL kernel from the interpolate_frames program.
// `val` is the value passed to the kernel, `height` is the number of rows per
// plane and `offset` is a plane index into `img`; it is converted below into an
// element offset (step * height * offset) before being passed on.
void interpolate::memsetKernel(float val, oclMat &img, int height, int offset)
Context *clCxt = Context::getContext();
String kernelName = "memsetKernel";
std::vector< std::pair<size_t, const void *> > args;
// Row stride in float elements rather than bytes.
int step = img.step / sizeof(float);
// Plane index -> element offset from the start of the buffer.
offset = step * height * offset;
args.push_back( std::make_pair( sizeof(cl_float), (void *)&val));
// NOTE(review): the argument expression on the next line is truncated in this text.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&offset));
// Global size spans img.cols x height; local size is 16x16.
size_t globalThreads[3] = {img.cols, height, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
// Launches the "normalizeKernel" OpenCL kernel from the interpolate_frames program.
// `factor_offset` and `dst_offset` are plane indices into `buffer` (planes of
// `height` rows); both are converted to element offsets before being passed on.
void interpolate::normalizeKernel(oclMat &buffer, int height, int factor_offset, int dst_offset)
Context *clCxt = Context::getContext();
String kernelName = "normalizeKernel";
std::vector< std::pair<size_t, const void *> > args;
// Row stride in float elements rather than bytes.
int step = buffer.step / sizeof(float);
// Plane indices -> element offsets from the start of the buffer.
factor_offset = step * height * factor_offset;
dst_offset = step * height * dst_offset;
// NOTE(review): the argument expression on the next line is truncated in this text.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buffer.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&factor_offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst_offset));
// Global size spans buffer.cols x height; local size is 16x16.
size_t globalThreads[3] = {buffer.cols, height, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
// Launches the "forwardWarpKernel" OpenCL kernel from the interpolate_frames program.
// `src` is the field being warped, (u, v) is the flow and `time_scale` is passed
// to the kernel as its last argument. `b_offset` and `d_offset` are plane indices
// into `buffer` (planes of src.rows rows) and are converted to element offsets.
void interpolate::forwardWarpKernel(const oclMat &src, oclMat &buffer, const oclMat &u, const oclMat &v, const float time_scale,
int b_offset, int d_offset)
Context *clCxt = Context::getContext();
String kernelName = "forwardWarpKernel";
std::vector< std::pair<size_t, const void *> > args;
// Row strides in float elements rather than bytes.
int f_step = u.step / sizeof(float); // flow step
int b_step = buffer.step / sizeof(float);
// Plane indices -> element offsets from the start of the buffer.
b_offset = b_step * src.rows * b_offset;
d_offset = b_step * src.rows * d_offset;
// NOTE(review): the argument expressions on the next four lines are truncated
// in this text.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&f_step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&b_step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&b_offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&d_offset));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&time_scale));
// Global size spans src.cols x src.rows; local size is 16x16.
size_t globalThreads[3] = {src.cols, src.rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
// Warps `src` along the flow (u, v) scaled by `timeScale`, using three kernel
// launches in sequence: memsetKernel with value 0 on plane `b_offset`,
// forwardWarpKernel with planes `b_offset` and `d_offset`, and normalizeKernel
// with `b_offset` as the factor plane and `d_offset` as the destination plane.
// Both offsets are plane indices into `buffer` (planes of src.rows rows).
void interpolate::vectorWarp(const oclMat &src, const oclMat &u, const oclMat &v,
oclMat &buffer, int b_offset, float timeScale, int d_offset)
memsetKernel(0, buffer, src.rows, b_offset);
forwardWarpKernel(src, buffer, u, v, timeScale, b_offset, d_offset);
normalizeKernel(buffer, src.rows, b_offset, d_offset);
// Launches the "blendFramesKernel" OpenCL kernel from the interpolate_frames program.
// The kernel receives the two bound source images (tex_src0, tex_src1), two
// further memory objects, the frame size, the buffer row stride in floats and
// `pos`. The second oclMat parameter (frame1) is unnamed and unused here.
void interpolate::blendFrames(const oclMat &frame0, const oclMat &/*frame1*/, const oclMat &buffer, float pos, oclMat &newFrame, cl_mem &tex_src0, cl_mem &tex_src1)
// Row stride of the scratch buffer in float elements rather than bytes.
int step = buffer.step / sizeof(float);
Context *clCxt = Context::getContext();
String kernelName = "blendFramesKernel";
std::vector< std::pair<size_t, const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&tex_src0));
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&tex_src1));
// NOTE(review): the argument expressions on the next two lines are truncated in
// this text; presumably the buffer and newFrame memory objects — confirm.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&frame0.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&frame0.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&step));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&pos));
// Global size spans frame0.cols x frame0.rows; local size is 16x16.
size_t globalThreads[3] = {frame0.cols, frame0.rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &interpolate_frames, kernelName, globalThreads, localThreads, args, -1, -1);
// Stores in `texture` the memory object returned by bindTexture(img).
void interpolate::bindImgTex(const oclMat &img, cl_mem &texture)
texture = bindTexture(img);
@ -1,134 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
using namespace cv;
using namespace cv::ocl;
// Constructs the filter by forwarding all four arguments to init().
KalmanFilter::KalmanFilter(int dynamParams, int measureParams, int controlParams, int type)
init(dynamParams, measureParams, controlParams, type);
// Allocates every matrix of the filter.
//   DP   - dimensionality of the state (must be > 0)
//   MP   - dimensionality of the measurement (must be > 0)
//   CP   - dimensionality of the control vector (negative values are treated as 0)
//   type - element type, CV_32F or CV_64F
// The transition matrix and both noise covariances are set to identity.
// NOTE(review): no initial values are assigned to the other matrices in the
// lines visible here — confirm against the full source.
void KalmanFilter::init(int DP, int MP, int CP, int type)
CV_Assert( DP > 0 && MP > 0 );
CV_Assert( type == CV_32F || type == CV_64F );
CP = cv::max(CP, 0);
// State vectors (DP x 1): predicted and corrected.
statePre.create(DP, 1, type);
statePost.create(DP, 1, type);
transitionMatrix.create(DP, DP, type);
setIdentity(transitionMatrix, 1);
processNoiseCov.create(DP, DP, type);
setIdentity(processNoiseCov, 1);
measurementNoiseCov.create(MP, MP, type);
setIdentity(measurementNoiseCov, 1);
measurementMatrix.create(MP, DP, type);
// Error covariances (DP x DP): predicted and corrected.
errorCovPre.create(DP, DP, type);
errorCovPost.create(DP, DP, type);
gain.create(DP, MP, type);
// The control matrix only exists when a control vector is used.
if( CP > 0 )
controlMatrix.create(DP, CP, type);
// Scratch matrices used by predict() and correct().
temp1.create(DP, DP, type);
temp2.create(MP, DP, type);
temp3.create(MP, MP, type);
temp4.create(MP, DP, type);
temp5.create(MP, 1, type);
// Prediction step; returns a reference to the predicted state statePre.
// The comments below assume ocl::gemm follows cv::gemm semantics:
// dst = alpha * src1 * src2 + beta * src3, with GEMM_2_T transposing src2.
CV_EXPORTS const oclMat& KalmanFilter::predict(const oclMat& control)
// statePre = transitionMatrix * statePost
gemm(transitionMatrix, statePost, 1, oclMat(), 0, statePre);
oclMat temp;
// statePre += controlMatrix * control
// NOTE(review): no condition guarding this call is visible in this text; whether
// it is skipped for an empty `control` cannot be told from here — confirm.
gemm(controlMatrix, control, 1, statePre, 1, statePre);
// temp1 = transitionMatrix * errorCovPost
gemm(transitionMatrix, errorCovPost, 1, oclMat(), 0, temp1);
// errorCovPre = temp1 * transitionMatrix^T + processNoiseCov
gemm(temp1, transitionMatrix, 1, processNoiseCov, 1, errorCovPre, GEMM_2_T);
return statePre;
// Correction step for a non-empty `measurement`; returns a reference to the
// corrected state statePost.
// The comments below assume ocl::gemm follows cv::gemm semantics:
// dst = alpha * src1 * src2 + beta * src3, with GEMM_2_T transposing src2.
CV_EXPORTS const oclMat& KalmanFilter::correct(const oclMat& measurement)
CV_Assert(measurement.empty() == false);
// temp2 = measurementMatrix * errorCovPre
gemm(measurementMatrix, errorCovPre, 1, oclMat(), 0, temp2);
// temp3 = temp2 * measurementMatrix^T + measurementNoiseCov
gemm(temp2, measurementMatrix, 1, measurementNoiseCov, 1, temp3, GEMM_2_T);
// The linear system temp3 * X = temp2 is solved on the host (SVD) into `temp`.
Mat temp;
solve(Mat(temp3), Mat(temp2), temp, DECOMP_SVD);
// NOTE(review): `temp4` is not assigned anywhere in the visible lines; presumably
// the host result `temp` is transferred into temp4 on a line missing here — confirm.
gain = temp4.t();
// temp5 = measurement - measurementMatrix * statePre
gemm(measurementMatrix, statePre, -1, measurement, 1, temp5);
// statePost = statePre + gain * temp5
gemm(gain, temp5, 1, statePre, 1, statePost);
// errorCovPost = errorCovPre - gain * temp2
gemm(gain, temp2, -1, errorCovPre, 1, errorCovPost);
return statePost;
@ -1,451 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Xiaopeng Fu,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
// Writes one random point into `center`, one coordinate per entry of `box`.
// box[j] holds the (min, max) range of dimension j. Each coordinate is drawn as
// ((float)rng * (1 + 2*margin) - margin) * (max - min) + min with
// margin = 1/dims, i.e. the range is widened by `margin` of its width on both
// sides (assuming (float)rng yields a value in [0, 1) — confirm).
static void generateRandomCenter(const std::vector<Vec2f>& box, float* center, RNG& rng)
size_t j, dims = box.size();
float margin = 1.f/dims;
for( j = 0; j < dims; j++ )
center[j] = ((float)rng*(1.f+margin*2.f)-margin)*(box[j][1] - box[j][0]) + box[j][0];
// This class is copied from matrix.cpp in core module.
// Parallel loop body used by generateCentersPP: for every sample index i in the
// given range it stores in tdist2[i] the smaller of dist[i] and the squared L2
// distance between sample i and the candidate center located at data + stepci.
// NOTE(review): the constructor's initializer list is only partly visible in
// this text (tdist2 and stepci) — confirm against the full source.
class KMeansPPDistanceComputer : public ParallelLoopBody
KMeansPPDistanceComputer( float *_tdist2,
const float *_data,
const float *_dist,
int _dims,
size_t _step,
size_t _stepci )
: tdist2(_tdist2),
stepci(_stepci) { }
void operator()( const cv::Range& range ) const
const int begin = range.start;
const int end = range.end;
for ( int i = begin; i<end; i++ )
tdist2[i] = std::min(normL2Sqr_(data + step*i, data + stepci, dims), dist[i]);
KMeansPPDistanceComputer& operator=(const KMeansPPDistanceComputer&); // to quiet MSVC
float *tdist2;      // output: updated distance per sample
const float *data;  // sample matrix, `step` floats per row
const float *dist;  // current distance per sample
const int dims;     // number of floats compared per sample
const size_t step;  // row stride of `data` in floats
const size_t stepci; // offset in floats of the candidate center inside `data`
// k-means center initialization using the following algorithm:
// Arthur & Vassilvitskii (2007) k-means++: The Advantages of Careful Seeding
// Chooses K initial centers among the rows of `_data` and copies them into the
// rows of `_out_centers`.
//   _data        - N x dims matrix of float samples
//   _out_centers - receives the K selected rows
//   K            - number of centers
//   rng          - random number generator used for all draws
//   trials       - number of candidates tried for every center after the first
// The first center is a uniformly random sample. For each further center,
// `trials` candidates are drawn with probability proportional to the current
// squared distance of each sample to its nearest chosen center; the candidate
// giving the smallest sum of updated distances is kept.
static void generateCentersPP(const Mat& _data, Mat& _out_centers,
int K, RNG& rng, int trials)
int i, j, k, dims = _data.cols, N = _data.rows;
// NOTE(review): the initializer on the next line is truncated in this text.
const float* data = (float*);
// Row stride in float elements.
size_t step = _data.step/sizeof(data[0]);
std::vector<int> _centers(K);
int* centers = &_centers[0];
// One allocation split into three arrays of N floats: dist, tdist and tdist2.
std::vector<float> _dist(N*3);
float* dist = &_dist[0], *tdist = dist + N, *tdist2 = tdist + N;
double sum0 = 0;
// First center: a uniformly random sample.
centers[0] = (unsigned)rng % N;
for( i = 0; i < N; i++ )
dist[i] = normL2Sqr_(data + step*i, data + step*centers[0], dims);
sum0 += dist[i];
for( k = 1; k < K; k++ )
double bestSum = DBL_MAX;
int bestCenter = -1;
for( j = 0; j < trials; j++ )
// Draw p in [0, sum0) and walk the samples until the running remainder drops
// to zero or below; sample i is then the candidate.
double p = (double)rng*sum0, s = 0;
for( i = 0; i < N-1; i++ )
if( (p -= dist[i]) <= 0 )
int ci = i;
// tdist2[i] = min(dist[i], squared distance from sample i to candidate ci).
parallel_for_(Range(0, N),
KMeansPPDistanceComputer(tdist2, data, dist, dims, step, step*ci));
for( i = 0; i < N; i++ )
s += tdist2[i];
// Keep the candidate with the smallest total distance; its distances are
// preserved in tdist by swapping the two scratch arrays.
if( s < bestSum )
bestSum = s;
bestCenter = ci;
std::swap(tdist, tdist2);
centers[k] = bestCenter;
sum0 = bestSum;
// The best candidate's distances become the current distances.
std::swap(dist, tdist);
// Copy the selected samples into the output matrix.
for( k = 0; k < K; k++ )
const float* src = data + step*centers[k];
float* dst = _out_centers.ptr<float>(k);
for( j = 0; j < dims; j++ )
dst[j] = src[j];
// For every row of `src`, finds the nearest row of the centers matrix.
//   src      - float samples, one per row
//   centers  - float centers, one per row, same feature width as src
//   dists    - recreated as src.rows x 1 CV_32FC1 (distance to the nearest center)
//   labels   - recreated as src.rows x 1 CV_32SC1 (index of the nearest center)
//   distType - NORM_L1 or NORM_L2SQR; selects a kernel build option
// The "distanceToCenters" kernel fills a 1 x (src.rows * centers.rows) matrix
// with all pairwise distances; the minimum per sample is then taken on the host.
// NOTE(review): several identifiers in this text are garbled (the centers
// parameter, the kernel memory arguments, the two assignments in the final
// loop) and the transfer into all_dist_cpu is not visible — confirm.
void cv::ocl::distanceToCenters(const oclMat &src, const oclMat ¢ers, Mat &dists, Mat &labels, int distType)
CV_Assert(src.cols * src.channels() == centers.cols * centers.channels());
CV_Assert(src.depth() == CV_32F && centers.depth() == CV_32F);
CV_Assert(distType == NORM_L1 || distType == NORM_L2SQR);
dists.create(src.rows, 1, CV_32FC1);
labels.create(src.rows, 1, CV_32SC1);
// The distance type is selected at kernel compile time.
std::stringstream build_opt_ss;
build_opt_ss << (distType == NORM_L1 ? "-D L1_DIST" : "-D L2SQR_DIST");
// Steps and offsets are converted from bytes to single-channel elements.
int src_step = src.step / src.elemSize1();
int centers_step = centers.step / centers.elemSize1();
int feature_width = centers.cols * centers.oclchannels();
int src_offset = src.offset / src.elemSize1();
int centers_offset = centers.offset / centers.elemSize1();
// One distance per (sample, center) pair.
int all_dist_count = src.rows * centers.rows;
oclMat all_dist(1, all_dist_count, CV_32FC1);
std::vector<std::pair<size_t, const void *> > args;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)¢;
args.push_back(std::make_pair(sizeof(cl_mem), (void *)&;
args.push_back(std::make_pair(sizeof(cl_int), (void *)&feature_width));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)¢ers_step));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void *)¢ers.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void *)&src_offset));
args.push_back(std::make_pair(sizeof(cl_int), (void *)¢ers_offset));
// One-dimensional launch of all_dist_count items; no local size is specified.
size_t globalThreads[3] = { all_dist_count, 1, 1 };
openCLExecuteKernel(Context::getContext(), &kmeans_kernel,
"distanceToCenters", globalThreads, NULL, args, -1, -1, build_opt_ss.str().c_str());
Mat all_dist_cpu;
// Sample i owns the slice [i * centers.rows, (i + 1) * centers.rows) of the
// distance row; its minimum value and position give the distance and label.
for (int i = 0; i < src.rows; ++i)
Point p;
double minVal;
Rect roi(i * centers.rows, 0, centers.rows, 1);
Mat hdr(all_dist_cpu, roi);
cv::minMaxLoc(hdr, &minVal, NULL, &p);
||||||<float>(i, 0) = static_cast<float>(minVal);
||||||<int>(i, 0) = p.x;
///////////////////////////////////k - means /////////////////////////////////////////////////////////
// K-means clustering of the samples in `_src`; returns the best compactness
// (sum of the per-sample distances reported by distanceToCenters) over all attempts.
//   _src        - float samples; either one sample per row, or a single
//                 multi-channel row with one sample per column
//   K           - number of clusters (0 < K <= number of samples)
//   _bestLabels - cluster index per sample
//   criteria    - termination criteria; epsilon is squared internally and the
//                 iteration count is clamped to [2, 100]
//   attempts    - number of runs with different initializations (at least 1)
//   flags       - KMEANS_USE_INITIAL_LABELS / KMEANS_PP_CENTERS are tested below
//   _centers    - device matrix of centers passed to distanceToCenters
// NOTE(review): braces, else-branches and the host/device transfer statements of
// this function are missing from this text, so the control flow cannot be fully
// read from here — confirm every comment against the full source.
double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,
TermCriteria criteria, int attempts, int flags, oclMat &_centers)
// Number of candidates tried per center by generateCentersPP.
const int SPP_TRIALS = 3;
// A single multi-channel row is interpreted as N samples of `channels` values.
bool isrow = _src.rows == 1 && _src.oclchannels() > 1;
int N = !isrow ? _src.rows : _src.cols;
int dims = (!isrow ? _src.cols : 1) * _src.oclchannels();
int type = _src.depth();
attempts = std::max(attempts, 1);
CV_Assert(type == CV_32F && K > 0 );
CV_Assert( N >= K );
Mat _labels;
// _bestLabels must be (or is made) a vector of N 32-bit integer labels.
CV_Assert( (_bestLabels.cols == 1 || _bestLabels.rows == 1) &&
_bestLabels.cols * _bestLabels.rows == N &&
_bestLabels.type() == CV_32S );
if( !((_bestLabels.cols == 1 || _bestLabels.rows == 1) &&
_bestLabels.cols * _bestLabels.rows == N &&
_bestLabels.type() == CV_32S &&
_bestLabels.create(N, 1, CV_32S);
_labels.create(_bestLabels.size(), _bestLabels.type());
int* labels = _labels.ptr<int>();
// NOTE(review): no statement filling `data` from `_src` is visible here.
Mat data;
Mat centers(K, dims, type), old_centers(K, dims, type), temp(1, dims, type);
std::vector<int> counters(K);
// Per-dimension (min, max) bounding box of the samples.
std::vector<Vec2f> _box(dims);
Vec2f* box = &_box[0];
double best_compactness = DBL_MAX, compactness = 0;
RNG& rng = theRNG();
int a, iter, i, j, k;
// Epsilon is squared because center shifts are compared as squared distances.
if( criteria.type & TermCriteria::EPS )
criteria.epsilon = std::max(criteria.epsilon, 0.);
criteria.epsilon = FLT_EPSILON;
criteria.epsilon *= criteria.epsilon;
if( criteria.type & TermCriteria::COUNT )
criteria.maxCount = std::min(std::max(criteria.maxCount, 2), 100);
criteria.maxCount = 100;
// With a single cluster one attempt and two iterations are enough.
if( K == 1 )
attempts = 1;
criteria.maxCount = 2;
// Compute the bounding box: start from the first sample, then extend.
const float* sample = data.ptr<float>();
for( j = 0; j < dims; j++ )
box[j] = Vec2f(sample[j], sample[j]);
for( i = 1; i < N; i++ )
sample = data.ptr<float>(i);
for( j = 0; j < dims; j++ )
float v = sample[j];
box[j][0] = std::min(box[j][0], v);
box[j][1] = std::max(box[j][1], v);
for( a = 0; a < attempts; a++ )
double max_center_shift = DBL_MAX;
for( iter = 0;; )
swap(centers, old_centers);
// First iteration without usable initial labels: generate the initial centers,
// either with k-means++ seeding or uniformly inside the bounding box.
if( iter == 0 && (a > 0 || !(flags & KMEANS_USE_INITIAL_LABELS)) )
if( flags & KMEANS_PP_CENTERS )
generateCentersPP(data, centers, K, rng, SPP_TRIALS);
for( k = 0; k < K; k++ )
generateRandomCenter(_box, centers.ptr<float>(k), rng);
// User-supplied labels are validated on the very first iteration only.
if( iter == 0 && a == 0 && (flags & KMEANS_USE_INITIAL_LABELS) )
for( i = 0; i < N; i++ )
CV_Assert( (unsigned)labels[i] < (unsigned)K );
// compute centers
centers = Scalar(0);
for( k = 0; k < K; k++ )
counters[k] = 0;
// Accumulate every sample into the center of its cluster.
for( i = 0; i < N; i++ )
sample = data.ptr<float>(i);
k = labels[i];
float* center = centers.ptr<float>(k);
// Main part of the accumulation, unrolled by four.
for(; j <= dims - 4; j += 4 )
float t0 = center[j] + sample[j];
float t1 = center[j+1] + sample[j+1];
center[j] = t0;
center[j+1] = t1;
t0 = center[j+2] + sample[j+2];
t1 = center[j+3] + sample[j+3];
center[j+2] = t0;
center[j+3] = t1;
// Remaining dimensions.
for( ; j < dims; j++ )
center[j] += sample[j];
if( iter > 0 )
max_center_shift = 0;
for( k = 0; k < K; k++ )
if( counters[k] != 0 )
// if some cluster appeared to be empty then:
// 1. find the biggest cluster
// 2. find the farthest from the center point in the biggest cluster
// 3. exclude the farthest point from the biggest cluster and form a new 1-point cluster.
int max_k = 0;
for( int k1 = 1; k1 < K; k1++ )
if( counters[max_k] < counters[k1] )
max_k = k1;
double max_dist = 0;
int farthest_i = -1;
float* new_center = centers.ptr<float>(k);
float* old_center = centers.ptr<float>(max_k);
float* _old_center = temp.ptr<float>(); // normalized
// centers still holds sums at this point, so the biggest cluster's mean is
// obtained by scaling with 1 / count.
float scale = 1.f/counters[max_k];
for( j = 0; j < dims; j++ )
_old_center[j] = old_center[j]*scale;
for( i = 0; i < N; i++ )
if( labels[i] != max_k )
sample = data.ptr<float>(i);
double dist = normL2Sqr_(sample, _old_center, dims);
if( max_dist <= dist )
max_dist = dist;
farthest_i = i;
// Move the farthest sample from the biggest cluster into the empty one.
labels[farthest_i] = k;
sample = data.ptr<float>(farthest_i);
for( j = 0; j < dims; j++ )
old_center[j] -= sample[j];
new_center[j] += sample[j];
// Turn the accumulated sums into means and measure how far each center moved.
for( k = 0; k < K; k++ )
float* center = centers.ptr<float>(k);
CV_Assert( counters[k] != 0 );
float scale = 1.f/counters[k];
for( j = 0; j < dims; j++ )
center[j] *= scale;
if( iter > 0 )
double dist = 0;
const float* old_center = old_centers.ptr<float>(k);
for( j = 0; j < dims; j++ )
double t = center[j] - old_center[j];
dist += t*t;
max_center_shift = std::max(max_center_shift, dist);
// Termination test: iteration limit reached or centers moved less than epsilon.
if( ++iter == MAX(criteria.maxCount, 2) || max_center_shift <= criteria.epsilon )
// assign labels
// distanceToCenters re-creates `dists` as an N x 1 CV_32FC1 matrix, which is
// why it is read through a float pointer below despite the CV_64F type here.
Mat dists(1, N, CV_64F);
distanceToCenters(_src, _centers, dists, _labels);
float* dist = dists.ptr<float>(0);
compactness = 0;
for( i = 0; i < N; i++ )
compactness += (double)dist[i];
// Keep the best result over all attempts.
if( compactness < best_compactness )
best_compactness = compactness;
return best_compactness;
@ -1,151 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jin Ma,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
// NOTE(review): only the signature of clear() is present here; its body is
// missing from this text — confirm against the full source.
void KNearestNeighbour::clear()
// Trains the classifier by delegating to CvKNearest::train and then builds a
// host matrix with one row per stored sample: var_count feature columns
// followed by one extra column taken from `s1`. That matrix is assigned to
// samples_ocl. Returns the result of CvKNearest::train.
// NOTE(review): the two element-assignment lines inside the loops are garbled
// in this text — confirm against the full source.
bool KNearestNeighbour::train(const Mat& trainData, Mat& labels, Mat& sampleIdx,
bool isRegression, int _max_k, bool updateBase)
max_k = _max_k;
bool cv_knn_train = CvKNearest::train(trainData, labels, sampleIdx, isRegression, max_k, updateBase);
CvVectors* s = CvKNearest::samples;
// One row per sample, one extra column after the features.
cv::Mat samples_mat(s->count, CvKNearest::var_count + 1, s->type);
// Float data located directly after the CvVectors header; presumably the
// per-sample responses/labels — confirm.
float* s1 = (float*)(s + 1);
for(int i = 0; i < s->count; i++)
float* t1 = s->data.fl[i];
// Copy the features of sample i.
for(int j = 0; j < CvKNearest::var_count; j++)
Point pos(j, i);
||||||<float>(pos) = t1[j];
// Last column of row i receives s1[i].
Point pos_label(CvKNearest::var_count, i);
||||||<float>(pos_label) = s1[i];
samples_ocl = samples_mat;
return cv_knn_train;
// Launches the "knn_find_nearest" OpenCL kernel from the knearest program for
// the rows of `samples` and writes one float per sample into `lables`
// (recreated as samples.rows x 1 CV_32FC1).
//   samples - CV_32FC1 matrix with var_count columns, one query per row
//   k       - number of neighbours, 1 <= k <= max_k
//   lables  - output, one value per query row
// NOTE(review): the conditions around the two sprintf calls and around
// `_regression = 1`, and the kernel memory arguments, are missing or truncated
// in this text — confirm against the full source.
void KNearestNeighbour::find_nearest(const oclMat& samples, int k, oclMat& lables)
lables.create(samples.rows, 1, CV_32FC1);
CV_Assert(samples.cols == CvKNearest::var_count);
CV_Assert(samples.type() == CV_32FC1);
CV_Assert(k >= 1 && k <= max_k);
// k1 = min(number of stored samples, k).
int k1 = KNearest::get_sample_count();
k1 = MIN( k1, k );
String kernel_name = "knn_find_nearest";
// The work-group size is limited by local memory: 2 * k * 4 bytes are
// budgeted per work-item, with an upper bound of 256 work-items.
cl_ulong local_memory_size = (cl_ulong)Context::getContext()->getDeviceInfo().localMemorySize;
int nThreads = local_memory_size / (2 * k * 4);
if(nThreads >= 256)
nThreads = 256;
// Local memory requested for the kernel, in bytes.
int smem_size = nThreads * k * 4 * 2;
// Work-groups are laid out along the second dimension.
size_t local_thread[] = {1, nThreads, 1};
size_t global_thread[] = {1, samples.rows, 1};
char build_option[50];
sprintf(build_option, " ");
sprintf(build_option, "-D DOUBLE_SUPPORT");
std::vector< std::pair<size_t, const void*> > args;
// Steps converted from bytes to elements.
int samples_ocl_step = samples_ocl.step/samples_ocl.elemSize();
int samples_step = samples.step/samples.elemSize();
int lables_step = lables.step/lables.elemSize();
int _regression = 0;
_regression = 1;
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_int), (void*)&samples.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&samples.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&samples_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&k));
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_int), (void*)&samples_ocl.rows));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&samples_ocl_step));
args.push_back(std::make_pair(sizeof(cl_mem), (void*)&;
args.push_back(std::make_pair(sizeof(cl_int), (void*)&lables_step));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&_regression));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&k1));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&samples_ocl.cols));
args.push_back(std::make_pair(sizeof(cl_int), (void*)&nThreads));
// A NULL pointer with a non-zero size requests smem_size bytes of local memory.
args.push_back(std::make_pair(smem_size, (void*)NULL));
openCLExecuteKernel(Context::getContext(), &knearest, kernel_name, global_thread, local_thread, args, -1, -1, build_option);
@ -1,570 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Peng Xiao,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
namespace cv
namespace ocl
// Forward declarations of the per-method template-matching routines and helpers
// that are defined further down in this file, so they can call each other
// independently of definition order.
// Common parameters: image - source image, templ - template to search for,
// result - output matrix written by the routine, buf - caller-provided scratch
// storage (MatchTemplateBuf) reused between calls.
void matchTemplate_SQDIFF(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
void matchTemplate_SQDIFF_NORMED(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
void convolve_32F(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
void matchTemplate_CCORR(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
void matchTemplate_CCORR_NORMED(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
void matchTemplate_CCOFF(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
void matchTemplate_CCOFF_NORMED(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf);
// Direct (non-convolution) kernels; the trailing int is the channel count passed by callers.
void matchTemplateNaive_SQDIFF(
const oclMat &image, const oclMat &templ, oclMat &result, int cn);
void matchTemplateNaive_CCORR(
const oclMat &image, const oclMat &templ, oclMat &result, int cn);
// Copies the first channel of a multi-channel image into result.
void extractFirstChannel_32F(
const oclMat &image, oclMat &result);
// Decides whether the naive (direct) matching kernel should be used instead of
// the convolution-based path. For the size-dependent cases the template must be
// smaller than 18 in both dimensions for the naive kernel to be chosen.
//   method - matching mode (compared against TM_SQDIFF / TM_CCORR below)
//   depth  - element depth of the image (callers pass image.depth())
//   size   - template size (callers pass templ.size())
// Returns true when the caller should take the naive path.
// NOTE(review): braces and preprocessor conditionals are not visible in this
// listing. The statements below look like two alternative bodies (one that
// inspects the arguments, one that marks them unused and always returns true),
// presumably selected at compile time - confirm against the full file.
static bool useNaive(int method, int depth, Size size)
// SQDIFF on float data, or on a device without double support, always goes naive.
if (method == TM_SQDIFF && (depth == CV_32F || !Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE)))
return true;
else if(method == TM_CCORR || (method == TM_SQDIFF && depth == CV_8U))
return size.height < 18 && size.width < 18;
return false;
// Fallback body: silence unused-parameter warnings and always choose the naive path.
#define UNUSED(x) (void)(x);
UNUSED(method) UNUSED(depth) UNUSED(size)
#undef UNUSED
return true;
// Squared-difference template matching.
// Allocates result as (image.rows - templ.rows + 1) x (image.cols - templ.cols + 1), CV_32F.
// When useNaive() says so, the direct kernel is used; the remaining statements
// compute squared-sum integrals of the image on the CPU, run matchTemplate_CCORR
// and then launch the "matchTemplate_Prepared_SQDIFF" kernel on the result.
// NOTE(review): braces and `else` lines are missing from this listing, so the exact
// branch boundaries must be confirmed against the full file.
void matchTemplate_SQDIFF(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf & buf)
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
if (useNaive(TM_SQDIFF, image.depth(), templ.size()))
matchTemplateNaive_SQDIFF(image, templ, result, image.oclchannels());
// TODO, add double support for ocl::integral
// use CPU integral temporarily
Mat sums, sqsums;
// The oclMat is converted to a host Mat for cv::integral; only the squared sums are kept.
cv::integral(Mat(image.reshape(1)), sums, sqsums);
buf.image_sqsums[0] = sqsums;
// Sum of squared template values over all channels (reshape(1) flattens the channels).
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
matchTemplate_CCORR(image, templ, result, buf);
//port CUDA's matchTemplatePrepared_SQDIFF_8U
Context *clCxt = image.clCxt;
String kernelName = "matchTemplate_Prepared_SQDIFF";
// Kernel arguments are passed as (size, pointer-to-value) pairs, in kernel parameter order.
std::vector< std::pair<size_t, const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
// NOTE(review): the expression after '&' is missing in this listing (a cl_mem argument).
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// Thread ranges handed to openCLExecuteKernel: one entry per result element, 16x16 local size.
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
// The kernel is built with -D CN4 for 4-channel images.
const char * build_opt = image.oclchannels() == 4 ? "-D CN4" : "";
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U, build_opt);
// Normalized squared-difference template matching.
// Runs matchTemplate_CCORR into result, computes the integral of the
// channel-flattened image into buf.image_sums[0], and launches the
// "matchTemplate_Prepared_SQDIFF_NORMED" kernel over the result.
void matchTemplate_SQDIFF_NORMED(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
matchTemplate_CCORR(image, templ, result, buf);
integral(image.reshape(1), buf.image_sums[0]);
// Sum of squared template values over all channels.
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
Context *clCxt = image.clCxt;
String kernelName = "matchTemplate_Prepared_SQDIFF_NORMED";
// Kernel arguments as (size, pointer-to-value) pairs, in kernel parameter order.
std::vector< std::pair<size_t, const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data));
// NOTE(review): the expression after '&' is missing in this listing (a cl_mem argument).
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// One entry per result element, 16x16 local size.
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
// Direct squared-difference matching via the "matchTemplate_Naive_SQDIFF" kernel.
// Preconditions (asserted): image and templ are both CV_8U, or both CV_32F with a
// CV_32F result; image and templ have the same channel count, which is 1 or 4;
// result is single-channel and already sized
// (image.rows - templ.rows + 1) x (image.cols - templ.cols + 1).
// The unnamed int parameter is ignored.
void matchTemplateNaive_SQDIFF(
const oclMat &image, const oclMat &templ, oclMat &result, int)
// NOTE(review): the closing ");" of this assertion is not visible in this listing.
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
Context *clCxt = image.clCxt;
String kernelName = "matchTemplate_Naive_SQDIFF";
// Kernel arguments as (size, pointer-to-value) pairs, in kernel parameter order.
std::vector< std::pair<size_t, const void *> > args;
// NOTE(review): the three cl_mem argument expressions are missing after '&' in this listing.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// One entry per result element, 16x16 local size.
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
// The kernel variant is selected by the image's channel count and depth.
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
// Correlates image with templ through ocl::convolve (fourth argument `true`),
// using a local ConvolveBuf whose block size is taken from buf.user_block_size.
// Single-channel input is convolved straight into result; the other visible
// statements flatten the channels with reshape(1), convolve into a temporary
// and keep only its first channel.
// NOTE(review): braces and the `else` line are missing from this listing; the
// multi-channel statements presumably form the else branch - confirm.
void convolve_32F(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
ConvolveBuf convolve_buf;
convolve_buf.user_block_size = buf.user_block_size;
if (image.oclchannels() == 1)
convolve(image, templ, result, true, convolve_buf);
oclMat result_;
convolve(image.reshape(1), templ.reshape(1), result_, true, convolve_buf);
extractFirstChannel_32F(result_, result);
// Cross-correlation template matching.
// Allocates result as (image.rows - templ.rows + 1) x (image.cols - templ.cols + 1), CV_32F.
// Uses the naive kernel when useNaive() says so; for 8-bit input the image and
// template are first converted to CV_32F in buf.imagef / buf.templf before
// convolve_32F, while other inputs are passed to convolve_32F unchanged.
// NOTE(review): braces and `else` lines are missing from this listing, so the
// nesting of the branches below must be confirmed against the full file.
void matchTemplate_CCORR(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
if (useNaive(TM_CCORR, image.depth(), templ.size()))
matchTemplateNaive_CCORR(image, templ, result, image.oclchannels());
if(image.depth() == CV_8U && templ.depth() == CV_8U)
image.convertTo(buf.imagef, CV_32F);
templ.convertTo(buf.templf, CV_32F);
convolve_32F(buf.imagef, buf.templf, result, buf);
convolve_32F(image, templ, result, buf);
// Normalized cross-correlation template matching.
// Runs matchTemplate_CCORR into result, computes the image integral and
// squared-sum integral, and launches "normalizeKernel" over the result using
// the squared sums and the template's sum of squares.
// NOTE(review): braces and the `else` line are missing from this listing; the
// plain assignment to buf.image_sqsums[0] is presumably the non-CV_64F branch.
void matchTemplate_CCORR_NORMED(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
cv::ocl::oclMat temp;
matchTemplate_CCORR(image, templ, result, buf);
// temp receives the squared-sum integral of the channel-flattened image.
integral(image.reshape(1), buf.image_sums[0], temp);
// The squared sums are kept as 32-bit float when integral() produced doubles.
if(temp.depth() == CV_64F)
temp.convertTo(buf.image_sqsums[0], CV_32FC1);
buf.image_sqsums[0] = temp;
unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
Context *clCxt = image.clCxt;
String kernelName = "normalizeKernel";
// Kernel arguments as (size, pointer-to-value) pairs, in kernel parameter order.
std::vector< std::pair<size_t, const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data));
// NOTE(review): the expression after '&' is missing in this listing (a cl_mem argument).
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_ulong), (void *)&templ_sqsum));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// One entry per result element, 16x16 local size.
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, 1, CV_8U);
// Direct cross-correlation matching via the "matchTemplate_Naive_CCORR" kernel.
// Preconditions (asserted): image and templ are both CV_8U, or both CV_32F with a
// CV_32F result; image and templ have the same channel count, which is 1 or 4;
// result is single-channel and already sized
// (image.rows - templ.rows + 1) x (image.cols - templ.cols + 1).
// The unnamed int parameter is ignored.
void matchTemplateNaive_CCORR(
const oclMat &image, const oclMat &templ, oclMat &result, int)
// NOTE(review): the closing ");" of this assertion is not visible in this listing.
CV_Assert((image.depth() == CV_8U && templ.depth() == CV_8U )
|| ((image.depth() == CV_32F && templ.depth() == CV_32F) && result.depth() == CV_32F)
CV_Assert(image.oclchannels() == templ.oclchannels() && (image.oclchannels() == 1 || image.oclchannels() == 4) && result.oclchannels() == 1);
CV_Assert(result.rows == image.rows - templ.rows + 1 && result.cols == image.cols - templ.cols + 1);
Context *clCxt = image.clCxt;
String kernelName = "matchTemplate_Naive_CCORR";
// Kernel arguments as (size, pointer-to-value) pairs, in kernel parameter order.
std::vector< std::pair<size_t, const void *> > args;
// NOTE(review): the three cl_mem argument expressions are missing after '&' in this listing.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
// One entry per result element, 16x16 local size.
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
// The kernel variant is selected by the image's channel count and depth.
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
// Correlation-coefficient template matching for 8-bit inputs (asserted).
// Runs matchTemplate_CCORR into result, then launches the
// "matchTemplate_Prepared_CCOFF" kernel, which also receives integral image(s)
// of the input and the per-channel mean of the template.
// NOTE(review): braces and what looks like a switch skeleton (`switch`, `break`,
// `default:`) are missing from this listing. The visible statements show a
// single-channel path, a `case 4:` path and an "unsupported number of channels"
// error - confirm the exact structure against the full file.
void matchTemplate_CCOFF(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
CV_Assert(image.depth() == CV_8U && templ.depth() == CV_8U);
matchTemplate_CCORR(image, templ, result, buf);
Context *clCxt = image.clCxt;
String kernelName;
kernelName = "matchTemplate_Prepared_CCOFF";
// One entry per result element, 16x16 local size.
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
// Arguments common to every channel count come first.
std::vector< std::pair<size_t, const void *> > args;
// NOTE(review): the expression after '&' is missing in this listing (a cl_mem argument).
args.push_back( std::make_pair( sizeof(cl_mem), (void *)& );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
Vec4f templ_sum = Vec4f::all(0);
// to be continued in the following section
// Single channel: one integral image and the template mean.
if(image.oclchannels() == 1)
integral(image, buf.image_sums[0]);
templ_sum[0] = (float)sum(templ)[0] / templ.size().area();
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
// Multi-channel: split into planes, integrate each plane, use per-channel template means.
split(image, buf.images);
templ_sum = sum(templ) / templ.size().area();
for(int i = 0; i < image.oclchannels(); i ++)
integral(buf.images[i], buf.image_sums[i]);
case 4:
// Offset and step are taken from plane 0 only and passed once for all four integrals.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
CV_Error(Error::StsBadArg, "matchTemplate: unsupported number of channels");
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
// Normalized correlation-coefficient template matching.
// Converts image and templ to CV_32F, runs matchTemplate_CCORR on the converted
// data, then launches the "matchTemplate_Prepared_CCOFF_NORMED" kernel, which
// receives integral and squared-sum integral images plus template statistics
// scaled by 1 / (template area).
// NOTE(review): braces and what looks like a switch skeleton (`switch`, `break`,
// `default:`), as well as `else` lines, are missing from this listing. The
// visible statements show a single-channel path, a `case 4:` path and an
// "unsupported number of channels" error - confirm against the full file.
void matchTemplate_CCOFF_NORMED(
const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
image.convertTo(buf.imagef, CV_32F);
templ.convertTo(buf.templf, CV_32F);
matchTemplate_CCORR(buf.imagef, buf.templf, result, buf);
// Reciprocal of the template area; turns sums into means.
float scale = 1.f / templ.size().area();
Context *clCxt = image.clCxt;
String kernelName;
kernelName = "matchTemplate_Prepared_CCOFF_NORMED";
// One entry per result element, 16x16 local size.
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
// Arguments common to every channel count come first.
std::vector< std::pair<size_t, const void *> > args;
// NOTE(review): the expression after '&' is missing in this listing (a cl_mem argument).
args.push_back( std::make_pair( sizeof(cl_mem), (void *)& );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.rows) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.cols) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.rows) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&templ.cols) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale) );
Vec4f templ_sum = Vec4f::all(0);
Vec4f templ_sqsum = Vec4f::all(0);
// to be continued in the following section
// Single channel: one sum integral and one squared-sum integral.
if(image.oclchannels() == 1)
cv::ocl::oclMat temp;
integral(image, buf.image_sums[0], temp);
// Squared sums are kept as 32-bit float when integral() produced doubles.
if(temp.depth() == CV_64F)
temp.convertTo(buf.image_sqsums[0], CV_32FC1);
buf.image_sqsums[0] = temp;
templ_sum[0] = (float)sum(templ)[0];
templ_sqsum[0] = sqrSum(templ)[0];
// sqsum - sum^2 / area, followed by converting the sum into a mean.
templ_sqsum[0] -= scale * templ_sum[0] * templ_sum[0];
templ_sum[0] *= scale;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sqsum[0]) );
// Multi-channel: split into planes and gather per-channel template statistics.
split(image, buf.images);
templ_sum = sum(templ);
templ_sqsum = sqrSum(templ);
// NOTE(review): a scale * templ_sum * templ_sum term is subtracted here on the
// whole vector and again per channel inside the loop below - confirm that the
// double correction (and the Vec4f * Vec4f product used here) is intended.
templ_sqsum -= scale * templ_sum * templ_sum;
float templ_sqsum_sum = 0;
for(int i = 0; i < image.oclchannels(); i ++)
templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
templ_sum *= scale;
cv::ocl::oclMat temp;
// Integrate every plane; squared sums are stored per plane.
for(int i = 0; i < image.oclchannels(); i ++)
integral(buf.images[i], buf.image_sums[i], temp);
if(temp.depth() == CV_64F)
temp.convertTo(buf.image_sqsums[i], CV_32FC1);
buf.image_sqsums[i] = temp;
case 4:
// Offset and step are taken from plane 0 only and passed once per group of four buffers.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[0].data) );
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[1].data) );
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[2].data) );
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sums[3].data) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].offset) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sums[0].step) );
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[0].data) );
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[1].data) );
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[2].data) );
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&buf.image_sqsums[3].data) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].offset) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&buf.image_sqsums[0].step) );
args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[0]) );
args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[1]) );
args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[2]) );
args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sum[3]) );
args.push_back( std::make_pair( sizeof(cl_float), (void *)&templ_sqsum_sum) );
CV_Error(Error::StsBadArg, "matchTemplate: unsupported number of channels");
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, image.oclchannels(), image.depth());
// Launches the "extractFirstChannel" kernel over result (one entry per result
// element, 16x16 local size), passing image and result geometry.
// result is not allocated here; its rows/cols are read as-is, so the caller
// must have sized it beforehand.
void extractFirstChannel_32F(const oclMat &image, oclMat &result)
Context *clCxt = image.clCxt;
String kernelName;
kernelName = "extractFirstChannel";
size_t globalThreads[3] = {result.cols, result.rows, 1};
size_t localThreads[3] = {16, 16, 1};
// Kernel arguments as (size, pointer-to-value) pairs, in kernel parameter order.
std::vector< std::pair<size_t, const void *> > args;
// NOTE(review): the two cl_mem argument expressions are missing after '&' in this listing.
args.push_back( std::make_pair( sizeof(cl_mem), (void *)& );
args.push_back( std::make_pair( sizeof(cl_mem), (void *)& );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.rows) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.cols) );
args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.offset));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&image.step));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&result.step));
openCLExecuteKernel(clCxt, &match_template, kernelName, globalThreads, localThreads, args, -1, -1);
} /*cv*/
// Convenience overload: creates a function-local MatchTemplateBuf and forwards
// to the five-argument matchTemplate overload, so no scratch storage is reused
// between calls.
void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method)
MatchTemplateBuf buf;
matchTemplate(image, templ, result, method, buf);
void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf)
CV_Assert(image.type() == templ.type());
CV_Assert(image.cols >= templ.cols && image.rows >= templ.rows);
typedef void (*Caller)(const oclMat &, const oclMat &, oclMat &, MatchTemplateBuf &);
const Caller callers[] =
::matchTemplate_SQDIFF, ::matchTemplate_SQDIFF_NORMED,
::matchTemplate_CCORR, ::matchTemplate_CCORR_NORMED,
::matchTemplate_CCOFF, ::matchTemplate_CCOFF_NORMED
Caller caller = callers[method];
caller(image, templ, result, buf);
@ -1,632 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Niko Li,
// Yao Wang,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
// Alignment granularity used by GPU_MATRIX_MALLOC_STEP.
#define ALIGN 32
// Rounds `step` up to the nearest multiple of ALIGN.
// The whole expansion is parenthesized so the macro behaves as a single value
// inside larger expressions (e.g. `x / GPU_MATRIX_MALLOC_STEP(s)` or `x % ...`);
// without the outer parentheses the trailing `* ALIGN` would bind to the
// surrounding operator instead.
#define GPU_MATRIX_MALLOC_STEP(step) ((((step) + ALIGN - 1) / ALIGN) * ALIGN)
// helper routines
namespace cv
namespace ocl
// Global device-memory settings; only declared here (extern), defined in another
// translation unit.
extern DevMemType gDeviceMemType;
extern DevMemRW gDeviceMemRW;
// convert_C3C4
// Runs the "convertC3C4" kernel to fill dst (whole matrix, not just the ROI)
// from the raw buffer src.
//   src - OpenCL buffer holding the source pixels.
//   dst - destination matrix; its wholecols/wholerows, step and depth drive the launch.
// The kernel is compiled with -D GENTYPE4=<element type>4 chosen from dst.depth().
// NOTE(review): the `#else` / `#endif` lines of the ANDROID conditional and the
// braces are missing from this listing; the two launches below are presumably
// the two alternatives (no explicit local size on Android, 256 otherwise).
static void convert_C3C4(const cl_mem &src, oclMat &dst)
Context *clCxt = dst.clCxt;
// Index of the last pixel of the whole matrix.
int pixel_end = dst.wholecols * dst.wholerows - 1;
// Row step of dst expressed in pixels rather than in elements.
int dstStep_in_pixel = dst.step1() / dst.oclchannels();
// Indexed by OpenCV depth code (CV_8U .. CV_64F).
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
std::string buildOptions = format("-D GENTYPE4=%s4", typeMap[dst.depth()]);
std::vector< std::pair<size_t, const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src));
// NOTE(review): the expression after '&' is missing in this listing (a cl_mem argument).
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.wholecols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.wholerows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&dstStep_in_pixel));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&pixel_end));
// A quarter as many threads as pixels (rounded up) - presumably four pixels per thread.
size_t globalThreads[3] = { divUp(dst.wholecols * dst.wholerows, 4), 1, 1 };
#ifdef ANDROID
openCLExecuteKernel(clCxt, &convertC3C4, "convertC3C4", globalThreads, NULL,
args, -1, -1, buildOptions.c_str());
size_t localThreads[3] = { 256, 1, 1 };
openCLExecuteKernel(clCxt, &convertC3C4, "convertC3C4", globalThreads, localThreads,
args, -1, -1, buildOptions.c_str());
// convert_C4C3
// Runs the "convertC4C3" kernel to write the pixels of src (whole matrix, not
// just the ROI) into the raw buffer dst.
//   src - source matrix; its wholecols/wholerows, step and depth drive the launch.
//   dst - OpenCL buffer receiving the converted pixels.
// The kernel is compiled with -D GENTYPE4=<element type>4 chosen from src.depth().
// NOTE(review): the `#else` / `#endif` lines of the ANDROID conditional and the
// braces are missing from this listing; the two launches below are presumably
// the two alternatives (no explicit local size on Android, 256 otherwise).
static void convert_C4C3(const oclMat &src, cl_mem &dst)
// Row step of src expressed in pixels rather than in elements.
int srcStep_in_pixel = src.step1() / src.oclchannels();
// Index of the last pixel of the whole matrix.
int pixel_end = src.wholecols * src.wholerows - 1;
Context *clCxt = src.clCxt;
// Indexed by OpenCV depth code (CV_8U .. CV_64F).
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
std::string buildOptions = format("-D GENTYPE4=%s4", typeMap[src.depth()]);
std::vector< std::pair<size_t, const void *> > args;
// NOTE(review): the expression after '&' is missing in this listing (a cl_mem argument).
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&;
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.wholecols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.wholerows));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&srcStep_in_pixel));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&pixel_end));
// A quarter as many threads as pixels (rounded up) - presumably four pixels per thread.
size_t globalThreads[3] = { divUp(src.wholecols * src.wholerows, 4), 1, 1};
#ifdef ANDROID
openCLExecuteKernel(clCxt, &convertC3C4, "convertC4C3", globalThreads, NULL, args, -1, -1, buildOptions.c_str());
size_t localThreads[3] = { 256, 1, 1};
openCLExecuteKernel(clCxt, &convertC3C4, "convertC4C3", globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
// Copies the host matrix m to the device.
// The whole parent matrix of m (as reported by locateROI) is uploaded, and this
// oclMat is then narrowed to m's ROI via rows / cols / offset.
// Raises OpenCLDoubleNotSupported when m is CV_64F and the device lacks double support.
// 3-channel data is first copied into a temporary tightly-pitched buffer and
// then expanded by convert_C3C4; other channel counts are copied directly.
// NOTE(review): braces and the `else` line are missing from this listing. Also,
// no check of `err` and no release of `temp` are visible here - confirm against
// the full file that the temporary buffer is verified and freed.
void cv::ocl::oclMat::upload(const Mat &m)
if (!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE) && m.depth() == CV_64F)
CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
Size wholeSize;
Point ofs;
m.locateROI(wholeSize, ofs);
create(wholeSize, m.type());
if (m.channels() == 3)
// Row pitch of the packed 3-channel staging buffer.
int pitch = wholeSize.width * 3 * m.elemSize1();
// The staging buffer size is rounded up to a multiple of this value.
int tail_padding = m.elemSize1() * 3072;
int err;
cl_mem temp = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE,
(pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3);
convert_C3C4(temp, *this);
// Direct host-to-device copy for the non-3-channel case.
openCLMemcpy2D(clCxt, data, step, m.datastart, m.step, wholeSize.width * elemSize(), wholeSize.height, clMemcpyHostToDevice);
// Restrict the view to the ROI that m described within its parent matrix.
rows = m.rows;
cols = m.cols;
offset = ofs.y * step + ofs.x * elemSize();
// Wraps this oclMat in an _InputArray tagged with the OCL_MAT kind; the wrapper
// stores a pointer to this object.
cv::ocl::oclMat::operator cv::_InputArray()
return _InputArray(cv::_InputArray::OCL_MAT, this);
// Wraps this oclMat in an _OutputArray tagged with the OCL_MAT kind; the wrapper
// stores a pointer to this object.
cv::ocl::oclMat::operator cv::_OutputArray()
return _OutputArray(cv::_InputArray::OCL_MAT, this);
// Returns the oclMat wrapped by src. Asserts that src's kind is OCL_MAT before
// casting the stored object pointer.
cv::ocl::oclMat& cv::ocl::getOclMatRef(InputArray src)
CV_Assert(src.kind() == cv::_InputArray::OCL_MAT);
return *(oclMat*)src.getObj();
// OutputArray overload: returns the oclMat wrapped by src. Asserts that src's
// kind is OCL_MAT before casting the stored object pointer.
cv::ocl::oclMat& cv::ocl::getOclMatRef(OutputArray src)
CV_Assert(src.kind() == cv::_InputArray::OCL_MAT);
return *(oclMat*)src.getObj();
// Copies this device matrix back to the host matrix m.
// m is allocated to the whole (wholerows x wholecols) size, filled, and then
// narrowed with adjustROI to the same ROI this oclMat represents.
// 3-channel data goes through a temporary packed buffer produced by
// convert_C4C3; other channel counts are copied directly.
// NOTE(review): braces and the `else` line are missing from this listing, and the
// destination argument of both openCLMemcpy2D calls is missing after "clCxt,".
// No check of `err` and no release of `temp` are visible here - confirm
// against the full file.
void cv::ocl::oclMat::download(cv::Mat &m) const
m.create(wholerows, wholecols, type());
if(m.channels() == 3)
// Row pitch of the packed 3-channel staging buffer.
int pitch = wholecols * 3 * m.elemSize1();
// The staging buffer size is rounded up to a multiple of this value.
int tail_padding = m.elemSize1() * 3072;
int err;
cl_mem temp = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE,
(pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
convert_C4C3(*this, temp);
openCLMemcpy2D(clCxt,, m.step, temp, pitch, wholecols * m.elemSize(), wholerows, clMemcpyDeviceToHost, 3);
// Direct device-to-host copy for the non-3-channel case.
openCLMemcpy2D(clCxt,, m.step, data, step, wholecols * elemSize(), wholerows, clMemcpyDeviceToHost);
Size wholesize;
Point ofs;
locateROI(wholesize, ofs);
// Shrink m from the whole matrix to the ROI located above.
m.adjustROI(-ofs.y, ofs.y + rows - wholerows, -ofs.x, ofs.x + cols - wholecols);
////////////////////////////////// CopyTo /////////////////////////////////
// Launches the masked-copy kernel named kernelName from the operator_copyToM program.
//   src, dst - matrices of identical size (checked in debug builds only).
//   mask     - CV_8UC1 matrix of the same size (checked in debug builds only).
// The kernel is compiled with -D GENTYPE=<vector type> chosen from dst's
// channel count and depth. Steps and offsets of src/dst are passed in pixels;
// mask step and offset are passed as stored.
static void copy_to_with_mask(const oclMat &src, oclMat &dst, const oclMat &mask, String kernelName)
CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols &&
src.rows == dst.rows && src.cols == dst.cols
&& mask.type() == CV_8UC1);
std::vector<std::pair<size_t , const void *> > args;
// Row index: channel count - 1; column index: OpenCV depth code.
// NOTE(review): the closing "};" of this initializer is not visible in this listing.
String string_types[4][7] = {{"uchar", "char", "ushort", "short", "int", "float", "double"},
{"uchar2", "char2", "ushort2", "short2", "int2", "float2", "double2"},
{"uchar3", "char3", "ushort3", "short3", "int3", "float3", "double3"},
{"uchar4", "char4", "ushort4", "short4", "int4", "float4", "double4"}
// 32 bytes is enough for "-D GENTYPE=" plus the longest type name in the table above.
char compile_option[32];
sprintf(compile_option, "-D GENTYPE=%s", string_types[dst.oclchannels() - 1][dst.depth()].c_str());
// One entry per destination element, 16x16 local size.
size_t localThreads[3] = {16, 16, 1};
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
int dststep_in_pixel = dst.step / dst.elemSize(), dstoffset_in_pixel = dst.offset / dst.elemSize();
int srcstep_in_pixel = src.step / src.elemSize(), srcoffset_in_pixel = src.offset / src.elemSize();
// NOTE(review): the three cl_mem argument expressions are missing after '&' in this listing.
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcstep_in_pixel ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&srcoffset_in_pixel ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dststep_in_pixel ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstoffset_in_pixel ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.step ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.offset ));
openCLExecuteKernel(dst.clCxt , &operator_copyToM, kernelName, globalThreads,
localThreads, args, -1, -1, compile_option);
// Copies this matrix into mat, which is (re)allocated to the same size and type.
// With an empty mask the data is copied with openCLCopyBuffer2D; the remaining
// statements run the "copy_to_with_mask" kernel.
// NOTE(review): braces and the `else` line are missing from this listing, and the
// destination argument of openCLCopyBuffer2D is missing after "clCxt,". The
// second create()/copy_to_with_mask pair is presumably the non-empty-mask branch.
void cv::ocl::oclMat::copyTo( oclMat &mat, const oclMat &mask) const
if (mask.empty())
mat.create(size(), type());
openCLCopyBuffer2D(clCxt,, mat.step, mat.offset,
data, step, cols * elemSize(), rows, offset);
mat.create(size(), type());
copy_to_with_mask(*this, mat, mask, "copy_to_with_mask");
//////////////////////////////// ConvertTo ////////////////////////////////
// Launches the "convert_to" kernel to convert src into the already-allocated dst.
//   src, dst    - matrices of identical size (checked in debug builds only).
//   alpha, beta - passed to the kernel as single-precision floats.
// The matrices are treated as single-channel rows of cols * channels elements;
// steps and offsets are passed in elements.
static void convert_run(const oclMat &src, oclMat &dst, double alpha, double beta)
String kernelName = "convert_to";
// The kernel takes float parameters, so the double arguments are narrowed here.
float alpha_f = alpha, beta_f = beta;
int sdepth = src.depth(), ddepth = dst.depth();
int sstep1 = (int)src.step1(), dstep1 = (int)dst.step1();
// Row width in elements rather than pixels.
int cols1 = src.cols * src.oclchannels();
char buildOptions[150], convertString[50];
// Indexed by OpenCV depth code (CV_8U .. CV_64F).
const char * typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
sprintf(convertString, "convert_%s_sat_rte", typeMap[ddepth]);
// Floating-point destinations get an empty convertToDstType (no saturating conversion).
sprintf(buildOptions, "-D srcT=%s -D dstT=%s -D convertToDstType=%s", typeMap[sdepth],
typeMap[ddepth], CV_32F == ddepth || ddepth == CV_64F ? "" : convertString);
CV_DbgAssert(src.rows == dst.rows && src.cols == dst.cols);
std::vector<std::pair<size_t , const void *> > args;
size_t localThreads[3] = { 16, 16, 1 };
// Global sizes are rounded up to a multiple of the local sizes.
size_t globalThreads[3] = { divUp(cols1, localThreads[0]) * localThreads[0],
divUp(dst.rows, localThreads[1]) * localThreads[1], 1 };
int doffset1 = dst.offset / dst.elemSize1();
int soffset1 = src.offset / src.elemSize1();
// NOTE(review): the two cl_mem argument expressions are missing after '&' in this listing.
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols1 ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&sstep1 ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&soffset1 ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstep1 ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&doffset1 ));
args.push_back( std::make_pair( sizeof(cl_float) , (void *)&alpha_f ));
args.push_back( std::make_pair( sizeof(cl_float) , (void *)&beta_f ));
openCLExecuteKernel(dst.clCxt , &operator_convertTo, kernelName, globalThreads,
localThreads, args, -1, -1, buildOptions);
// Converts this matrix into dst with depth taken from rtype, passing alpha and
// beta on to convert_run.
//   dst   - destination; allocated here with this matrix's size.
//   rtype - requested type; a negative value means "same type as this matrix".
//           Only its depth is used, the channel count always comes from this matrix.
// Raises OpenCLDoubleNotSupported when the device lacks double support and
// either this matrix or dst (as it is on entry) is CV_64F.
// NOTE(review): braces, `else` lines and some short statements appear to be
// missing from this listing. In particular the body of the
// `sdepth == ddepth && noScale` shortcut is not visible - confirm against the
// full file.
void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double beta ) const
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) &&
(depth() == CV_64F || dst.depth() == CV_64F))
CV_Error(Error::OpenCLDoubleNotSupported, "Selected device doesn't support double");
// True when alpha is 1 and beta is 0 to within machine epsilon.
bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon()
&& fabs(beta) < std::numeric_limits<double>::epsilon();
if( rtype < 0 )
rtype = type();
rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype);
if( sdepth == ddepth && noScale )
oclMat temp;
const oclMat *psrc = this;
// In-place conversion to a different depth: keep a copy of the source header in
// temp, because dst.create() below replaces dst (which is *this).
if( sdepth != ddepth && psrc == &dst )
psrc = &(temp = *this);
dst.create( size(), rtype );
convert_run(*psrc, dst, alpha, beta);
//////////////////////////////// setTo ////////////////////////////////////
// Assignment from a Scalar; returns *this.
// NOTE(review): only the return statement is visible in this excerpt - the statement that
// applies `s` to the matrix, and the enclosing braces, are not shown here.
oclMat &cv::ocl::oclMat::operator = (const Scalar &s)
return *this;
#ifdef CL_VERSION_1_2
// Packs component 0 of `s` into a byte buffer laid out as one CLT value.
//   CLT - host-side OpenCL scalar type; determines the buffer size.
//   PT  - type handed to saturate_cast<> for the conversion of the component.
// Returns a buffer of sizeof(CLT) bytes.
template <typename CLT, typename PT>
static std::vector<uchar> cvt1(const cv::Scalar & s)
{
    std::vector<uchar> _buf(sizeof(CLT));
    CLT * const buf = reinterpret_cast<CLT *>(&_buf[0]);
    buf[0] = saturate_cast<PT>(s[0]);
    return _buf;
}
// Packs components 0 and 1 of `s` into a byte buffer laid out as one CLT value.
//   CLT - host-side OpenCL 2-element vector type exposing an `s[]` member array.
//   PT  - type handed to saturate_cast<> for the conversion of each component.
// Returns a buffer of sizeof(CLT) bytes.
template <typename CLT, typename PT>
static std::vector<uchar> cvt2(const cv::Scalar & s)
{
    std::vector<uchar> _buf(sizeof(CLT));
    CLT * const buf = reinterpret_cast<CLT *>(&_buf[0]);
    buf->s[0] = saturate_cast<PT>(s[0]);
    buf->s[1] = saturate_cast<PT>(s[1]);
    return _buf;
}
// Packs components 0..3 of `s` into a byte buffer laid out as one CLT value.
//   CLT - host-side OpenCL 4-element vector type exposing an `s[]` member array.
//   PT  - type handed to saturate_cast<> for the conversion of each component.
// Returns a buffer of sizeof(CLT) bytes.
template <typename CLT, typename PT>
static std::vector<uchar> cvt4(const cv::Scalar & s)
{
    std::vector<uchar> _buf(sizeof(CLT));
    CLT * const buf = reinterpret_cast<CLT *>(&_buf[0]);
    buf->s[0] = saturate_cast<PT>(s[0]);
    buf->s[1] = saturate_cast<PT>(s[1]);
    buf->s[2] = saturate_cast<PT>(s[2]);
    buf->s[3] = saturate_cast<PT>(s[3]);
    return _buf;
}
// Converter that packs a Scalar into the bytes of one OpenCL value.
typedef std::vector<uchar> (*ConvertFunc)(const cv::Scalar & s);

// Packs `s` into the byte image of the OpenCL value that matches `type`.
// The converter is looked up by channel count (table row) and depth (table column).
// The table has no converters for three channels, so such a request - like an
// out-of-range channel count or depth - fails a CV_Assert instead of calling through
// a null or out-of-bounds function pointer.
static std::vector<uchar> scalarToCLVector(const cv::Scalar & s, int type)
{
    const int depth = CV_MAT_DEPTH(type);
    const int channels = CV_MAT_CN(type);

    static const ConvertFunc funcs[4][7] =
    {
        { cvt1<cl_uchar, uchar>, cvt1<cl_char, char>, cvt1<cl_ushort, ushort>, cvt1<cl_short, short>,
          cvt1<cl_int, int>, cvt1<cl_float, float>, cvt1<cl_double, double> },

        { cvt2<cl_uchar2, uchar>, cvt2<cl_char2, char>, cvt2<cl_ushort2, ushort>, cvt2<cl_short2, short>,
          cvt2<cl_int2, int>, cvt2<cl_float2, float>, cvt2<cl_double2, double> },

        { 0, 0, 0, 0, 0, 0, 0 },

        { cvt4<cl_uchar4, uchar>, cvt4<cl_char4, char>, cvt4<cl_ushort4, ushort>, cvt4<cl_short4, short>,
          cvt4<cl_int4, int>, cvt4<cl_float4, float>, cvt4<cl_double4, double> }
    };

    // Reject indices that fall outside the 4x7 table before using them.
    CV_Assert(channels >= 1 && channels <= 4);
    CV_Assert(depth >= 0 && depth < 7);

    ConvertFunc func = funcs[channels - 1][depth];
    CV_Assert(func != 0); // the three-channel row holds no converters

    return func(s);
}
// Fills `dst` with `scalar` without a mask by launching kernel `kernelName` from the
// `operator_setTo` program.
// NOTE(review): brace lines, preprocessor `#else`/`#endif` lines and parts of several
// expressions are not present in this excerpt (see the notes below), so the relation
// between the OpenCL 1.2 path and the kernel path cannot be read from here - confirm
// against the complete file.
static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, String kernelName)
std::vector<std::pair<size_t , const void *> > args;
size_t localThreads[3] = {16, 16, 1};
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
// Row step and start offset of dst expressed in whole elements rather than bytes.
int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();
// For CV_8UC1 the x-dimension is (cols + 4) / 4 rounded up to a multiple of the local size
// (presumably the kernel handles four pixels per work-item - confirm against the kernel).
if (dst.type() == CV_8UC1)
globalThreads[0] = ((dst.cols + 4) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
// GENTYPE build option: typeMap is indexed by depth, channelMap by channel count
// (three channels map to the suffix '4').
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
const char channelMap[] = { ' ', ' ', '2', '4', '4' };
std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]);
// 1x1 host matrix of dst's type holding the fill value.
Mat mat(1, 1, dst.type(), scalar);
#ifdef CL_VERSION_1_2
// this enables backwards portability to
// run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
if (Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2) && dst.isContinuous())
std::vector<uchar> p = ::scalarToCLVector(scalar, CV_MAKE_TYPE(dst.depth(), dst.oclchannels()));
// NOTE(review): the beginning of this call (function name and leading arguments) and the
// operand of the (cl_mem) cast are not visible in this excerpt.
(cl_mem), (void*)&p[0], p.size(),
0, dst.step * dst.rows, 0, NULL, NULL);
// Device copy of the 1x1 fill value.
oclMat m(mat);
// NOTE(review): the operands of `&` in the next two arguments are not visible in this excerpt.
args.push_back( std::make_pair( sizeof(cl_mem) , (void*)& ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));
openCLExecuteKernel(dst.clCxt , &operator_setTo, kernelName, globalThreads,
localThreads, args, -1, -1, buildOptions.c_str());
// Fills `dst` with `scalar` under `mask` by launching kernel `kernelName` from the
// `operator_setToM` program. In debug builds `mask` must have the same rows/cols as `dst`.
// NOTE(review): brace lines and the operands of several `&` expressions are not present
// in this excerpt - confirm against the complete file.
static void set_to_withmask_run(const oclMat &dst, const Scalar &scalar, const oclMat &mask, String kernelName)
CV_DbgAssert( dst.rows == mask.rows && dst.cols == mask.cols);
std::vector<std::pair<size_t , const void *> > args;
size_t localThreads[3] = { 16, 16, 1 };
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
// Row step and start offset of dst expressed in whole elements rather than bytes.
int step_in_pixel = dst.step / dst.elemSize(), offset_in_pixel = dst.offset / dst.elemSize();
// GENTYPE build option: typeMap is indexed by depth, channelMap by channel count
// (three channels map to the suffix '4').
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
const char channelMap[] = { ' ', ' ', '2', '4', '4' };
std::string buildOptions = format("-D GENTYPE=%s%c", typeMap[dst.depth()], channelMap[dst.channels()]);
// Device copy of a 1x1 matrix of dst's type holding the fill value.
oclMat m(Mat(1, 1, dst.type(), scalar));
// NOTE(review): the operands of `&` in the next two arguments are not visible in this excerpt.
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&step_in_pixel ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset_in_pixel ));
// NOTE(review): operand not visible here either. Unlike dst, the mask step and offset
// are passed as stored, without division by the element size.
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.step ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&mask.offset ));
openCLExecuteKernel(dst.clCxt , &operator_setToM, kernelName, globalThreads,
localThreads, args, -1, -1, buildOptions.c_str());
// Sets the elements of this matrix to `scalar`, optionally restricted by `mask`.
//   mask - must have type CV_8UC1 (asserted before the emptiness test below).
// Requires a depth in [0, 6]; in debug builds the matrix must be non-empty.
// Returns *this.
// NOTE(review): no `else` or brace lines are present in this excerpt, so the pairing of
// the masked call with the `mask.empty()` test cannot be read from here - confirm against
// the complete file.
oclMat &cv::ocl::oclMat::setTo(const Scalar &scalar, const oclMat &mask)
CV_Assert(mask.type() == CV_8UC1);
CV_Assert( this->depth() >= 0 && this->depth() <= 6 );
CV_DbgAssert( !this->empty());
if (mask.empty())
// CV_8UC1 uses a dedicated kernel name; every other type uses the generic one.
set_to_withoutmask_run(*this, scalar, type() == CV_8UC1 ?
"set_to_without_mask_C1_D0" : "set_to_without_mask");
set_to_withmask_run(*this, scalar, mask, "set_to_with_mask");
return *this;
// Returns a header copied from *this whose channel count (and derived column count) is
// changed to `new_cn`.
//   new_cn   - new channel count; 0 keeps the current oclchannels().
//   new_rows - new row count; 0 keeps the current one. Any other value different from
//              `rows` is rejected by the first check with Error::StsBadFunc.
// Raises Error::BadNumChannels when the total width is not divisible by new_cn; the
// row-changing section raises BadStep / StsOutOfRange / StsBadArg as coded below.
// NOTE(review): brace lines are not present in this excerpt, so the extent of the
// row-changing section is not visible here - confirm against the complete file.
oclMat cv::ocl::oclMat::reshape(int new_cn, int new_rows) const
if( new_rows != 0 && new_rows != rows)
CV_Error( Error::StsBadFunc, "oclMat's number of rows can not be changed for current version" );
oclMat hdr = *this;
int cn = oclchannels();
if (new_cn == 0)
new_cn = cn;
// Width of one row counted in single-channel elements.
int total_width = cols * cn;
// The requested channel count does not fit into one row: derive a row count from it.
if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)
new_rows = rows * total_width / new_cn;
if (new_rows != 0 && new_rows != rows)
int total_size = total_width * rows;
if (!isContinuous())
CV_Error(Error::BadStep, "The matrix is not continuous, thus its number of rows can not be changed");
if ((unsigned)new_rows > (unsigned)total_size)
CV_Error(Error::StsOutOfRange, "Bad new number of rows");
total_width = total_size / new_rows;
if (total_width * new_rows != total_size)
CV_Error(Error::StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");
hdr.rows = new_rows;
hdr.step = total_width * elemSize1();
int new_width = total_width / new_cn;
if (new_width * new_cn != total_width)
CV_Error(Error::BadNumChannels, "The total width is not divisible by the new number of channels");
hdr.cols = new_width;
hdr.wholecols = new_width;
// Replace the channel field of the flags with new_cn.
hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);
return hdr;
// Size-based overload: forwards to createEx(rows, cols, ...) using size.height as the
// row count and size.width as the column count.
void cv::ocl::oclMat::createEx(Size size, int type,
                               DevMemRW rw_type, DevMemType mem_type)
{
    createEx(size.height, size.width, type, rw_type, mem_type);
}
void cv::ocl::oclMat::create(int _rows, int _cols, int _type)
createEx(_rows, _cols, _type, gDeviceMemRW, gDeviceMemType);
// Allocates device storage for a _rows x _cols matrix of type _type with the given
// read/write and memory-type settings, and initialises the header fields and refcount.
// NOTE(review): brace lines and other single-token lines are not present in this excerpt;
// in particular the statements guarded by the first two `if` tests below are not
// visible - confirm against the complete file.
void cv::ocl::oclMat::createEx(int _rows, int _cols, int _type,
DevMemRW rw_type, DevMemType mem_type)
clCxt = Context::getContext();
/* core logic */
_type &= Mat::TYPE_MASK;
// Requested geometry and type already match and data is present.
if( rows == _rows && cols == _cols && type() == _type && data )
// Data from a previous allocation is present.
if( data )
CV_DbgAssert( _rows >= 0 && _cols >= 0 );
if( _rows > 0 && _cols > 0 )
flags = Mat::MAGIC_VAL + _type;
rows = _rows;
cols = _cols;
wholerows = _rows;
wholecols = _cols;
size_t esz = elemSize();
void *dev_ptr;
// The allocator receives the requested row width in bytes and writes the resulting pitch to `step`.
openCLMallocPitchEx(clCxt, &dev_ptr, &step, GPU_MATRIX_MALLOC_STEP(esz * cols), rows, rw_type, mem_type);
// No padding between rows: mark the matrix as continuous.
if (esz * cols == step)
flags |= Mat::CONTINUOUS_FLAG;
// Total byte size is computed in 64 bits and then narrowed to size_t.
int64 _nettosize = (int64)step * rows;
size_t nettosize = (size_t)_nettosize;
datastart = data = (uchar *)dev_ptr;
dataend = data + nettosize;
// New allocation starts with a single owner.
refcount = (int *)fastMalloc(sizeof(*refcount));
*refcount = 1;
// Drops this header's reference (CV_XADD on the refcount) and resets all header fields
// to zero.
// NOTE(review): the statements executed when the reference count drops from 1 (the
// CV_XADD(...) == 1 case) are not visible in this excerpt, and neither are brace lines -
// confirm against the complete file that the refcount and the device memory are freed there.
void cv::ocl::oclMat::release()
if( refcount && CV_XADD(refcount, -1) == 1 )
data = datastart = dataend = 0;
step = rows = cols = 0;
offset = wholerows = wholecols = 0;
refcount = 0;
// In-place addition: calls add(*this, m, *this) and returns *this.
oclMat& cv::ocl::oclMat::operator+=( const oclMat& m )
{
    add(*this, m, *this);
    return *this;
}
// In-place subtraction: calls subtract(*this, m, *this) and returns *this.
oclMat& cv::ocl::oclMat::operator-=( const oclMat& m )
{
    subtract(*this, m, *this);
    return *this;
}
// In-place multiplication: calls multiply(*this, m, *this) and returns *this.
oclMat& cv::ocl::oclMat::operator*=( const oclMat& m )
{
    multiply(*this, m, *this);
    return *this;
}
// In-place division: calls divide(*this, m, *this) and returns *this.
oclMat& cv::ocl::oclMat::operator/=( const oclMat& m )
{
    divide(*this, m, *this);
    return *this;
}
@ -1,226 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Peng Xiao,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
using namespace std;
namespace cv
namespace ocl
// provide additional methods for the user to interact with the command queue after a task is fired
// Builds the final kernel name, obtains the kernel from `source`, sets its arguments and
// enqueues it as a 3-dimensional range on the command queue of `clCxt`.
//   channels, depth - when not -1, appended to kernelName as "_C<channels>" / "_D<depth>".
//   globalThreads   - rounded up in place to multiples of localThreads when localThreads
//                     is not NULL.
//   args            - (size, pointer) pairs passed to clSetKernelArg in order.
//   build_options   - forwarded to openCLGetKernelFromSource.
// NOTE(review): `finish_mode` is not referenced by any line visible in this excerpt, and
// brace lines are absent as well - the statements that use it are presumably among the
// lines missing here; confirm against the complete file.
static void openCLExecuteKernel_2(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, const char *build_options, FLUSH_MODE finish_mode)
//construct kernel name
//The rule is functionName_Cn_Dn, C represents Channels, D represents DataType Depth, n represents an integer number
//for example split_C2_D2 represents the split kernel with channels = 2 and dataType Depth = 2 (data type is char)
std::stringstream idxStr;
if(channels != -1)
idxStr << "_C" << channels;
if(depth != -1)
idxStr << "_D" << depth;
kernelName += idxStr.str().c_str();
cl_kernel kernel;
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
// Round every global dimension up to a multiple of the corresponding local dimension.
if ( localThreads != NULL)
globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
//size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
cv::ocl::openCLVerifyKernel(clCxt, kernel, localThreads);
for(size_t i = 0; i < args.size(); i ++)
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
// No wait list and no completion event are passed to the enqueue call.
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), kernel, 3, NULL, globalThreads,
localThreads, 0, NULL, NULL));
// Overload without build options: forwards to the build-options overload with NULL.
void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName,
                          size_t globalThreads[3], size_t localThreads[3],
                          std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode)
{
    openCLExecuteKernel2(clCxt, source, kernelName, globalThreads, localThreads, args,
                         channels, depth, NULL, finish_mode);
}
// Public entry point with build options: forwards every argument unchanged to the
// file-local openCLExecuteKernel_2().
void openCLExecuteKernel2(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName,
                          size_t globalThreads[3], size_t localThreads[3],
                          std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options, FLUSH_MODE finish_mode)
{
    openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
                          build_options, finish_mode);
}
// Creates a 2D OpenCL image with the size of `mat`, copies the matrix contents into it
// and returns the image handle.
// The channel data type is chosen from mat.depth() and the channel order from
// mat.oclchannels(); unsupported values end in CV_Error.
// NOTE(review): brace lines, the `switch`/`break`/`default` lines, `#else`/`#endif`
// lines and the operands of several (cl_mem) casts are not present in this excerpt, so the
// statements below cannot be read as complete code - confirm against the complete file.
cl_mem bindTexture(const oclMat &mat)
cl_mem texture;
cl_image_format format;
int err;
int depth = mat.depth();
int channels = mat.oclchannels();
// Channel data type by depth.
case CV_8U:
format.image_channel_data_type = CL_UNSIGNED_INT8;
// NOTE(review): CV_32S is a signed depth but is mapped to an unsigned channel type
// here - confirm that CL_UNSIGNED_INT32 rather than CL_SIGNED_INT32 is intended.
case CV_32S:
format.image_channel_data_type = CL_UNSIGNED_INT32;
case CV_32F:
format.image_channel_data_type = CL_FLOAT;
// NOTE(review): the message below reads "forma" where the later one reads "format".
CV_Error(-1, "Image forma is not supported");
// Channel order by channel count.
case 1:
format.image_channel_order = CL_R;
case 3:
format.image_channel_order = CL_RGB;
case 4:
format.image_channel_order = CL_RGBA;
CV_Error(-1, "Image format is not supported");
#ifdef CL_VERSION_1_2
//this enables backwards portability to
//run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
// OpenCL 1.2 path: describe a plain 2D image of mat.cols x mat.rows with default pitches.
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = mat.cols;
desc.image_height = mat.rows;
desc.image_depth = 0;
desc.image_array_size = 1;
desc.image_row_pitch = 0;
desc.image_slice_pitch = 0;
desc.buffer = NULL;
desc.num_mip_levels = 0;
desc.num_samples = 0;
texture = clCreateImage(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
// NOTE(review): the argument list of this clCreateImage2D call is not visible in this excerpt.
texture = clCreateImage2D(
size_t origin[] = { 0, 0, 0 };
size_t region[] = { mat.cols, mat.rows, 1 };
cl_mem devData;
// Rows are padded (step differs from cols * elemSize): create a tightly packed
// temporary buffer and copy the matrix into it row by row.
if (mat.cols * mat.elemSize() != mat.step)
// No error code is requested from clCreateBuffer (last argument is NULL).
devData = clCreateBuffer(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_ONLY, mat.cols * mat.rows
* mat.elemSize(), NULL, NULL);
const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
clEnqueueCopyBufferRect(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), (cl_mem), devData, origin, origin,
regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
devData = (cl_mem);
clEnqueueCopyBufferToImage(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), devData, texture, 0, origin, region, 0, NULL, 0);
// NOTE(review): the statement guarded by this test is not visible in this excerpt
// (presumably it releases the temporary buffer - confirm).
if ((mat.cols * mat.elemSize() != mat.step))
return texture;
// Wraps the image returned by bindTexture(mat) in a TextureCL object constructed with
// the matrix's rows, cols and type, and returns it as a Ptr.
Ptr<TextureCL> bindTexturePtr(const oclMat &mat)
{
    return makePtr<TextureCL>(bindTexture(mat), mat.rows, mat.cols, mat.type());
}
// Takes an image handle by reference.
// NOTE(review): the body of this function is not visible in this excerpt (presumably it
// releases the image created by bindTexture - confirm against the complete file).
void releaseTexture(cl_mem& texture)
}//namespace ocl
}//namespace cv
@ -1,391 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jin Ma,
// Sen Liu,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other Materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#include "precomp.hpp"
#include "opencv2/imgproc/types_c.h"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencl_kernels.hpp"
#if defined _MSC_VER
#define snprintf sprintf_s
namespace cv
namespace ocl
// The function calculates center of gravity and the central second order moments
// Fills the derived fields of `moments` (inv_sqrt_m00 and the central moments
// mu20, mu11, mu02, mu30, mu21, mu12, mu03) from the spatial moments m00..m03
// already stored in it.
//   moments - must not be null (asserted); updated in place.
// NOTE(review): brace lines are not present in this excerpt, so the extent of the
// `if` below cannot be read from here; cx and cy are initialised to 0 before it.
static void icvCompleteMomentState( CvMoments* moments )
double cx = 0, cy = 0;
double mu20, mu11, mu02;
assert( moments != 0 );
moments->inv_sqrt_m00 = 0;
// Centre of gravity (cx, cy) is computed only when m00 is not (nearly) zero.
if( fabs(moments->m00) > DBL_EPSILON )
double inv_m00 = 1. / moments->m00;
cx = moments->m10 * inv_m00;
cy = moments->m01 * inv_m00;
moments->inv_sqrt_m00 = std::sqrt( fabs(inv_m00) );
// mu20 = m20 - m10*cx
mu20 = moments->m20 - moments->m10 * cx;
// mu11 = m11 - m10*cy
mu11 = moments->m11 - moments->m10 * cy;
// mu02 = m02 - m01*cy
mu02 = moments->m02 - moments->m01 * cy;
moments->mu20 = mu20;
moments->mu11 = mu11;
moments->mu02 = mu02;
// mu30 = m30 - cx*(3*mu20 + cx*m10)
moments->mu30 = moments->m30 - cx * (3 * mu20 + cx * moments->m10);
// From here on the local mu11 holds 2*mu11, which is the factor used by the two formulas below.
mu11 += mu11;
// mu21 = m21 - cx*(2*mu11 + cx*m01) - cy*mu20
moments->mu21 = moments->m21 - cx * (mu11 + cx * moments->m01) - cy * mu20;
// mu12 = m12 - cy*(2*mu11 + cy*m10) - cx*mu02
moments->mu12 = moments->m12 - cy * (mu11 + cy * moments->m10) - cx * mu02;
// mu03 = m03 - cy*(3*mu02 + cy*m01)
moments->mu03 = moments->m03 - cy * (3 * mu02 + cy * moments->m01);
// Computes the spatial moments of the point sequence `contour` with the
// "icvContourMoments" kernel of the `moments` program, stores them in `mom` and
// completes the derived fields through icvCompleteMomentState().
// The point coordinates are packed into a 1 x (2 * total) float row (x at even, y at odd
// indices), uploaded, and the kernel writes a 10 x total result matrix whose rows are
// summed on the host into a00..a03.
// NOTE(review): brace lines, `else` lines and the object names in front of the
// `<float>(...)` / `<cl_long>(...)` accessors and of several `&` operands are not present
// in this excerpt - confirm against the complete file.
static void icvContourMoments( CvSeq* contour, CvMoments* mom )
if( contour->total )
CvSeqReader reader;
int lpt = contour->total;
double a00, a10, a01, a20, a11, a02, a30, a21, a12, a03;
cvStartReadSeq( contour, &reader, 0 );
// Two floats (x, y) per contour point.
size_t reader_size = lpt << 1;
cv::Mat reader_mat(1,reader_size,CV_32FC1);
bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2;
// Float contours are rejected on devices without double support.
if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE) && is_float)
CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
if( is_float )
// Each iteration consumes one point: i++ stores x, the loop's ++i then steps past y.
for(size_t i = 0; i < reader_size; ++i)
||||||<float>(0, i++) = ((CvPoint2D32f*)(reader.ptr))->x;
||||||<float>(0, i) = ((CvPoint2D32f*)(reader.ptr))->y;
CV_NEXT_SEQ_ELEM( contour->elem_size, reader );
// Same packing for integer points.
for(size_t i = 0; i < reader_size; ++i)
||||||<float>(0, i++) = ((CvPoint*)(reader.ptr))->x;
||||||<float>(0, i) = ((CvPoint*)(reader.ptr))->y;
CV_NEXT_SEQ_ELEM( contour->elem_size, reader );
// One column per contour point, one row per accumulated quantity (a00..a03).
cv::ocl::oclMat dst_a(10, lpt, CV_64FC1);
cv::ocl::oclMat reader_oclmat(reader_mat);
// Work-group size is capped at 128; one work-item per contour point.
int llength = std::min(lpt,128);
size_t localThreads[3] = { llength, 1, 1};
size_t globalThreads[3] = { lpt, 1, 1};
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&contour->total ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
cl_int dst_step = (cl_int)dst_a.step;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step ));
char builOption[128];
snprintf(builOption, 128, "-D CV_8UC1");
openCLExecuteKernel(dst_a.clCxt, &moments, "icvContourMoments", globalThreads, localThreads, args, -1, -1, builOption);
// Download the per-point results to the host.
cv::Mat dst(dst_a);
a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
// Without double support the 64-bit result cells are read as cl_long values.
if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
for (int i = 0; i < contour->total; ++i)
a00 +=<cl_long>(0, i);
a10 +=<cl_long>(1, i);
a01 +=<cl_long>(2, i);
a20 +=<cl_long>(3, i);
a11 +=<cl_long>(4, i);
a02 +=<cl_long>(5, i);
a30 +=<cl_long>(6, i);
a21 +=<cl_long>(7, i);
a12 +=<cl_long>(8, i);
a03 +=<cl_long>(9, i);
// Row sums of the downloaded matrix.
a00 = cv::sum(dst.row(0))[0];
a10 = cv::sum(dst.row(1))[0];
a01 = cv::sum(dst.row(2))[0];
a20 = cv::sum(dst.row(3))[0];
a11 = cv::sum(dst.row(4))[0];
a02 = cv::sum(dst.row(5))[0];
a30 = cv::sum(dst.row(6))[0];
a21 = cv::sum(dst.row(7))[0];
a12 = cv::sum(dst.row(8))[0];
a03 = cv::sum(dst.row(9))[0];
// Normalisation factors 1/2, 1/6, 1/12, 1/24, 1/20 and 1/60; their sign follows the sign of a00.
double db1_2, db1_6, db1_12, db1_24, db1_20, db1_60;
if( fabs(a00) > FLT_EPSILON )
if( a00 > 0 )
db1_2 = 0.5;
db1_6 = 0.16666666666666666666666666666667;
db1_12 = 0.083333333333333333333333333333333;
db1_24 = 0.041666666666666666666666666666667;
db1_20 = 0.05;
db1_60 = 0.016666666666666666666666666666667;
db1_2 = -0.5;
db1_6 = -0.16666666666666666666666666666667;
db1_12 = -0.083333333333333333333333333333333;
db1_24 = -0.041666666666666666666666666666667;
db1_20 = -0.05;
db1_60 = -0.016666666666666666666666666666667;
// spatial moments
mom->m00 = a00 * db1_2;
mom->m10 = a10 * db1_6;
mom->m01 = a01 * db1_6;
mom->m20 = a20 * db1_12;
mom->m11 = a11 * db1_24;
mom->m02 = a02 * db1_12;
mom->m30 = a30 * db1_20;
mom->m21 = a21 * db1_60;
mom->m12 = a12 * db1_60;
mom->m03 = a03 * db1_20;
icvCompleteMomentState( mom );
// Computes the moments of a single-channel image with the "CvMoments" kernel of the
// `moments` program and returns them.
//   src    - single-channel image (asserted). It is taken by non-const reference and is
//            reassigned to an 8-bit image below.
//   binary - selects the binary handling (build option and the `binary_` kernel argument).
// Raises CV_StsUnsupportedFormat for CV_64FC1 input on devices without double support
// and for types without a matching build option.
// The kernel writes ten result rows per tile row; the host sums them into tmp_m[0..9].
// NOTE(review): brace lines, `else` lines, single-token conditions and the object names
// in front of the `<double>(...)` accessors and of several `&` operands are not present in
// this excerpt - confirm against the complete file.
Moments ocl_moments(oclMat& src, bool binary) //for image
CV_Assert(src.oclchannels() == 1);
if(src.type() == CV_64FC1 && !Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
// Build an 8-bit image that is 255 wherever `mask` is non-zero and make it the new `src`.
// NOTE(review): the condition guarding this section is not visible in this excerpt
// (presumably it depends on `binary` - confirm).
oclMat mask;
if(src.type() != CV_8UC1)
src.convertTo(mask, CV_8UC1);
oclMat src8u(src.size(), CV_8UC1);
src8u.setTo(Scalar(255), mask);
src = src8u;
const int TILE_SIZE = 256;
CvMoments mom;
memset(&mom, 0, sizeof(mom));
cv::Size size = src.size();
// Number of TILE_SIZE tiles needed to cover the image in x and y (rounded up).
int blockx, blocky;
blockx = (size.width + TILE_SIZE - 1)/TILE_SIZE;
blocky = (size.height + TILE_SIZE - 1)/TILE_SIZE;
oclMat dst_m;
int tile_height = TILE_SIZE;
size_t localThreads[3] = {1, tile_height, 1};
size_t globalThreads[3] = {blockx, size.height, 1};
// NOTE(review): two consecutive create() calls are visible here; the condition that
// selects between the CV_64FC1 and CV_32FC1 result type is not shown in this excerpt.
dst_m.create(blocky * 10, blockx, CV_64FC1);
dst_m.create(blocky * 10, blockx, CV_32FC1);
// Steps expressed in elements rather than bytes.
int src_step = (int)(src.step/src.elemSize());
int dstm_step = (int)(dst_m.step/dst_m.elemSize());
std::vector<std::pair<size_t , const void *> > args,args_sum;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)& ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_m.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dstm_step ));
// NOTE(review): the condition choosing between 1 and 0 is not visible in this excerpt.
int binary_;
binary_ = 1;
binary_ = 0;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&binary_));
// Build option naming the source type; binary input is handled as CV_8UC1.
char builOption[128];
if(binary || src.type() == CV_8UC1)
snprintf(builOption, 128, "-D CV_8UC1");
}else if(src.type() == CV_16UC1)
snprintf(builOption, 128, "-D CV_16UC1");
}else if(src.type() == CV_16SC1)
snprintf(builOption, 128, "-D CV_16SC1");
}else if(src.type() == CV_32FC1)
snprintf(builOption, 128, "-D CV_32FC1");
}else if(src.type() == CV_64FC1)
snprintf(builOption, 128, "-D CV_64FC1");
CV_Error( CV_StsUnsupportedFormat, "" );
openCLExecuteKernel(Context::getContext(), &moments, "CvMoments", globalThreads, localThreads, args, -1, -1, builOption);
// Download the per-tile results and sum them in double precision on the host.
Mat tmp(dst_m);
tmp.convertTo(tmp, CV_64FC1);
double tmp_m[10] = {0};
// Rows come in groups of ten: row j + k of each group holds moment number k.
for(int j = 0; j < tmp.rows; j += 10)
for(int i = 0; i < tmp.cols; i++)
tmp_m[0] +=<double>(j, i);
tmp_m[1] +=<double>(j + 1, i);
tmp_m[2] +=<double>(j + 2, i);
tmp_m[3] +=<double>(j + 3, i);
tmp_m[4] +=<double>(j + 4, i);
tmp_m[5] +=<double>(j + 5, i);
tmp_m[6] +=<double>(j + 6, i);
tmp_m[7] +=<double>(j + 7, i);
tmp_m[8] +=<double>(j + 8, i);
tmp_m[9] +=<double>(j + 9, i);
mom.m00 = tmp_m[0];
mom.m10 = tmp_m[1];
mom.m01 = tmp_m[2];
mom.m20 = tmp_m[3];
mom.m11 = tmp_m[4];
mom.m02 = tmp_m[5];
mom.m30 = tmp_m[6];
mom.m21 = tmp_m[7];
mom.m12 = tmp_m[8];
mom.m03 = tmp_m[9];
icvCompleteMomentState( &mom );
return mom;
// Computes the moments of a contour given as an array of points and returns them.
// The input is viewed as a CvMat; if it is not already a sequence, a CV_32SC2 or
// CV_32FC2 matrix is wrapped into a point sequence, and icvContourMoments() does the work.
// Raises CV_StsBadArg when a passed sequence is not a point set.
// NOTE(review): brace lines are not present in this excerpt, so the nesting of the checks
// below - and whether `contour` can still be null when icvContourMoments() is reached for
// other matrix types - cannot be read from here; confirm against the complete file.
Moments ocl_moments(InputArray _contour) //for contour
CvMoments mom;
memset(&mom, 0, sizeof(mom));
Mat arr = _contour.getMat();
CvMat c_array = arr;
const void* array = &c_array;
CvSeq* contour = 0;
// Input already is a sequence: it must be a point set.
if( CV_IS_SEQ( array ))
contour = (CvSeq*)(array);
if( !CV_IS_SEQ_POINT_SET( contour ))
CV_Error( CV_StsBadArg, "The passed sequence is not a valid contour" );
int type, coi = 0;
CvMat stub, *mat = (CvMat*)(array);
CvContour contourHeader;
CvSeqBlock block;
// Input is a matrix: wrap 2-channel int/float point matrices into a sequence header.
if( !contour )
mat = cvGetMat( mat, &stub, &coi );
type = CV_MAT_TYPE( mat->type );
if( type == CV_32SC2 || type == CV_32FC2 )
contour = cvPointSeqFromMat(
mat, &contourHeader, &block );
icvContourMoments(contour, &mom);
return mom;
@ -1,402 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#if (__GNUC__ == 4) && (__GNUC_MINOR__ == 6)
# pragma GCC diagnostic ignored "-Warray-bounds"
#include "precomp.hpp"
#include "opencl_kernels.hpp"
using namespace cv;
using namespace cv::ocl;
// Auxiliary stuff
// Declarations
// Disjoint-set forest over the elements 0..n-1.
// `parent`, `rank` and `size` are indexed by element; `size` is read directly by users
// of the class, so the data members are public.
// Copying is disabled: the copy constructor and assignment operator are private.
class DjSets
{
public:
    DjSets(int n);
    int find(int elem);
    int merge(int set1, int set2);

    std::vector<int> parent; // parent link of every element
    std::vector<int> rank;   // rank used to choose the root when merging
    std::vector<int> size;   // element count, maintained for set roots by merge()

private:
    DjSets(const DjSets &) {}
    DjSets operator =(const DjSets &);
};
// One edge of an adjacency list.
//   to   - index of the target vertex.
//   next - index of the next edge in the same list.
//   val  - payload attached to the edge.
template <typename T>
struct GraphEdge
{
    GraphEdge() {}
    GraphEdge(int to, int next, const T &val) : to(to), next(next), val(val) {}

    int to;
    int next;
    T val;
};
template <typename T>
class Graph
typedef GraphEdge<T> Edge;
Graph(int numv, int nume_max);
void addEdge(int from, int to, const T &val = T());
std::vector<int> start;
std::vector<Edge> edges;
int numv;
int nume_max;
int nume;
Graph(const Graph &) {}
Graph operator =(const Graph &) {}
// Weight of a link between two pixels/segments: `dr` and `dsp` are the two distance
// terms supplied by the caller. Links are ordered by the sum dr + dsp.
struct SegmLinkVal
{
    SegmLinkVal() {}
    SegmLinkVal(int dr, int dsp) : dr(dr), dsp(dsp) {}

    bool operator <(const SegmLinkVal &other) const
    {
        return dr + dsp < other.dr + other.dsp;
    }

    int dr;
    int dsp;
};
struct SegmLink
SegmLink() {}
SegmLink(int from, int to, const SegmLinkVal &val)
: from(from), to(to), val(val) {}
bool operator <(const SegmLink &other) const
return val < other.val;
int from;
int to;
SegmLinkVal val;
// Implementation
// Assignment is not supported: reports an error through CV_Error.
DjSets DjSets::operator = (const DjSets &/*obj*/)
{
    //cout << "Invalid DjSets constructor\n";
    CV_Error(-1, "Invalid DjSets constructor\n");
    return *this;
}
// Creates n singleton sets: every element is its own parent, has rank 0 and size 1.
DjSets::DjSets(int n) : parent(n), rank(n, 0), size(n, 1)
{
    for (int i = 0; i < n; ++i)
        parent[i] = i;
}
inline int DjSets::find(int elem)
int set = elem;
while (set != parent[set])
set = parent[set];
while (elem != parent[elem])
int next = parent[elem];
parent[elem] = set;
elem = next;
return set;
inline int DjSets::merge(int set1, int set2)
if (rank[set1] < rank[set2])
parent[set1] = set2;
size[set2] += size[set1];
return set2;
if (rank[set2] < rank[set1])
parent[set2] = set1;
size[set1] += size[set2];
return set1;
parent[set1] = set2;
size[set2] += size[set1];
return set2;
// Creates a graph with `numv` vertices and room for `nume_max` edges.
// Every vertex starts with an empty edge list (start[v] == -1) and no edge is stored yet.
template <typename T>
Graph<T>::Graph(int numv, int nume_max) : start(numv, -1), edges(nume_max)
{
    this->numv = numv;
    this->nume_max = nume_max;
    nume = 0;
}
// Adds the edge from -> to with payload `val` at the head of the edge list of `from`.
// The new edge is written to the next free slot, links to the previous head of the list
// and becomes the new head; the edge counter then advances so that the following call
// uses a fresh slot instead of overwriting this one.
// The caller must not add more than nume_max edges.
template <typename T>
inline void Graph<T>::addEdge(int from, int to, const T &val)
{
    edges[nume] = Edge(to, start[from], val);
    start[from] = nume;
    ++nume;
}
// Row-major linear index of the pixel at row `y`, column `x` in an image with `ncols` columns.
inline int pix(int y, int x, int ncols)
{
    return y * ncols + x;
}
// Square of `x`.
inline int sqr(int x)
{
    return x * x;
}
// Squared Euclidean distance between the first three components of two Vec4b values;
// the fourth component is ignored.
inline int dist2(const cv::Vec4b &lhs, const cv::Vec4b &rhs)
{
    return sqr(lhs[0] - rhs[0]) + sqr(lhs[1] - rhs[1]) + sqr(lhs[2] - rhs[2]);
}
// Squared Euclidean distance between two Vec2s values.
inline int dist2(const cv::Vec2s &lhs, const cv::Vec2s &rhs)
{
    return sqr(lhs[0] - rhs[0]) + sqr(lhs[1] - rhs[1]);
}
} // anonymous namespace
namespace cv
namespace ocl
void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize, TermCriteria criteria)
CV_Assert(src.type() == CV_8UC4);
const int nrows = src.rows;
const int ncols = src.cols;
const int hr = sr;
const int hsp = sp;
// Perform mean shift procedure and obtain region and spatial maps
oclMat h_rmap, h_spmap;
meanShiftProc(src, h_rmap, h_spmap, sp, sr, criteria);
Mat rmap = h_rmap;
Mat spmap = h_spmap;
Graph<SegmLinkVal> g(nrows * ncols, 4 * (nrows - 1) * (ncols - 1)
+ (nrows - 1) + (ncols - 1));
// Make region adjacent graph from image
Vec4b r1;
Vec4b r2[4];
Vec2s sp1;
Vec2s sp2[4];
int dr[4];
int dsp[4];
for (int y = 0; y < nrows - 1; ++y)
Vec4b *ry = rmap.ptr<Vec4b>(y);
Vec4b *ryp = rmap.ptr<Vec4b>(y + 1);
Vec2s *spy = spmap.ptr<Vec2s>(y);
Vec2s *spyp = spmap.ptr<Vec2s>(y + 1);
for (int x = 0; x < ncols - 1; ++x)
r1 = ry[x];
sp1 = spy[x];
r2[0] = ry[x + 1];
r2[1] = ryp[x];
r2[2] = ryp[x + 1];
r2[3] = ryp[x];
sp2[0] = spy[x + 1];
sp2[1] = spyp[x];
sp2[2] = spyp[x + 1];
sp2[3] = spyp[x];
dr[0] = dist2(r1, r2[0]);
dr[1] = dist2(r1, r2[1]);
dr[2] = dist2(r1, r2[2]);
dsp[0] = dist2(sp1, sp2[0]);
dsp[1] = dist2(sp1, sp2[1]);
dsp[2] = dist2(sp1, sp2[2]);
r1 = ry[x + 1];
sp1 = spy[x + 1];
dr[3] = dist2(r1, r2[3]);
dsp[3] = dist2(sp1, sp2[3]);
g.addEdge(pix(y, x, ncols), pix(y, x + 1, ncols), SegmLinkVal(dr[0], dsp[0]));
g.addEdge(pix(y, x, ncols), pix(y + 1, x, ncols), SegmLinkVal(dr[1], dsp[1]));
g.addEdge(pix(y, x, ncols), pix(y + 1, x + 1, ncols), SegmLinkVal(dr[2], dsp[2]));
g.addEdge(pix(y, x + 1, ncols), pix(y + 1, x, ncols), SegmLinkVal(dr[3], dsp[3]));
for (int y = 0; y < nrows - 1; ++y)
r1 =<Vec4b>(y, ncols - 1);
r2[0] =<Vec4b>(y + 1, ncols - 1);
sp1 =<Vec2s>(y, ncols - 1);
sp2[0] =<Vec2s>(y + 1, ncols - 1);
dr[0] = dist2(r1, r2[0]);
dsp[0] = dist2(sp1, sp2[0]);
g.addEdge(pix(y, ncols - 1, ncols), pix(y + 1, ncols - 1, ncols), SegmLinkVal(dr[0], dsp[0]));
for (int x = 0; x < ncols - 1; ++x)
r1 =<Vec4b>(nrows - 1, x);
r2[0] =<Vec4b>(nrows - 1, x + 1);
sp1 =<Vec2s>(nrows - 1, x);
sp2[0] =<Vec2s>(nrows - 1, x + 1);
dr[0] = dist2(r1, r2[0]);
dsp[0] = dist2(sp1, sp2[0]);
g.addEdge(pix(nrows - 1, x, ncols), pix(nrows - 1, x + 1, ncols), SegmLinkVal(dr[0], dsp[0]));
DjSets comps(g.numv);
// Find adjacent components
for (int v = 0; v < g.numv; ++v)
for (int e_it = g.start[v]; e_it != -1; e_it = g.edges[e_it].next)
int c1 = comps.find(v);
int c2 = comps.find(g.edges[e_it].to);
if (c1 != c2 && g.edges[e_it].val.dr < hr && g.edges[e_it].val.dsp < hsp)
comps.merge(c1, c2);
std::vector<SegmLink> edges;
// Prepare edges connecting different components
for (int v = 0; v < g.numv; ++v)
int c1 = comps.find(v);
for (int e_it = g.start[v]; e_it != -1; e_it = g.edges[e_it].next)
int c2 = comps.find(g.edges[e_it].to);
if (c1 != c2)
edges.push_back(SegmLink(c1, c2, g.edges[e_it].val));
// Sort all graph's edges connecting different components (in ascending order)
std::sort(edges.begin(), edges.end());
// Exclude small components (starting from the nearest couple)
for (size_t i = 0; i < edges.size(); ++i)
int c1 = comps.find(edges[i].from);
int c2 = comps.find(edges[i].to);
if (c1 != c2 && (comps.size[c1] < minsize || comps.size[c2] < minsize))
comps.merge(c1, c2);
// Compute sum of the pixel's colors which are in the same segment
Mat h_src = src;
std::vector<Vec4i> sumcols(nrows * ncols, Vec4i(0, 0, 0, 0));
for (int y = 0; y < nrows; ++y)
Vec4b *h_srcy = h_src.ptr<Vec4b>(y);
for (int x = 0; x < ncols; ++x)
int parent = comps.find(pix(y, x, ncols));
Vec4b col = h_srcy[x];
Vec4i &sumcol = sumcols[parent];
sumcol[0] += col[0];
sumcol[1] += col[1];
sumcol[2] += col[2];
// Create final image, color of each segment is the average color of its pixels
dst.create(src.size(), src.type());
for (int y = 0; y < nrows; ++y)
Vec4b *dsty = dst.ptr<Vec4b>(y);
for (int x = 0; x < ncols; ++x)
int parent = comps.find(pix(y, x, ncols));
const Vec4i &sumcol = sumcols[parent];
Vec4b &dstcol = dsty[x];
dstcol[0] = static_cast<uchar>(sumcol[0] / comps.size[parent]);
dstcol[1] = static_cast<uchar>(sumcol[1] / comps.size[parent]);
dstcol[2] = static_cast<uchar>(sumcol[2] / comps.size[parent]);
@ -1,107 +0,0 @@
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Niko Li,
// Rock Li,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
// Look-up-table transform, one element per work-item.
// The work-item at column x1 = get_global_id(0), row y = get_global_id(1)
// stores dst = lut[lut_offset1 + src]. Work-items outside cols1 x rows do nothing.
// The step/offset arguments are used directly as element indices into the
// typed src/lut/dst pointers.
// srcT and dstT are not defined in this excerpt (presumably supplied as
// build options -- TODO confirm against the host code).
__kernel void LUT_C1( __global const srcT * src, __global const dstT *lut,
__global dstT *dst,
int cols1, int rows,
int src_offset1,
int lut_offset1,
int dst_offset1,
int src_step1, int dst_step1)
int x1 = get_global_id(0);
int y = get_global_id(1);
// Guard against work-items that fall outside the processed region.
if (x1 < cols1 && y < rows)
// index = y * step + offset + x1
int src_index = mad24(y, src_step1, src_offset1 + x1);
int dst_index = mad24(y, dst_step1, dst_offset1 + x1);
// The source value selects the table entry.
dst[dst_index] = lut[lut_offset1 + src[src_index]];
// Look-up-table transform handling two consecutive elements per work-item.
// x1 = get_global_id(0) * 2 is the first of the two columns; y = get_global_id(1).
// The table is addressed as lut[lut_offset1 + 2 * value + k] for the k-th
// element of the pair (k = 0, 1), i.e. entries for the two lanes are interleaved.
// srcT and dstT are not defined in this excerpt (presumably build options -- TODO confirm).
__kernel void LUT_C2( __global const srcT * src, __global const dstT *lut,
__global dstT *dst,
int cols1, int rows,
int src_offset1,
int lut_offset1,
int dst_offset1,
int src_step1, int dst_step1)
int x1 = get_global_id(0) << 1;
int y = get_global_id(1);
// Only the first column of the pair is bounds-checked here.
if (x1 < cols1 && y < rows)
// index = y * step + offset + x1
int src_index = mad24(y, src_step1, src_offset1 + x1);
int dst_index = mad24(y, dst_step1, dst_offset1 + x1);
dst[dst_index ] = lut[lut_offset1 + (src[src_index ] << 1) ];
// Second element: looked up only when x1 + 1 is still inside cols1; otherwise
// the destination element is assigned its own current value.
// NOTE(review): the fallback arm still reads and writes dst[dst_index + 1],
// which is at or past cols1 -- confirm the buffer is always large enough.
dst[dst_index + 1] = x1 + 1 < cols1 ? lut[lut_offset1 + (src[src_index + 1] << 1) + 1] : dst[dst_index + 1];
// Look-up-table transform handling four consecutive elements per work-item.
// x1 = get_global_id(0) * 4 is the first of the four columns; y = get_global_id(1).
// The table is addressed as lut[lut_offset1 + 4 * value + k] for the k-th
// element of the group (k = 0..3), i.e. entries for the four lanes are interleaved.
// srcT and dstT are not defined in this excerpt (presumably build options -- TODO confirm).
__kernel void LUT_C4( __global const srcT * src, __global const dstT *lut,
__global dstT *dst,
int cols1, int rows,
int src_offset1,
int lut_offset1,
int dst_offset1,
int src_step1, int dst_step1)
int x1 = get_global_id(0) << 2;
int y = get_global_id(1);
// Only the first column of the group is bounds-checked here.
if (x1 < cols1 && y < rows)
// index = y * step + offset + x1
int src_index = mad24(y, src_step1, src_offset1 + x1);
int dst_index = mad24(y, dst_step1, dst_offset1 + x1);
dst[dst_index ] = lut[lut_offset1 + (src[src_index ] << 2) ];
// Elements 1..3: each is looked up only when its column is still inside cols1;
// otherwise the destination element is assigned its own current value.
// NOTE(review): the fallback arms still read and write dst past cols1 --
// confirm the buffer is always large enough.
dst[dst_index + 1] = x1 + 1 < cols1 ? lut[lut_offset1 + (src[src_index + 1] << 2) + 1] : dst[dst_index + 1];
dst[dst_index + 2] = x1 + 2 < cols1 ? lut[lut_offset1 + (src[src_index + 2] << 2) + 2] : dst[dst_index + 2];
dst[dst_index + 3] = x1 + 3 < cols1 ? lut[lut_offset1 + (src[src_index + 3] << 2) + 3] : dst[dst_index + 3];
@ -1,107 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jia Haipeng,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
// Two-input absolute difference: dst = |src1 - src2|, computed in dstT.
// Compiled when BINARY is defined. One work-item per element at
// (x = get_global_id(0), y = get_global_id(1)); work-items outside
// cols x rows do nothing.
// srcT, dstT and convertToDstT are not defined in this excerpt
// (presumably build options -- TODO confirm).
#ifdef BINARY
__kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_step, int src1_offset,
__global srcT *src2, int src2_step, int src2_offset,
__global dstT *dst, int dst_step, int dst_offset,
int cols, int rows)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// index = y * step + x + offset, used as an element index into the typed pointers
int src1_index = mad24(y, src1_step, x + src1_offset);
int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
// On Intel devices an extra non-negativity check on the source indices is
// added; the original comment labels it a compiler-bug workaround.
#ifdef INTEL_DEVICE //workaround for intel compiler bug
if(src1_index >= 0 && src2_index >= 0)
// Convert both operands to the destination type before subtracting, then
// take the absolute value by negating negative results.
dstT t0 = convertToDstT(src1[src1_index]);
dstT t1 = convertToDstT(src2[src2_index]);
dstT t2 = t0 - t1;
dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2;
// Single-input variant: dst = |src1| after conversion to dstT.
// Presumably the alternative to the BINARY kernel above (the selecting
// #else is not visible in this excerpt -- TODO confirm).
// One work-item per element at (x = get_global_id(0), y = get_global_id(1));
// work-items outside cols x rows do nothing.
__kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int src1_offset,
__global dstT *dst, int dst_step, int dst_offset,
int cols, int rows)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// index = y * step + x + offset, used as an element index into the typed pointers
int src1_index = mad24(y, src1_step, x + src1_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
// On Intel devices an extra non-negativity check on the source index is
// added; the original comment labels it a compiler-bug workaround.
#ifdef INTEL_DEVICE //workaround for intel compiler bug
if(src1_index >= 0)
// Convert to the destination type, then negate negative values.
dstT t0 = convertToDstT(src1[src1_index]);
dst[dst_index] = t0 >= (dstT)(0) ? t0 : -t0;
@ -1,143 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jia Haipeng,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
// EXPRESSION is the per-element statement for the matrix/matrix kernels below.
// Exactly one FUNC_* symbol is expected to select it. It relies on src1_index,
// src2_index and dst_index being in scope, and on T, WT, convertToT and
// convertToWT, none of which are defined in this excerpt (presumably build
// options -- TODO confirm).
// NOTE(review): no #else/#endif directives are visible in this excerpt.

// Addition: operands are widened to WT, summed, and converted back to T.
#if defined (FUNC_ADD)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + convertToWT(src2[src2_index]));
// Subtraction, same widening scheme.
#if defined (FUNC_SUB)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - convertToWT(src2[src2_index]));
// Multiplication; with HAVE_SCALAR the product is additionally scaled by `scalar`.
#if defined (FUNC_MUL)
#if defined (HAVE_SCALAR)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar * convertToWT(src2[src2_index]));
// Variant without `scalar`.
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * convertToWT(src2[src2_index]));
// Division; the result is zero wherever the divisor element equals zero.
// With HAVE_SCALAR the dividend is scaled by `scalar` first.
#if defined (FUNC_DIV)
#if defined (HAVE_SCALAR)
#define EXPRESSION T zero = (T)(0); \
dst[dst_index] = src2[src2_index] == zero ? zero : \
convertToT(convertToWT(src1[src1_index]) * scalar / convertToWT(src2[src2_index]));
// Variant without `scalar`.
#define EXPRESSION T zero = (T)(0); \
dst[dst_index] = src2[src2_index] == zero ? zero : \
convertToT(convertToWT(src1[src1_index]) / convertToWT(src2[src2_index]));
// Absolute difference: subtract in WT, negate if not positive, convert back to T.
#if defined (FUNC_ABS_DIFF)
#define EXPRESSION WT value = convertToWT(src1[src1_index]) - convertToWT(src2[src2_index]); \
value = value > (WT)(0) ? value : -value; \
dst[dst_index] = convertToT(value);
// Element-wise minimum / maximum, applied directly to T values (no widening).
#if defined (FUNC_MIN)
#define EXPRESSION dst[dst_index] = min( src1[src1_index], src2[src2_index] );
#if defined (FUNC_MAX)
#define EXPRESSION dst[dst_index] = max( src1[src1_index], src2[src2_index] );
///////////////////////////////////////////// ADD ////////////////////////////////////////////////////
// Element-wise binary operation on two matrices, one work-item per element at
// (x = get_global_id(0), y = get_global_id(1)); work-items outside
// cols x rows do nothing.
// NOTE(review): the statement that consumes the three indices is not visible
// in this excerpt; presumably the EXPRESSION macro defined above is expanded
// after them -- confirm against the full file.
__kernel void arithm_binary_op_mat(__global T *src1, int src1_step, int src1_offset,
__global T *src2, int src2_step, int src2_offset,
__global T *dst, int dst_step, int dst_offset,
int cols, int rows)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// index = y * step + x + offset
int src1_index = mad24(y, src1_step, x + src1_offset);
int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
// add mat with scale
// Same layout as arithm_binary_op_mat, with an extra `scalar` argument of
// type WT. The HAVE_SCALAR variants of EXPRESSION (multiply / divide)
// reference an identifier named `scalar`.
// One work-item per element at (x = get_global_id(0), y = get_global_id(1));
// work-items outside cols x rows do nothing.
// NOTE(review): the statement that consumes the three indices is not visible
// in this excerpt; presumably EXPRESSION is expanded after them -- confirm.
__kernel void arithm_binary_op_mat_scalar(__global T *src1, int src1_step, int src1_offset,
__global T *src2, int src2_step, int src2_offset,
WT scalar,
__global T *dst, int dst_step, int dst_offset,
int cols, int rows)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// index = y * step + x + offset
int src1_index = mad24(y, src1_step, x + src1_offset);
int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
@ -1,75 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jia Haipeng,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
// Weighted sum of two matrices:
//   dst = convertToT(src1 * alpha + src2 * beta + gama)
// One work-item per element at (x = get_global_id(0), y = get_global_id(1));
// work-items outside cols1 x rows do nothing.
// `gama` is the additive term (the parameter is spelled this way in the signature).
// T, WT and convertToT are not defined in this excerpt (presumably build
// options -- TODO confirm).
__kernel void addWeighted(__global T * src1, int src1_step1, int src1_offset1,
__global T * src2, int src2_step1, int src2_offset1,
__global T * dst, int dst_step1, int dst_offset1,
WT alpha, WT beta, WT gama,
int cols1, int rows)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols1 && y < rows)
// index = y * step + x + offset, used as an element index into the typed pointers
int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step1, x + dst_offset1);
dst[dst_index] = convertToT(src1[src1_index]*alpha + src2[src2_index]*beta + gama);
@ -1,97 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jia Haipeng,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
// EXPRESSION is the per-element statement for the masked matrix/matrix kernel
// below, selected by a FUNC_* symbol. It relies on src1_index, src2_index and
// dst_index being in scope, and on T, WT, convertToT and convertToWT, none of
// which are defined in this excerpt (presumably build options -- TODO confirm).
// NOTE(review): no #endif directives are visible in this excerpt.

// Addition: operands are widened to WT, summed, and converted back to T.
#if defined (FUNC_ADD)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + convertToWT(src2[src2_index]));
// Subtraction, same widening scheme.
#if defined (FUNC_SUB)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - convertToWT(src2[src2_index]));
// Multiplication, same widening scheme.
#if defined (FUNC_MUL)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * convertToWT(src2[src2_index]));
// Division; the result is zero wherever the divisor element equals zero.
#if defined (FUNC_DIV)
#define EXPRESSION T zero = (T)(0); \
dst[dst_index] = src2[src2_index] == zero ? zero : \
convertToT(convertToWT(src1[src1_index]) / convertToWT(src2[src2_index]));
///////////////////////////////// add with mask //////////////////////////////////
// Masked element-wise binary operation on two matrices.
// One work-item per element at (x = get_global_id(0), y = get_global_id(1));
// work-items outside cols x rows do nothing, and elements whose mask byte is
// zero are skipped.
// NOTE(review): the statement that consumes the source/destination indices is
// not visible in this excerpt; presumably EXPRESSION is expanded after them --
// confirm against the full file.
__kernel void arithm_binary_op_mat_mask(__global T * src1, int src1_step, int src1_offset,
__global T * src2, int src2_step, int src2_offset,
__global uchar * mask, int mask_step, int mask_offset,
__global T * dst, int dst_step, int dst_offset,
int cols, int rows)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// The mask is addressed per element: y * mask_step + x + mask_offset.
int mask_index = mad24(y, mask_step, x + mask_offset);
// Only elements with a non-zero mask value are processed.
if (mask[mask_index])
// index = y * step + x + offset
int src1_index = mad24(y, src1_step, x + src1_offset);
int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, dst_offset + x);
@ -1,103 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jia Haipeng,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
// EXPRESSION is the per-element statement for the matrix/scalar kernel below,
// selected by a FUNC_* symbol. It relies on src1_index, dst_index and `scalar`
// being in scope, and on T, WT, convertToT and convertToWT, none of which are
// defined in this excerpt (presumably build options -- TODO confirm).
// NOTE(review): no #endif directives are visible in this excerpt.

// Element + scalar, computed in WT and converted back to T.
#if defined (FUNC_ADD)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + scalar);
// Element - scalar.
#if defined (FUNC_SUB)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - scalar);
// Element * scalar.
#if defined (FUNC_MUL)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar);
// scalar / element (note the operand order); zero where the element is zero.
#if defined (FUNC_DIV)
#define EXPRESSION T zero = (T)(0); \
dst[dst_index] = src1[src1_index] == zero ? zero : convertToT(scalar / convertToWT(src1[src1_index]));
// Absolute value of the element, computed directly in T; `scalar` is not used.
#if defined (FUNC_ABS)
#define EXPRESSION \
T value = src1[src1_index] > (T)(0) ? src1[src1_index] : -src1[src1_index]; \
dst[dst_index] = value;
// |element - scalar|, computed in WT and converted back to T.
#if defined (FUNC_ABS_DIFF)
#define EXPRESSION WT value = convertToWT(src1[src1_index]) - scalar; \
value = value > (WT)(0) ? value : -value; \
dst[dst_index] = convertToT(value);
///////////////////////////////// Add with scalar /////////////////////////////////
// Element-wise operation between a matrix and a scalar of type WT.
// One work-item per element at (x = get_global_id(0), y = get_global_id(1));
// work-items outside cols x rows do nothing.
// NOTE(review): the statement that consumes the two indices is not visible in
// this excerpt; presumably the EXPRESSION macro defined above is expanded
// after them -- confirm against the full file.
__kernel void arithm_binary_op_scalar (__global T *src1, int src1_step, int src1_offset,
WT scalar,
__global T *dst, int dst_step, int dst_offset,
int cols, int rows)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// index = y * step + x + offset
int src1_index = mad24(y, src1_step, x + src1_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
@ -1,96 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jia Haipeng,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
// EXPRESSION is the per-element statement for the masked matrix/scalar kernel
// below, selected by a FUNC_* symbol. It relies on src1_index, dst_index and
// `scalar` (a WT value) being in scope, and on T, WT, convertToT and
// convertToWT, none of which are defined in this excerpt (presumably build
// options -- TODO confirm).
// NOTE(review): no #endif directives are visible in this excerpt.

// Element + scalar, computed in WT and converted back to T.
#if defined (FUNC_ADD)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) + scalar);
// Element - scalar.
#if defined (FUNC_SUB)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) - scalar);
// Element * scalar.
#if defined (FUNC_MUL)
#define EXPRESSION dst[dst_index] = convertToT(convertToWT(src1[src1_index]) * scalar);
// scalar / element; zero where the element is zero.
// The previous definition tested src2[src2_index] and divided by scalar[0],
// but the masked scalar kernel has no src2 argument or src2_index local and
// `scalar` is a plain WT value, so it could not compile. This now matches the
// FUNC_DIV definition used by the unmasked scalar kernel.
#if defined (FUNC_DIV)
#define EXPRESSION T zero = (T)(0); \
dst[dst_index] = src1[src1_index] == zero ? zero : \
convertToT(scalar / convertToWT(src1[src1_index]));
//////////////////////////// Add with scalar with mask ////////////////////////////
// Masked element-wise operation between a matrix and a scalar of type WT.
// One work-item per element at (x = get_global_id(0), y = get_global_id(1));
// work-items outside cols x rows do nothing, and elements whose mask byte is
// zero are skipped.
// NOTE(review): the statement that consumes the source/destination indices is
// not visible in this excerpt; presumably EXPRESSION is expanded after them --
// confirm against the full file.
__kernel void arithm_binary_op_scalar_mask(__global T *src1, int src1_step, int src1_offset,
WT scalar,
__global uchar *mask, int mask_step, int mask_offset,
__global T *dst, int dst_step, int dst_offset,
int cols, int rows)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// The mask is addressed per element: y * mask_step + x + mask_offset.
int mask_index = mad24(y, mask_step, x + mask_offset);
// Only elements with a non-zero mask value are processed.
if (mask[mask_index])
// index = y * step + x + offset
int src1_index = mad24(y, src1_step, x + src1_offset);
int dst_index = mad24(y, dst_step, dst_offset + x);
@ -1,82 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jiang Liyuan,
// Peng Xiao,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
/////////////////////////////////////////// bitwise_binary //////////////////////////////////////////
// Bitwise binary operation on two matrices, treated as raw bytes.
// One work-item per element at (x = get_global_id(0), y = get_global_id(1));
// work-items outside cols x rows do nothing.
// elemSize, vlen, ucharv, vloadn, vstoren and Operation are not defined in
// this excerpt (presumably build options -- TODO confirm).
__kernel void arithm_bitwise_binary(__global uchar * src1, int src1_step, int src1_offset,
__global uchar * src2, int src2_step, int src2_offset,
__global uchar * dst, int dst_step, int dst_offset,
int cols, int rows)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// For multi-byte elements, turn the element column into a byte column.
#if elemSize > 1
x *= elemSize;
// index = y * step + x + offset, in bytes since the pointers are uchar*
int src1_index = mad24(y, src1_step, x + src1_offset);
int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
// Multi-byte elements: process the elemSize bytes in chunks of vlen using
// vector loads and stores.
#if elemSize > 1
#pragma unroll
for (int i = 0; i < elemSize; i += vlen)
ucharv t0 = vloadn(0, src1 + src1_index + i);
ucharv t1 = vloadn(0, src2 + src2_index + i);
ucharv t2 = t0 Operation t1;
vstoren(t2, 0, dst + dst_index + i);
// Single-byte form: presumably the alternative branch for elemSize == 1
// (the selecting #else is not visible in this excerpt -- TODO confirm).
dst[dst_index] = src1[src1_index] Operation src2[src2_index];
@ -1,88 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jiang Liyuan,
// Peng Xiao,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
// Masked bitwise binary operation on two matrices, treated as raw bytes.
// One work-item per element at (x = get_global_id(0), y = get_global_id(1));
// work-items outside cols1 x rows do nothing, and elements whose mask byte is
// zero are skipped.
// elemSize, vlen, ucharv, vloadn, vstoren and Operation are not defined in
// this excerpt (presumably build options -- TODO confirm).
__kernel void arithm_bitwise_binary_mask(__global uchar * src1, int src1_step, int src1_offset,
__global uchar * src2, int src2_step, int src2_offset,
__global uchar * mask, int mask_step, int mask_offset,
__global uchar * dst, int dst_step, int dst_offset,
int cols1, int rows)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols1 && y < rows)
// The mask is indexed with the unscaled element column (before x is
// multiplied by elemSize below).
int mask_index = mad24(y, mask_step, mask_offset + x);
// Only elements with a non-zero mask value are processed.
if (mask[mask_index])
// For multi-byte elements, turn the element column into a byte column.
#if elemSize > 1
x *= elemSize;
// index = y * step + x + offset, in bytes since the pointers are uchar*
int src1_index = mad24(y, src1_step, x + src1_offset);
int src2_index = mad24(y, src2_step, x + src2_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
// Multi-byte elements: process the elemSize bytes in chunks of vlen using
// vector loads and stores.
#if elemSize > 1
#pragma unroll
for (int i = 0; i < elemSize; i += vlen)
ucharv t0 = vloadn(0, src1 + src1_index + i);
ucharv t1 = vloadn(0, src2 + src2_index + i);
ucharv t2 = t0 Operation t1;
vstoren(t2, 0, dst + dst_index + i);
// Single-byte form: presumably the alternative branch for elemSize == 1
// (the selecting #else is not visible in this excerpt -- TODO confirm).
dst[dst_index] = src1[src1_index] Operation src2[src2_index];
@ -1,82 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jiang Liyuan,
// Peng Xiao,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
// Element-wise bitwise operation between a matrix and a scalar:
// dst = src1 Operation src2, where src2 points at the scalar's bytes and is read
// from offset 0 (plus the chunk offset i) regardless of the work-item position.
// One work-item handles column get_global_id(0) of row get_global_id(1).
// src1 and dst are uchar pointers indexed as y * step + offset + x (mad24).
// elemSize, vlen, ucharv, vloadn, vstoren and Operation are not defined in this
// listing -- presumably build-time macros; confirm on the host side.
// NOTE(review): brace lines and #else/#endif lines are absent from this listing;
// presumably the final scalar store is the elemSize == 1 path -- confirm.
__kernel void arithm_bitwise_binary_scalar(
__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2,
__global uchar *dst, int dst_step, int dst_offset,
int cols, int rows)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
#if elemSize > 1
// Convert the element column into a byte column for multi-byte elements.
x *= elemSize;
int src1_index = mad24(y, src1_step, src1_offset + x);
int dst_index = mad24(y, dst_step, dst_offset + x);
#if elemSize > 1
// Process the element's bytes in chunks of vlen bytes.
#pragma unroll
for (int i = 0; i < elemSize; i += vlen)
ucharv t0 = vloadn(0, src1 + src1_index + i);
ucharv t1 = vloadn(0, src2 + i);
ucharv t2 = t0 Operation t1;
vstoren(t2, 0, dst + dst_index + i);
// Single-byte form of the same operation.
dst[dst_index] = src1[src1_index] Operation src2[0];
@ -1,86 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jiang Liyuan,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
// Masked element-wise bitwise operation between a matrix and a scalar: for an
// element whose mask byte is non-zero, dst = src1 Operation src2, where src2 points
// at the scalar's bytes and is read from offset 0 (plus the chunk offset i).
// One work-item handles column get_global_id(0) of row get_global_id(1).
// src1, mask and dst are uchar pointers indexed as y * step + offset + x (mad24).
// elemSize, vlen, ucharv, vloadn, vstoren and Operation are not defined in this
// listing -- presumably build-time macros; confirm on the host side.
// NOTE(review): brace lines and #else/#endif lines are absent from this listing;
// presumably the final scalar store is the elemSize == 1 path -- confirm.
__kernel void arithm_bitwise_binary_scalar_mask(__global uchar *src1, int src1_step, int src1_offset,
__global uchar *src2,
__global uchar *mask, int mask_step, int mask_offset,
__global uchar *dst, int dst_step, int dst_offset,
int cols, int rows)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// The mask is indexed with the unscaled x, i.e. one mask byte per element.
int mask_index = mad24(y, mask_step, x + mask_offset);
if (mask[mask_index])
#if elemSize > 1
// Convert the element column into a byte column for multi-byte elements.
x *= elemSize;
int src1_index = mad24(y, src1_step, x + src1_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
#if elemSize > 1
// Process the element's bytes in chunks of vlen bytes.
#pragma unroll
for (int i = 0; i < elemSize; i += vlen)
ucharv t0 = vloadn(0, src1 + src1_index + i);
ucharv t1 = vloadn(0, src2 + i);
ucharv t2 = t0 Operation t1;
vstoren(t2, 0, dst + dst_index + i);
// Single-byte form of the same operation.
dst[dst_index] = src1[src1_index] Operation src2[0];
@ -1,253 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jiang Liyuan,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
// Enable a double-precision extension: cl_amd_fp64 when that macro is defined,
// otherwise cl_khr_fp64 when that one is defined.
// NOTE(review): the closing #endif is not visible in this listing.
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
// Bitwise NOT for uchar data, four bytes per work-item.
// The work-item at (get_global_id(0), get_global_id(1)) handles the four
// consecutive bytes starting at column x * 4 of row y.
// It loads four source bytes and the four bytes currently in dst, inverts the
// source, and keeps the old dst byte in every lane at or beyond dst_end, where
// dst_end = y * dst_step + dst_offset + dst_step1.
// NOTE(review): cols is compared against the un-shifted x, so it is presumably the
// number of 4-byte groups per row -- confirm against the host code.
// NOTE(review): brace lines are absent from this listing.
__kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int src1_offset,
__global uchar *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// Group index -> index of the first of four elements.
x = x << 2;
int src1_index = mad24(y, src1_step, x + src1_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x);
uchar4 src1_data = vload4(0, src1 + src1_index);
uchar4 dst_data = vload4(0, dst + dst_index);
uchar4 tmp_data = ~src1_data;
// Per-lane tail guard: only lanes that fall before dst_end take the new value.
dst_data.x = dst_index + 0 < dst_end ? tmp_data.x : dst_data.x;
dst_data.y = dst_index + 1 < dst_end ? tmp_data.y : dst_data.y;
dst_data.z = dst_index + 2 < dst_end ? tmp_data.z : dst_data.z;
dst_data.w = dst_index + 3 < dst_end ? tmp_data.w : dst_data.w;
vstore4(dst_data, 0, dst + dst_index);
// Bitwise NOT for char (signed 8-bit) data, four bytes per work-item.
// The work-item at (get_global_id(0), get_global_id(1)) handles the four
// consecutive bytes starting at column x * 4 of row y.
// It loads four source bytes and the four bytes currently in dst, inverts the
// source, and keeps the old dst byte in every lane at or beyond dst_end, where
// dst_end = y * dst_step + dst_offset + dst_step1.
// NOTE(review): cols is compared against the un-shifted x, so it is presumably the
// number of 4-byte groups per row -- confirm against the host code.
// NOTE(review): brace lines are absent from this listing.
__kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src1_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// Group index -> index of the first of four elements.
x = x << 2;
int src1_index = mad24(y, src1_step, x + src1_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
int dst_index = mad24(y, dst_step, dst_offset + x);
char4 src1_data = vload4(0, src1 + src1_index);
char4 dst_data = vload4(0, dst + dst_index);
char4 tmp_data = ~src1_data;
// Per-lane tail guard: only lanes that fall before dst_end take the new value.
dst_data.x = dst_index + 0 < dst_end ? tmp_data.x : dst_data.x;
dst_data.y = dst_index + 1 < dst_end ? tmp_data.y : dst_data.y;
dst_data.z = dst_index + 2 < dst_end ? tmp_data.z : dst_data.z;
dst_data.w = dst_index + 3 < dst_end ? tmp_data.w : dst_data.w;
vstore4(dst_data, 0, dst + dst_index);
// Bitwise NOT for ushort data, four 16-bit elements (8 bytes) per work-item.
// Both pointers are re-cast through (__global char *), so every index computed
// here is a byte offset.
// The destination is accessed as a whole ushort4 at an 8-byte-aligned offset; the
// source offset is shifted back by dst_align elements to compensate, and lanes that
// fall outside [dst_start, dst_end) keep the value already stored in dst.
// NOTE(review): cols is compared against the un-shifted x, so it is presumably the
// number of 4-element groups per row -- confirm against the host code.
// NOTE(review): brace lines and the #endif of the dst_align guard are absent from
// this listing.
__kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int src1_offset,
__global ushort *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// Group index -> index of the first of four elements.
x = x << 2;
#ifdef dst_align
#undef dst_align
// Number of 16-bit elements by which dst_offset is past an 8-byte boundary.
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
// `+` binds tighter than `&`: the mask is applied to (dst_offset + (x << 1)),
// rounding that byte offset down to a multiple of 8.
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index));
ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index));
ushort4 tmp_data = ~ src1_data;
// Per-lane guard (lane byte offsets 0, 2, 4, 6): write only inside the row's range.
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
dst_data.z = ((dst_index + 4 >= dst_start) && (dst_index + 4 < dst_end)) ? tmp_data.z : dst_data.z;
dst_data.w = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) ? tmp_data.w : dst_data.w;
*((__global ushort4 *)((__global char *)dst + dst_index)) = dst_data;
// Bitwise NOT for short data, four 16-bit elements (8 bytes) per work-item.
// Both pointers are re-cast through (__global char *), so every index computed
// here is a byte offset.
// The destination is accessed as a whole short4 at an 8-byte-aligned offset; the
// source offset is shifted back by dst_align elements to compensate, and lanes that
// fall outside [dst_start, dst_end) keep the value already stored in dst.
// NOTE(review): cols is compared against the un-shifted x, so it is presumably the
// number of 4-element groups per row -- confirm against the host code.
// NOTE(review): brace lines and the #endif of the dst_align guard are absent from
// this listing.
__kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int src1_offset,
__global short *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// Group index -> index of the first of four elements.
x = x << 2;
#ifdef dst_align
#undef dst_align
// Number of 16-bit elements by which dst_offset is past an 8-byte boundary.
#define dst_align ((dst_offset >> 1) & 3)
int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1));
int dst_start = mad24(y, dst_step, dst_offset);
int dst_end = mad24(y, dst_step, dst_offset + dst_step1);
// `+` binds tighter than `&`: the mask is applied to (dst_offset + (x << 1)),
// rounding that byte offset down to a multiple of 8.
int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8);
short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index));
short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index));
short4 tmp_data = ~ src1_data;
// Per-lane guard (lane byte offsets 0, 2, 4, 6): write only inside the row's range.
dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x;
dst_data.y = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : dst_data.y;
dst_data.z = ((dst_index + 4 >= dst_start) && (dst_index + 4 < dst_end)) ? tmp_data.z : dst_data.z;
dst_data.w = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) ? tmp_data.w : dst_data.w;
*((__global short4 *)((__global char *)dst + dst_index)) = dst_data;
// Bitwise NOT for int data, one 32-bit element per work-item.
// The work-item at (get_global_id(0), get_global_id(1)) reads the int at byte
// offset y * src1_step + x * 4 + src1_offset and writes its complement at the
// corresponding byte offset in dst (pointers are re-cast through __global char *).
// dst_step1 is not used by this kernel.
// NOTE(review): brace lines are absent from this listing.
__kernel void arithm_bitwise_not_D4 (__global int *src1, int src1_step, int src1_offset,
__global int *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// (x << 2): element column -> byte column for 4-byte elements.
int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
int data1 = *((__global int *)((__global char *)src1 + src1_index));
int tmp = ~ data1;
*((__global int *)((__global char *)dst + dst_index)) = tmp;
// Bitwise NOT applied to 4-byte elements treated as raw bytes.
// The work-item at (get_global_id(0), get_global_id(1)) loads a char4 from byte
// offset y * src_step + x * 4 + src_offset, complements all four bytes and stores
// them at the corresponding byte offset in dst.
// dst_step1 is not used by this kernel.
// NOTE(review): presumably this is the 32-bit float variant, processed bytewise
// because ~ is not defined for floating-point types -- confirm against the host code.
// NOTE(review): brace lines are absent from this listing.
__kernel void arithm_bitwise_not_D5 (__global char *src, int src_step, int src_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// (x << 2): element column -> byte column for 4-byte elements.
int src_index = mad24(y, src_step, (x << 2) + src_offset);
int dst_index = mad24(y, dst_step, (x << 2) + dst_offset);
char4 data;
data = *((__global char4 *)((__global char *)src + src_index));
data = ~ data;
*((__global char4 *)((__global char *)dst + dst_index)) = data;
// Only compiled when DOUBLE_SUPPORT is defined.
// NOTE(review): the matching #endif and the brace lines are absent from this listing.
#if defined (DOUBLE_SUPPORT)
// Bitwise NOT applied to 8-byte elements treated as raw bytes.
// The work-item at (get_global_id(0), get_global_id(1)) loads a char8 from byte
// offset y * src_step + x * 8 + src_offset, complements all eight bytes and stores
// them at the corresponding byte offset in dst.
// dst_step1 is not used by this kernel.
__kernel void arithm_bitwise_not_D6 (__global char *src, int src_step, int src_offset,
__global char *dst, int dst_step, int dst_offset,
int rows, int cols, int dst_step1)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
// (x << 3): element column -> byte column for 8-byte elements.
int src_index = mad24(y, src_step, (x << 3) + src_offset);
int dst_index = mad24(y, dst_step, (x << 3) + dst_offset);
char8 data;
data = *((__global char8 *)((__global char *)src + src_index));
data = ~ data;
*((__global char8 *)((__global char *)dst + dst_index)) = data;
@ -1,141 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jia Haipeng,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
// Enable a double-precision extension: cl_amd_fp64 when that macro is defined,
// otherwise cl_khr_fp64 when that one is defined.
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
// CV_PI is the pi constant used by the kernels below: M_PI or the float M_PI_F.
// NOTE(review): the two definitions appear back to back because the conditional
// lines that presumably select between them (#else / #endif) are not visible in
// this listing -- confirm against the real file.
#define CV_PI M_PI
#define CV_PI M_PI_F
// Cartesian-to-polar conversion for float data, one element per work-item.
//   src1 : x components          src2 : y components
//   dst1 : magnitude sqrt(x^2 + y^2)
//   dst2 : angle, computed with a rational approximation rather than atan2
// The buffers are float pointers indexed directly, so every index is
// y * step + x + offset in units of float elements.
// The angle is in radians unless DEGREE is defined, in which case it is scaled by
// 180 / CV_PI.
// NOTE(review): `float x` / `float y` reuse the names of the work-item coordinates;
// this only compiles if they live in a nested scope, and the brace lines (as well as
// the #endif of the DEGREE guard) are absent from this listing.
__kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int src1_offset,
__global float *src2, int src2_step, int src2_offset,
__global float *dst1, int dst1_step, int dst1_offset, // magnitude
__global float *dst2, int dst2_step, int dst2_offset, // cartToPolar
int rows, int cols)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
int src1_index = mad24(y, src1_step, x + src1_offset);
int src2_index = mad24(y, src2_step, x + src2_offset);
int dst1_index = mad24(y, dst1_step, x + dst1_offset);
int dst2_index = mad24(y, dst2_step, x + dst2_offset);
float x = src1[src1_index];
float y = src2[src2_index];
float x2 = x * x;
float y2 = y * y;
float magnitude = sqrt(x2 + y2);
// Offset added when |y| <= |x|: pi for x < 0, otherwise 0 (y >= 0) or 2*pi (y < 0).
float tmp = y >= 0 ? 0 : CV_PI*2;
tmp = x < 0 ? CV_PI : tmp;
// Base angle used when |y| > |x|: pi/2 for y >= 0, 3*pi/2 for y < 0.
float tmp1 = y >= 0 ? CV_PI*0.5f : CV_PI*1.5f;
// FLT_EPSILON keeps the denominator non-zero when both inputs are 0.
float cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + FLT_EPSILON) + tmp :
tmp1 - x*y/(y2 + 0.28f*x2 + FLT_EPSILON);
#ifdef DEGREE
cartToPolar *= (180/CV_PI);
dst1[dst1_index] = magnitude;
dst2[dst2_index] = cartToPolar;
#if defined (DOUBLE_SUPPORT)
// Cartesian-to-polar conversion for double data, one element per work-item.
//   src1 : x components          src2 : y components
//   dst1 : magnitude sqrt(x^2 + y^2)
//   dst2 : angle, computed with a rational approximation rather than atan2
// The buffers are double pointers indexed directly, so every index is
// y * step + x + offset in units of double elements.
// The angle is in radians unless DEGREE is defined, in which case it is scaled by
// 180 / CV_PI.
//
// The quadrant offsets (tmp, tmp1) are kept in double; declaring them as float
// would round pi, 2*pi, pi/2 and 3*pi/2 to float precision before they are added
// to the double result.
__kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int src1_offset,
                                     __global double *src2, int src2_step, int src2_offset,
                                     __global double *dst1, int dst1_step, int dst1_offset,
                                     __global double *dst2, int dst2_step, int dst2_offset,
                                     int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (x < cols && y < rows)
    {
        int src1_index = mad24(y, src1_step, x + src1_offset);
        int src2_index = mad24(y, src2_step, x + src2_offset);
        int dst1_index = mad24(y, dst1_step, x + dst1_offset);
        int dst2_index = mad24(y, dst2_step, x + dst2_offset);

        // Distinct names so the input values do not shadow the work-item coordinates.
        double vx = src1[src1_index];
        double vy = src2[src2_index];

        double vx2 = vx * vx;
        double vy2 = vy * vy;

        double magnitude = sqrt(vx2 + vy2);

        // Offset added when |y| <= |x|: pi for x < 0, otherwise 0 (y >= 0) or 2*pi (y < 0).
        double tmp = vy >= 0 ? 0 : CV_PI*2;
        tmp = vx < 0 ? CV_PI : tmp;

        // Base angle used when |y| > |x|: pi/2 for y >= 0, 3*pi/2 for y < 0.
        double tmp1 = vy >= 0 ? CV_PI*0.5 : CV_PI*1.5;

        // DBL_EPSILON keeps the denominator non-zero when both inputs are 0.
        // The 0.28f coefficient is left exactly as in the original expression.
        double cartToPolar = vy2 <= vx2 ? vx*vy/(vx2 + 0.28f*vy2 + DBL_EPSILON) + tmp :
                                          tmp1 - vx*vy/(vy2 + 0.28f*vx2 + DBL_EPSILON);

#ifdef DEGREE
        cartToPolar *= (180/CV_PI);
#endif

        dst1[dst1_index] = magnitude;
        dst2[dst2_index] = cartToPolar;
    }
}
#endif
@ -1,74 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jia Haipeng,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
// Enable a double-precision extension: cl_amd_fp64 when that macro is defined,
// otherwise cl_khr_fp64 when that one is defined.
// NOTE(review): the closing #endif is not visible in this listing.
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
// Element-wise comparison of two matrices, one element per work-item:
// dst = 255 where (src1 Operation src2) is true, 0 otherwise.
// src1/src2 are indexed directly as T pointers and dst as a uchar pointer, so each
// index is y * step1 + x + offset1 in units of that buffer's element type.
// T and Operation are not defined in this listing -- presumably macros supplied
// when the program is built; confirm on the host side.
// NOTE(review): brace lines are absent from this listing.
__kernel void arithm_compare(__global T * src1, int src1_step1, int src1_offset1,
__global T * src2, int src2_step1, int src2_offset1,
__global uchar * dst, int dst_step1, int dst_offset1,
int cols1, int rows)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols1 && y < rows)
int src1_index = mad24(y, src1_step1, x + src1_offset1);
int src2_index = mad24(y, src2_step1, x + src2_offset1);
int dst_index = mad24(y, dst_step1, x + dst_offset1);
dst[dst_index] = convert_uchar(src1[src1_index] Operation src2[src2_index] ? 255 : 0);
@ -1,111 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Wu Zailong,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
// Enable a double-precision extension: cl_amd_fp64 when that macro is defined,
// otherwise cl_khr_fp64 when that one is defined.
// NOTE(review): the closing #endif is not visible in this listing.
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
// Element-wise exponential, one element per work-item: dst = exp(src).
// Buffers are srcT pointers indexed directly, so each index is
// y * step1 + x + offset1 in units of srcT elements.
// srcT is not defined in this listing -- presumably a build-time macro; confirm.
// Note the parameter order: sizes first, then offsets, then steps.
// NOTE(review): brace lines are absent from this listing.
__kernel void arithm_exp_C1(__global srcT *src, __global srcT *dst,
int cols1, int rows,
int srcOffset1, int dstOffset1,
int srcStep1, int dstStep1)
int x = get_global_id(0);
int y = get_global_id(1);
if(x < cols1 && y < rows)
int srcIdx = mad24(y, srcStep1, x + srcOffset1);
int dstIdx = mad24(y, dstStep1, x + dstOffset1);
dst[dstIdx] = exp(src[srcIdx]);
// Element-wise exponential, two consecutive elements per work-item.
// x1 = get_global_id(0) * 2 is the column of the first element; the second element
// is only recomputed when x1 + 1 < cols1, otherwise the value already in dst is
// read and written back unchanged.
// Buffers are srcT pointers indexed directly (indices in units of srcT elements).
// srcT is not defined in this listing -- presumably a build-time macro; confirm.
// NOTE(review): brace lines are absent from this listing.
__kernel void arithm_exp_C2(__global srcT *src, __global srcT *dst,
int cols1, int rows,
int srcOffset1, int dstOffset1,
int srcStep1, int dstStep1)
int x1 = get_global_id(0) << 1;
int y = get_global_id(1);
if(x1 < cols1 && y < rows)
int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);
dst[dstIdx] = exp(src[srcIdx]);
// Tail guard for an odd cols1.
dst[dstIdx + 1] = x1 + 1 < cols1 ? exp(src[srcIdx + 1]) : dst[dstIdx + 1];
// Element-wise exponential, four consecutive elements per work-item.
// x1 = get_global_id(0) * 4 is the column of the first element; each of the next
// three elements is only recomputed when its column is below cols1, otherwise the
// value already in dst is read and written back unchanged.
// Buffers are srcT pointers indexed directly (indices in units of srcT elements).
// srcT is not defined in this listing -- presumably a build-time macro; confirm.
// NOTE(review): brace lines are absent from this listing.
__kernel void arithm_exp_C4(__global srcT *src, __global srcT *dst,
int cols1, int rows,
int srcOffset1, int dstOffset1,
int srcStep1, int dstStep1)
int x1 = get_global_id(0) << 2;
int y = get_global_id(1);
if(x1 < cols1 && y < rows)
int srcIdx = mad24(y, srcStep1, x1 + srcOffset1);
int dstIdx = mad24(y, dstStep1, x1 + dstOffset1);
dst[dstIdx] = exp(src[srcIdx]);
// Tail guards for a cols1 that is not a multiple of four.
dst[dstIdx + 1] = x1 + 1 < cols1 ? exp(src[srcIdx + 1]) : dst[dstIdx + 1];
dst[dstIdx + 2] = x1 + 2 < cols1 ? exp(src[srcIdx + 2]) : dst[dstIdx + 2];
dst[dstIdx + 3] = x1 + 3 < cols1 ? exp(src[srcIdx + 3]) : dst[dstIdx + 3];
@ -1,125 +0,0 @@
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
// License Agreement
// For Open Source Computer Vision Library
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
// @Authors
// Jia Haipeng,
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
// Enable a double-precision extension: cl_amd_fp64 when that macro is defined,
// otherwise cl_khr_fp64 when that one is defined.
// NOTE(review): the closing #endif is not visible in this listing.
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
///////////////////////////////////////////// flip rows ///////////////////////////////////////////////
// Vertical flip: each work-item exchanges one element between row y and its mirror
// row (rows - y - 1), at the same column x.
// Only work-items with y < thread_rows do any work; thread_rows is presumably about
// half of rows so that each pair is handled once -- confirm against the host code.
// Both source values are read before either destination is written.
// Buffers are T pointers indexed directly (indices in units of T elements).
// T is not defined in this listing -- presumably a build-time macro; confirm.
// thread_cols is not used by this kernel.
// NOTE(review): brace lines are absent from this listing.
__kernel void arithm_flip_rows(__global T * src, int src_step, int src_offset,
__global T * dst, int dst_step, int dst_offset,
int rows, int cols, int thread_rows, int thread_cols)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < thread_rows)
int src_index_0 = mad24(y, src_step, x + src_offset);
int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset);
int dst_index_0 = mad24(y, dst_step, x + dst_offset);
int dst_index_1 = mad24(rows - y - 1, dst_step, x + dst_offset);
T data0 = src[src_index_0], data1 = src[src_index_1];
// Cross-assign: row y receives the mirror row's value and vice versa.
dst[dst_index_0] = data1;
dst[dst_index_1] = data0;
// Flip around both axes: each work-item exchanges the element at (y, x) with the
// element at (rows - y - 1, cols - x - 1).
// Only work-items with y < thread_rows do any work; thread_rows is presumably about
// half of rows so that each pair is handled once -- confirm against the host code.
// Both source values are read before either destination is written.
// Buffers are T pointers indexed directly (indices in units of T elements).
// T is not defined in this listing -- presumably a build-time macro; confirm.
// thread_cols is not used by this kernel.
// NOTE(review): brace lines are absent from this listing.
__kernel void arithm_flip_rows_cols(__global T * src, int src_step, int src_offset,
__global T * dst, int dst_step, int dst_offset,
int rows, int cols, int thread_rows, int thread_cols)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < thread_rows)
// Index pair 0: source at (y, x), destination at the mirrored position.
int src_index_0 = mad24(y, src_step, x + src_offset);
int dst_index_0 = mad24(rows - y - 1, dst_step, cols - x - 1 + dst_offset);
// Index pair 1: source at the mirrored position, destination at (y, x).
int src_index_1 = mad24(rows - y - 1, src_step, cols - x - 1 + src_offset);
int dst_index_1 = mad24(y, dst_step, x + dst_offset);
T data0 = src[src_index_0], data1 = src[src_index_1];
dst[dst_index_0] = data0;
dst[dst_index_1] = data1;
///////////////////////////////////////////// flip cols ///////////////////////////////////////////////
// Horizontal flip: each work-item exchanges one element between column x and its
// mirror column (cols - x - 1), within the same row y.
// Only work-items with x < thread_cols do any work; thread_cols is presumably about
// half of cols so that each pair is handled once -- confirm against the host code.
// Both source values are read before either destination is written.
// Buffers are T pointers indexed directly (indices in units of T elements).
// T is not defined in this listing -- presumably a build-time macro; confirm.
// thread_rows is not used by this kernel.
// NOTE(review): brace lines are absent from this listing.
__kernel void arithm_flip_cols(__global T * src, int src_step, int src_offset,
__global T * dst, int dst_step, int dst_offset,
int rows, int cols, int thread_rows, int thread_cols)
int x = get_global_id(0);
int y = get_global_id(1);
if (x < thread_cols && y < rows)
// Index pair 0: source at column x, destination at the mirrored column.
int src_index_0 = mad24(y, src_step, x + src_offset);
int dst_index_0 = mad24(y, dst_step, cols - x - 1 + dst_offset);
// Index pair 1: source at the mirrored column, destination at column x.
int src_index_1 = mad24(y, src_step, cols - x - 1 + src_offset);
int dst_index_1 = mad24(y, dst_step, x + dst_offset);
T data0 = src[src_index_0], data1 = src[src_index_1];
dst[dst_index_1] = data1;
dst[dst_index_0] = data0;
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user