/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // // By downloading, copying, installing or using the software you agree to this license. // If you do not agree to this license, do not download, install, // copy or use the software. // // // License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // // * Redistribution's of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other oclMaterials provided with the distribution. // // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // // This software is provided by the copyright holders and contributors "as is" and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct, // indirect, incidental, special, exemplary, or consequential damages // (including, but not limited to, procurement of substitute goods or services; // loss of use, data, or profits; or business interruption) however caused // and on any theory of liability, whether in contract, strict liability, // or tort (including negligence or otherwise) arising in any way out of // the use of this software, even if advised of the possibility of such damage. // //M*/ #ifndef __OPENCV_OCL_HPP__ #define __OPENCV_OCL_HPP__ #include #include #include "opencv2/core.hpp" #include "opencv2/imgproc.hpp" #include "opencv2/objdetect.hpp" namespace cv { namespace ocl { enum { CVCL_DEVICE_TYPE_DEFAULT = (1 << 0), CVCL_DEVICE_TYPE_CPU = (1 << 1), CVCL_DEVICE_TYPE_GPU = (1 << 2), CVCL_DEVICE_TYPE_ACCELERATOR = (1 << 3), //CVCL_DEVICE_TYPE_CUSTOM = (1 << 4) CVCL_DEVICE_TYPE_ALL = 0xFFFFFFFF }; enum DevMemRW { DEVICE_MEM_R_W = 0, DEVICE_MEM_R_ONLY, DEVICE_MEM_W_ONLY }; enum DevMemType { DEVICE_MEM_DEFAULT = 0, DEVICE_MEM_AHP, //alloc host pointer DEVICE_MEM_UHP, //use host pointer DEVICE_MEM_CHP, //copy host pointer DEVICE_MEM_PM //persistent memory }; //Get the global device memory and read/write type //return 1 if unified memory system supported, otherwise return 0 CV_EXPORTS int getDevMemType(DevMemRW& rw_type, DevMemType& mem_type); //Set the global device memory and read/write type, //the newly generated oclMat will all use this type //return -1 if the target type is unsupported, otherwise return 0 CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT); //this class contains ocl runtime information class CV_EXPORTS Info { public: struct Impl; Impl *impl; Info(); Info(const Info &m); ~Info(); void release(); Info &operator = (const Info &m); std::vector DeviceName; String PlatformName; }; //////////////////////////////// Initialization & Info 
//////////////////////// //this function may be obsoleted //CV_EXPORTS cl_device_id getDevice(); //the function must be called before any other cv::ocl::functions, it initialize ocl runtime //each Info relates to an OpenCL platform //there is one or more devices in each platform, each one has a separate name CV_EXPORTS int getDevice(std::vector &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU); //set device you want to use, optional function after getDevice be called //the devnum is the index of the selected device in DeviceName vector of INfo CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0); //The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue //returns cl_context * CV_EXPORTS void* getoclContext(); //returns cl_command_queue * CV_EXPORTS void* getoclCommandQueue(); //explicit call clFinish. The global command queue will be used. CV_EXPORTS void finish(); //this function enable ocl module to use customized cl_context and cl_command_queue //getDevice also need to be called before this function CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0); //returns true when global OpenCL context is initialized CV_EXPORTS bool initialized(); //////////////////////////////// OpenCL context //////////////////////// //This is a global singleton class used to represent a OpenCL context. class CV_EXPORTS Context { protected: Context(); friend class std::auto_ptr; friend bool initialized(); private: static std::auto_ptr clCxt; static int val; public: ~Context(); void release(); Info::Impl* impl; static Context *getContext(); static void setContext(Info &oclinfo); enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2}; bool supportsFeature(int ftype); size_t computeUnits(); size_t maxWorkGroupSize(); void* oclContext(); void* oclCommandQueue(); }; //! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing. 
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
        const char **source, String kernelName,
        size_t globalThreads[3], size_t localThreads[3],
        std::vector< std::pair<size_t, const void *> > &args,
        int channels, int depth, const char *build_options,
        bool finish = true, bool measureKernelTime = false,
        bool cleanUp = true);

//! Calls a kernel, by file. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
        const char **fileName, const int numFiles, String kernelName,
        size_t globalThreads[3], size_t localThreads[3],
        std::vector< std::pair<size_t, const void *> > &args,
        int channels, int depth, const char *build_options,
        bool finish = true, bool measureKernelTime = false,
        bool cleanUp = true);

//! Enable or disable OpenCL program binary caching onto local disk
// After a program (*.cl files in opencl/ folder) is built at runtime, we allow the
// compiled OpenCL program to be cached to the path automatically as "path/*.clb"
// binary file, which will be reused when the OpenCV executable is started again.
//
// Caching mode is controlled by the following enums
// Notes
//   1. the feature is by default enabled when OpenCV is built in release mode.
//   2. the CACHE_DEBUG / CACHE_RELEASE flags only effectively work with MSVC compiler;
//      for GNU compilers, the function always treats the build as release mode (enabled by default).
enum
{
    CACHE_NONE    = 0,        // do not cache OpenCL binary
    CACHE_DEBUG   = 0x1 << 0, // cache OpenCL binary when built in debug mode (only work with MSVC)
    CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only work with MSVC)
    CACHE_ALL     = CACHE_DEBUG | CACHE_RELEASE, // always cache opencl binary
    CACHE_UPDATE  = 0x1 << 2  // if the binary cache file with the same name is already on the disk, it will be updated.
};
CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./");

//!
set where binary cache to be saved to CV_EXPORTS void setBinpath(const char *path); class CV_EXPORTS oclMatExpr; //////////////////////////////// oclMat //////////////////////////////// class CV_EXPORTS oclMat { public: //! default constructor oclMat(); //! constructs oclMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.) oclMat(int rows, int cols, int type); oclMat(Size size, int type); //! constucts oclMatrix and fills it with the specified value _s. oclMat(int rows, int cols, int type, const Scalar &s); oclMat(Size size, int type, const Scalar &s); //! copy constructor oclMat(const oclMat &m); //! constructor for oclMatrix headers pointing to user-allocated data oclMat(int rows, int cols, int type, void *data, size_t step = Mat::AUTO_STEP); oclMat(Size size, int type, void *data, size_t step = Mat::AUTO_STEP); //! creates a matrix header for a part of the bigger matrix oclMat(const oclMat &m, const Range &rowRange, const Range &colRange); oclMat(const oclMat &m, const Rect &roi); //! builds oclMat from Mat. Perfom blocking upload to device. explicit oclMat (const Mat &m); //! destructor - calls release() ~oclMat(); //! assignment operators oclMat &operator = (const oclMat &m); //! assignment operator. Perfom blocking upload to device. oclMat &operator = (const Mat &m); oclMat &operator = (const oclMatExpr& expr); //! pefroms blocking upload data to oclMat. void upload(const cv::Mat &m); //! downloads data from device to host memory. Blocking calls. operator Mat() const; void download(cv::Mat &m) const; //! returns a new oclMatrix header for the specified row oclMat row(int y) const; //! returns a new oclMatrix header for the specified column oclMat col(int x) const; //! ... for the specified row span oclMat rowRange(int startrow, int endrow) const; oclMat rowRange(const Range &r) const; //! ... for the specified column span oclMat colRange(int startcol, int endcol) const; oclMat colRange(const Range &r) const; //! 
returns deep copy of the oclMatrix, i.e. the data is copied oclMat clone() const; //! copies the oclMatrix content to "m". // It calls m.create(this->size(), this->type()). // It supports any data type void copyTo( oclMat &m ) const; //! copies those oclMatrix elements to "m" that are marked with non-zero mask elements. //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4 void copyTo( oclMat &m, const oclMat &mask ) const; //! converts oclMatrix to another datatype with optional scalng. See cvConvertScale. //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4 void convertTo( oclMat &m, int rtype, double alpha = 1, double beta = 0 ) const; void assignTo( oclMat &m, int type = -1 ) const; //! sets every oclMatrix element to s //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4 oclMat& operator = (const Scalar &s); //! sets some of the oclMatrix elements to s, according to the mask //It supports 8UC1 8UC4 32SC1 32SC4 32FC1 32FC4 oclMat& setTo(const Scalar &s, const oclMat &mask = oclMat()); //! creates alternative oclMatrix header for the same data, with different // number of channels and/or different number of rows. see cvReshape. oclMat reshape(int cn, int rows = 0) const; //! allocates new oclMatrix data unless the oclMatrix already has specified size and type. // previous data is unreferenced if needed. void create(int rows, int cols, int type); void create(Size size, int type); //! allocates new oclMatrix with specified device memory type. void createEx(int rows, int cols, int type, DevMemRW rw_type, DevMemType mem_type, void* hptr = 0); void createEx(Size size, int type, DevMemRW rw_type, DevMemType mem_type, void* hptr = 0); //! decreases reference counter; // deallocate the data when reference counter reaches 0. void release(); //! swaps with other smart pointer void swap(oclMat &mat); //! locates oclMatrix header within a parent oclMatrix. See below void locateROI( Size &wholeSize, Point &ofs ) const; //! moves/resizes the current oclMatrix ROI inside the parent oclMatrix. 
oclMat& adjustROI( int dtop, int dbottom, int dleft, int dright ); //! extracts a rectangular sub-oclMatrix // (this is a generalized form of row, rowRange etc.) oclMat operator()( Range rowRange, Range colRange ) const; oclMat operator()( const Rect &roi ) const; oclMat& operator+=( const oclMat& m ); oclMat& operator-=( const oclMat& m ); oclMat& operator*=( const oclMat& m ); oclMat& operator/=( const oclMat& m ); //! returns true if the oclMatrix data is continuous // (i.e. when there are no gaps between successive rows). // similar to CV_IS_oclMat_CONT(cvoclMat->type) bool isContinuous() const; //! returns element size in bytes, // similar to CV_ELEM_SIZE(cvMat->type) size_t elemSize() const; //! returns the size of element channel in bytes. size_t elemSize1() const; //! returns element type, similar to CV_MAT_TYPE(cvMat->type) int type() const; //! returns element type, i.e. 8UC3 returns 8UC4 because in ocl //! 3 channels element actually use 4 channel space int ocltype() const; //! returns element type, similar to CV_MAT_DEPTH(cvMat->type) int depth() const; //! returns element type, similar to CV_MAT_CN(cvMat->type) int channels() const; //! returns element type, return 4 for 3 channels element, //!becuase 3 channels element actually use 4 channel space int oclchannels() const; //! returns step/elemSize1() size_t step1() const; //! returns oclMatrix size: // width == number of columns, height == number of rows Size size() const; //! returns true if oclMatrix data is NULL bool empty() const; //! returns pointer to y-th row uchar* ptr(int y = 0); const uchar *ptr(int y = 0) const; //! template version of the above method template _Tp *ptr(int y = 0); template const _Tp *ptr(int y = 0) const; //! matrix transposition oclMat t() const; /*! includes several bit-fields: - the magic signature - continuity flag - depth - number of channels */ int flags; //! the number of rows and columns int rows, cols; //! 
a distance between successive rows in bytes; includes the gap if any size_t step; //! pointer to the data(OCL memory object) uchar *data; //! pointer to the reference counter; // when oclMatrix points to user-allocated data, the pointer is NULL int *refcount; //! helper fields used in locateROI and adjustROI //datastart and dataend are not used in current version uchar *datastart; uchar *dataend; //! OpenCL context associated with the oclMat object. Context *clCxt; //add offset for handle ROI, calculated in byte int offset; //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used int wholerows; int wholecols; }; ///////////////////// mat split and merge ///////////////////////////////// //! Compose a multi-channel array from several single-channel arrays // Support all types CV_EXPORTS void merge(const oclMat *src, size_t n, oclMat &dst); CV_EXPORTS void merge(const std::vector &src, oclMat &dst); //! Divides multi-channel array into several single-channel arrays // Support all types CV_EXPORTS void split(const oclMat &src, oclMat *dst); CV_EXPORTS void split(const oclMat &src, std::vector &dst); ////////////////////////////// Arithmetics /////////////////////////////////// //#if defined DOUBLE_SUPPORT //typedef double F; //#else //typedef float F; //#endif // CV_EXPORTS void addWeighted(const oclMat& a,F alpha, const oclMat& b,F beta,F gama, oclMat& c); CV_EXPORTS void addWeighted(const oclMat &a, double alpha, const oclMat &b, double beta, double gama, oclMat &c); //! adds one matrix to another (c = a + b) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c); //! adds one matrix to another (c = a + b) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void add(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask); //! 
adds scalar to a matrix (c = a + s) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void add(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat()); //! subtracts one matrix from another (c = a - b) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c); //! subtracts one matrix from another (c = a - b) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void subtract(const oclMat &a, const oclMat &b, oclMat &c, const oclMat &mask); //! subtracts scalar from a matrix (c = a - s) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void subtract(const oclMat &a, const Scalar &sc, oclMat &c, const oclMat &mask = oclMat()); //! subtracts scalar from a matrix (c = a - s) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void subtract(const Scalar &sc, const oclMat &a, oclMat &c, const oclMat &mask = oclMat()); //! computes element-wise product of the two arrays (c = a * b) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void multiply(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1); //! multiplies matrix to a number (dst = scalar * src) // supports CV_32FC1 only CV_EXPORTS void multiply(double scalar, const oclMat &src, oclMat &dst); //! computes element-wise quotient of the two arrays (c = a / b) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void divide(const oclMat &a, const oclMat &b, oclMat &c, double scale = 1); //! computes element-wise quotient of the two arrays (c = a / b) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void divide(double scale, const oclMat &b, oclMat &c); //! compares elements of two arrays (c = a b) // supports except CV_8SC1,CV_8SC2,CV8SC3,CV_8SC4 types CV_EXPORTS void compare(const oclMat &a, const oclMat &b, oclMat &c, int cmpop); //! 
transposes the matrix // supports CV_8UC1, 8UC4, 8SC4, 16UC2, 16SC2, 32SC1 and 32FC1.(the same as cuda) CV_EXPORTS void transpose(const oclMat &src, oclMat &dst); //! computes element-wise absolute difference of two arrays (c = abs(a - b)) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void absdiff(const oclMat &a, const oclMat &b, oclMat &c); //! computes element-wise absolute difference of array and scalar (c = abs(a - s)) // supports all types except CV_8SC1,CV_8SC2,CV8SC3 and CV_8SC4 CV_EXPORTS void absdiff(const oclMat &a, const Scalar &s, oclMat &c); //! computes mean value and standard deviation of all or selected array elements // supports except CV_32F,CV_64F CV_EXPORTS void meanStdDev(const oclMat &mtx, Scalar &mean, Scalar &stddev); //! computes norm of array // supports NORM_INF, NORM_L1, NORM_L2 // supports only CV_8UC1 type CV_EXPORTS double norm(const oclMat &src1, int normType = NORM_L2); //! computes norm of the difference between two arrays // supports NORM_INF, NORM_L1, NORM_L2 // supports only CV_8UC1 type CV_EXPORTS double norm(const oclMat &src1, const oclMat &src2, int normType = NORM_L2); //! reverses the order of the rows, columns or both in a matrix // supports all types CV_EXPORTS void flip(const oclMat &a, oclMat &b, int flipCode); //! computes sum of array elements // disabled until fix crash // support all types CV_EXPORTS Scalar sum(const oclMat &m); CV_EXPORTS Scalar absSum(const oclMat &m); CV_EXPORTS Scalar sqrSum(const oclMat &m); //! finds global minimum and maximum array elements and returns their values // support all C1 types CV_EXPORTS void minMax(const oclMat &src, double *minVal, double *maxVal = 0, const oclMat &mask = oclMat()); CV_EXPORTS void minMax_buf(const oclMat &src, double *minVal, double *maxVal, const oclMat &mask, oclMat& buf); //! 
finds global minimum and maximum array elements and returns their values with locations // support all C1 types CV_EXPORTS void minMaxLoc(const oclMat &src, double *minVal, double *maxVal = 0, Point *minLoc = 0, Point *maxLoc = 0, const oclMat &mask = oclMat()); //! counts non-zero array elements // support all types CV_EXPORTS int countNonZero(const oclMat &src); //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i)) // destination array will have the depth type as lut and the same channels number as source //It supports 8UC1 8UC4 only CV_EXPORTS void LUT(const oclMat &src, const oclMat &lut, oclMat &dst); //! only 8UC1 and 256 bins is supported now CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist); //! only 8UC1 and 256 bins is supported now CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst); //! only 8UC1 is supported now class CV_EXPORTS CLAHE { public: virtual void apply(const oclMat &src, oclMat &dst) = 0; virtual void setClipLimit(double clipLimit) = 0; virtual double getClipLimit() const = 0; virtual void setTilesGridSize(Size tileGridSize) = 0; virtual Size getTilesGridSize() const = 0; virtual void collectGarbage() = 0; virtual ~CLAHE() {} }; CV_EXPORTS Ptr createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8)); //! bilateralFilter // supports 8UC1 8UC4 CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT); //! computes exponent of each matrix element (b = e**a) // supports only CV_32FC1 type CV_EXPORTS void exp(const oclMat &a, oclMat &b); //! computes natural logarithm of absolute value of each matrix element: b = log(abs(a)) // supports only CV_32FC1 type CV_EXPORTS void log(const oclMat &a, oclMat &b); //! 
computes magnitude of each (x(i), y(i)) vector // supports only CV_32F CV_64F type CV_EXPORTS void magnitude(const oclMat &x, const oclMat &y, oclMat &magnitude); CV_EXPORTS void magnitudeSqr(const oclMat &x, const oclMat &y, oclMat &magnitude); CV_EXPORTS void magnitudeSqr(const oclMat &x, oclMat &magnitude); //! computes angle (angle(i)) of each (x(i), y(i)) vector // supports only CV_32F CV_64F type CV_EXPORTS void phase(const oclMat &x, const oclMat &y, oclMat &angle, bool angleInDegrees = false); //! the function raises every element of tne input array to p //! support only CV_32F CV_64F type CV_EXPORTS void pow(const oclMat &x, double p, oclMat &y); //! converts Cartesian coordinates to polar // supports only CV_32F CV_64F type CV_EXPORTS void cartToPolar(const oclMat &x, const oclMat &y, oclMat &magnitude, oclMat &angle, bool angleInDegrees = false); //! converts polar coordinates to Cartesian // supports only CV_32F CV_64F type CV_EXPORTS void polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees = false); //! perfroms per-elements bit-wise inversion // supports all types CV_EXPORTS void bitwise_not(const oclMat &src, oclMat &dst); //! calculates per-element bit-wise disjunction of two arrays // supports all types CV_EXPORTS void bitwise_or(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat()); CV_EXPORTS void bitwise_or(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat()); //! calculates per-element bit-wise conjunction of two arrays // supports all types CV_EXPORTS void bitwise_and(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat()); CV_EXPORTS void bitwise_and(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat()); //! 
calculates per-element bit-wise "exclusive or" operation // supports all types CV_EXPORTS void bitwise_xor(const oclMat &src1, const oclMat &src2, oclMat &dst, const oclMat &mask = oclMat()); CV_EXPORTS void bitwise_xor(const oclMat &src1, const Scalar &s, oclMat &dst, const oclMat &mask = oclMat()); //! Logical operators CV_EXPORTS oclMat operator ~ (const oclMat &); CV_EXPORTS oclMat operator | (const oclMat &, const oclMat &); CV_EXPORTS oclMat operator & (const oclMat &, const oclMat &); CV_EXPORTS oclMat operator ^ (const oclMat &, const oclMat &); //! Mathematics operators CV_EXPORTS oclMatExpr operator + (const oclMat &src1, const oclMat &src2); CV_EXPORTS oclMatExpr operator - (const oclMat &src1, const oclMat &src2); CV_EXPORTS oclMatExpr operator * (const oclMat &src1, const oclMat &src2); CV_EXPORTS oclMatExpr operator / (const oclMat &src1, const oclMat &src2); struct CV_EXPORTS ConvolveBuf { Size result_size; Size block_size; Size user_block_size; Size dft_size; oclMat image_spect, templ_spect, result_spect; oclMat image_block, templ_block, result_data; void create(Size image_size, Size templ_size); static Size estimateBlockSize(Size result_size, Size templ_size); }; //! computes convolution of two images, may use discrete Fourier transform //! support only CV_32FC1 type CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr = false); CV_EXPORTS void convolve(const oclMat &image, const oclMat &temp1, oclMat &result, bool ccorr, ConvolveBuf& buf); //! Performs a per-element multiplication of two Fourier spectrums. //! Only full (not packed) CV_32FC2 complex spectrums in the interleaved format are supported for now. //! 
support only CV_32FC2 type CV_EXPORTS void mulSpectrums(const oclMat &a, const oclMat &b, oclMat &c, int flags, float scale, bool conjB = false); CV_EXPORTS void cvtColor(const oclMat &src, oclMat &dst, int code , int dcn = 0); //////////////////////////////// Filter Engine //////////////////////////////// /*! The Base Class for 1D or Row-wise Filters This is the base class for linear or non-linear filters that process 1D data. In particular, such filters are used for the "horizontal" filtering parts in separable filters. */ class CV_EXPORTS BaseRowFilter_GPU { public: BaseRowFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {} virtual ~BaseRowFilter_GPU() {} virtual void operator()(const oclMat &src, oclMat &dst) = 0; int ksize, anchor, bordertype; }; /*! The Base Class for Column-wise Filters This is the base class for linear or non-linear filters that process columns of 2D arrays. Such filters are used for the "vertical" filtering parts in separable filters. */ class CV_EXPORTS BaseColumnFilter_GPU { public: BaseColumnFilter_GPU(int ksize_, int anchor_, int bordertype_) : ksize(ksize_), anchor(anchor_), bordertype(bordertype_) {} virtual ~BaseColumnFilter_GPU() {} virtual void operator()(const oclMat &src, oclMat &dst) = 0; int ksize, anchor, bordertype; }; /*! The Base Class for Non-Separable 2D Filters. This is the base class for linear or non-linear 2D filters. */ class CV_EXPORTS BaseFilter_GPU { public: BaseFilter_GPU(const Size &ksize_, const Point &anchor_, const int &borderType_) : ksize(ksize_), anchor(anchor_), borderType(borderType_) {} virtual ~BaseFilter_GPU() {} virtual void operator()(const oclMat &src, oclMat &dst) = 0; Size ksize; Point anchor; int borderType; }; /*! The Base Class for Filter Engine. The class can be used to apply an arbitrary filtering operation to an image. It contains all the necessary intermediate buffers. 
*/ class CV_EXPORTS FilterEngine_GPU { public: virtual ~FilterEngine_GPU() {} virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1)) = 0; }; //! returns the non-separable filter engine with the specified filter CV_EXPORTS Ptr createFilter2D_GPU(const Ptr filter2D); //! returns the primitive row filter with the specified kernel CV_EXPORTS Ptr getLinearRowFilter_GPU(int srcType, int bufType, const Mat &rowKernel, int anchor = -1, int bordertype = BORDER_DEFAULT); //! returns the primitive column filter with the specified kernel CV_EXPORTS Ptr getLinearColumnFilter_GPU(int bufType, int dstType, const Mat &columnKernel, int anchor = -1, int bordertype = BORDER_DEFAULT, double delta = 0.0); //! returns the separable linear filter engine CV_EXPORTS Ptr createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT); //! returns the separable filter engine with the specified filters CV_EXPORTS Ptr createSeparableFilter_GPU(const Ptr &rowFilter, const Ptr &columnFilter); //! returns the Gaussian filter engine CV_EXPORTS Ptr createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT); //! returns filter engine for the generalized Sobel operator CV_EXPORTS Ptr createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT ); //! applies Laplacian operator to the image // supports only ksize = 1 and ksize = 3 8UC1 8UC4 32FC1 32FC4 data type CV_EXPORTS void Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize = 1, double scale = 1); //! returns 2D box filter // supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type CV_EXPORTS Ptr getBoxFilter_GPU(int srcType, int dstType, const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); //! 
returns box filter engine CV_EXPORTS Ptr createBoxFilter_GPU(int srcType, int dstType, const Size &ksize, const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); //! returns 2D filter with the specified kernel // supports CV_8UC1 and CV_8UC4 types CV_EXPORTS Ptr getLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Size &ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); //! returns the non-separable linear filter engine CV_EXPORTS Ptr createLinearFilter_GPU(int srcType, int dstType, const Mat &kernel, const Point &anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); //! smooths the image using the normalized box filter // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101,BORDER_WRAP CV_EXPORTS void boxFilter(const oclMat &src, oclMat &dst, int ddepth, Size ksize, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); //! returns 2D morphological filter //! only MORPH_ERODE and MORPH_DILATE are supported // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types // kernel must have CV_8UC1 type, one rows and cols == ksize.width * ksize.height CV_EXPORTS Ptr getMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Size &ksize, Point anchor = Point(-1, -1)); //! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported. CV_EXPORTS Ptr createMorphologyFilter_GPU(int op, int type, const Mat &kernel, const Point &anchor = Point(-1, -1), int iterations = 1); //! a synonym for normalized box filter // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101 static inline void blur(const oclMat &src, oclMat &dst, Size ksize, Point anchor = Point(-1, -1), int borderType = BORDER_CONSTANT) { boxFilter(src, dst, -1, ksize, anchor, borderType); } //! 
applies non-separable 2D linear filter to the image // Note, at the moment this function only works when anchor point is in the kernel center // and kernel size supported is either 3x3 or 5x5; otherwise the function will fail to output valid result CV_EXPORTS void filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernel, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); //! applies separable 2D linear filter to the image CV_EXPORTS void sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT); //! applies generalized Sobel operator to the image // dst.type must equalize src.type // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101 CV_EXPORTS void Sobel(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT); //! applies the vertical or horizontal Scharr operator to the image // dst.type must equalize src.type // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101 CV_EXPORTS void Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy, double scale = 1, double delta = 0.0, int bordertype = BORDER_DEFAULT); //! smooths the image using Gaussian filter. // dst.type must equalize src.type // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 // supports border type: BORDER_CONSTANT, BORDER_REPLICATE, BORDER_REFLECT,BORDER_REFLECT_101 CV_EXPORTS void GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT); //! 
erodes the image (applies the local minimum operator) // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 CV_EXPORTS void erode( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue()); //! dilates the image (applies the local maximum operator) // supports data type: CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 CV_EXPORTS void dilate( const oclMat &src, oclMat &dst, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue()); //! applies an advanced morphological operation to the image CV_EXPORTS void morphologyEx( const oclMat &src, oclMat &dst, int op, const Mat &kernel, Point anchor = Point(-1, -1), int iterations = 1, int borderType = BORDER_CONSTANT, const Scalar &borderValue = morphologyDefaultBorderValue()); ////////////////////////////// Image processing ////////////////////////////// //! Does mean shift filtering on GPU. CV_EXPORTS void meanShiftFiltering(const oclMat &src, oclMat &dst, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); //! Does mean shift procedure on GPU. CV_EXPORTS void meanShiftProc(const oclMat &src, oclMat &dstr, oclMat &dstsp, int sp, int sr, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); //! Does mean shift segmentation with elimiation of small regions. CV_EXPORTS void meanShiftSegmentation(const oclMat &src, Mat &dst, int sp, int sr, int minsize, TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1)); //! applies fixed threshold to the image. 
// supports CV_8UC1 and CV_32FC1 data type // supports threshold type: THRESH_BINARY, THRESH_BINARY_INV, THRESH_TRUNC, THRESH_TOZERO, THRESH_TOZERO_INV CV_EXPORTS double threshold(const oclMat &src, oclMat &dst, double thresh, double maxVal, int type = THRESH_TRUNC); //! resizes the image // Supports INTER_NEAREST, INTER_LINEAR // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types CV_EXPORTS void resize(const oclMat &src, oclMat &dst, Size dsize, double fx = 0, double fy = 0, int interpolation = INTER_LINEAR); //! Applies a generic geometrical transformation to an image. // Supports INTER_NEAREST, INTER_LINEAR. // Map1 supports CV_16SC2, CV_32FC2 types. // Src supports CV_8UC1, CV_8UC2, CV_8UC4. CV_EXPORTS void remap(const oclMat &src, oclMat &dst, oclMat &map1, oclMat &map2, int interpolation, int bordertype, const Scalar &value = Scalar()); //! copies 2D array to a larger destination array and pads borders with user-specifiable constant // supports CV_8UC1, CV_8UC4, CV_32SC1 types CV_EXPORTS void copyMakeBorder(const oclMat &src, oclMat &dst, int top, int bottom, int left, int right, int boardtype, const Scalar &value = Scalar()); //! Smoothes image using median filter // The source 1- or 4-channel image. When m is 3 or 5, the image depth should be CV 8U or CV 32F. CV_EXPORTS void medianFilter(const oclMat &src, oclMat &dst, int m); //! warps the image using affine transformation // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types CV_EXPORTS void warpAffine(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR); //! warps the image using perspective transformation // Supports INTER_NEAREST, INTER_LINEAR, INTER_CUBIC // supports CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 types CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR); //! 
//! computes the integral image and integral for the squared image
// sum will have CV_32S type, sqsum - CV_32F type
// supports only CV_8UC1 source type
CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
CV_EXPORTS void integral(const oclMat &src, oclMat &sum);

// Corner detection helpers. The *_dxdy variants additionally take Dx and Dy matrices by
// non-const reference.
// NOTE(review): Dx/Dy are presumably filled with the image derivatives - confirm against the implementation.
CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k,
                             int bordertype = cv::BORDER_DEFAULT);
CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
                                  int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
CV_EXPORTS void cornerMinEigenVal(const oclMat &src, oclMat &dst, int blockSize, int ksize,
                                  int bordertype = cv::BORDER_DEFAULT);
CV_EXPORTS void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
                                       int blockSize, int ksize, int bordertype = cv::BORDER_DEFAULT);

////////////////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////// CascadeClassifier ////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////////

// NOTE(review): the template argument lists in this block (e.g. "std::vector& faces",
// "std::vector sizev") appear to have been lost; restore them from the upstream header.

// Both cascade classifier classes below are compiled out with "#if 0".
#if 0
class CV_EXPORTS OclCascadeClassifier : public cv::CascadeClassifier
{
public:
    OclCascadeClassifier() {};
    ~OclCascadeClassifier() {};

    CvSeq* oclHaarDetectObjects(oclMat &gimg, CvMemStorage *storage, double scaleFactor,
                                int minNeighbors, int flags, CvSize minSize = cvSize(0, 0), CvSize maxSize = cvSize(0, 0));
};
#endif

#if 0
class CV_EXPORTS OclCascadeClassifierBuf : public cv::CascadeClassifier
{
public:
    // Starts uninitialized, with no buffers allocated.
    OclCascadeClassifierBuf() :
        m_flags(0), initialized(false), m_scaleFactor(0), buffers(NULL) {}

    // The destructor calls release().
    ~OclCascadeClassifierBuf() { release(); }

    void detectMultiScale(oclMat &image, CV_OUT std::vector& faces,
                          double scaleFactor = 1.1, int minNeighbors = 3, int flags = 0,
                          Size minSize = Size(), Size maxSize = Size());
    void release();

private:
    void Init(const int rows, const int cols, double scaleFactor, int flags,
              const int outputsz, const size_t localThreads[],
              Size minSize, Size maxSize);
    void CreateBaseBufs(const int datasize, const int totalclassifier, const int flags, const int outputsz);
    void CreateFactorRelatedBufs(const int rows, const int cols, const int flags,
                                 const double scaleFactor, const size_t localThreads[],
                                 Size minSize, Size maxSize);
    void GenResult(CV_OUT std::vector& faces, const std::vector &rectList, const std::vector &rweights);

    int m_rows;
    int m_cols;
    int m_flags;
    int m_loopcount;
    int m_nodenum;
    bool findBiggestObject;
    bool initialized;
    double m_scaleFactor;
    Size m_minSize;
    Size m_maxSize;
    std::vector sizev;
    std::vector scalev;
    oclMat gimg1, gsum, gsqsum;
    void * buffers;
};
#endif

/////////////////////////////// Pyramid /////////////////////////////////////

CV_EXPORTS void pyrDown(const oclMat &src, oclMat &dst);

//! upsamples the source image and then smooths it
CV_EXPORTS void pyrUp(const oclMat &src, oclMat &dst);

//! performs linear blending of two images
//! to avoid accuracy errors the sum of weights shouldn't be very close to zero
// supports only CV_8UC1 source type
CV_EXPORTS void blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2,
                            oclMat &result);

//! computes vertical sum, supports only CV_32FC1 images
CV_EXPORTS void columnSum(const oclMat &src, oclMat &sum);

///////////////////////////////////////// match_template /////////////////////////////////////////

// Buffer object accepted by the matchTemplate overload that takes a MatchTemplateBuf.
struct CV_EXPORTS MatchTemplateBuf
{
    Size user_block_size;
    oclMat imagef, templf;
    std::vector images;
    std::vector image_sums;
    std::vector image_sqsums;
};

//!
//! computes the proximity map for the raster template and the image where the template is searched for
// Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
// Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method);

//! computes the proximity map for the raster template and the image where the template is searched for
// Supports TM_SQDIFF, TM_SQDIFF_NORMED, TM_CCORR, TM_CCORR_NORMED, TM_CCOEFF, TM_CCOEFF_NORMED for type 8UC1 and 8UC4
// Supports TM_SQDIFF, TM_CCORR for type 32FC1 and 32FC4
// This overload additionally takes a caller-provided MatchTemplateBuf.
CV_EXPORTS void matchTemplate(const oclMat &image, const oclMat &templ, oclMat &result, int method, MatchTemplateBuf &buf);

///////////////////////////////////////////// Canny /////////////////////////////////////////////

// Forward declaration; the struct is defined after the Canny overloads below.
struct CV_EXPORTS CannyBuf;

//! compute edges of the input image using Canny operator
// Support CV_8UC1 only
// Four overloads: the input is either an image or a (dx, dy) pair, each with or without
// a caller-provided CannyBuf.
CV_EXPORTS void Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
CV_EXPORTS void Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false);
CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);
CV_EXPORTS void Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false);

// Working buffers for the Canny overloads that take a CannyBuf.
struct CV_EXPORTS CannyBuf
{
    // Default constructor: only nulls the counter pointer.
    CannyBuf() : counter(NULL) {}

    // The destructor calls release().
    ~CannyBuf()
    {
        release();
    }

    // Nulls the counter pointer, then calls create() for the given image size.
    explicit CannyBuf(const Size &image_size, int apperture_size = 3) : counter(NULL)
    {
        create(image_size, apperture_size);
    }

    CannyBuf(const oclMat &dx_, const oclMat &dy_);

    void create(const Size &image_size, int apperture_size = 3);
    void release();

    oclMat dx, dy;
    oclMat dx_buf, dy_buf;
    oclMat magBuf, mapBuf;
    oclMat trackBuf1, trackBuf2;
    void *counter;
    // NOTE(review): the template argument of Ptr appears to have been lost; restore it from the upstream header.
    Ptr filterDX, filterDY;
};

///////////////////////////////////////// Hough Transform /////////////////////////////////////////

//! HoughCircles
// Buffer object for the HoughCircles overload below; embeds a CannyBuf.
struct HoughCirclesBuf
{
    oclMat edges;
    oclMat accum;
    oclMat srcPoints;
    oclMat centers;
    CannyBuf cannyBuf;
};

CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
CV_EXPORTS void HoughCircles(const oclMat& src, oclMat& circles, HoughCirclesBuf& buf, int method, float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles = 4096);
CV_EXPORTS void HoughCirclesDownload(const oclMat& d_circles, OutputArray h_circles);

///////////////////////////////////////// clAmdFft related /////////////////////////////////////////

//! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix.
//! Param dft_size is the size of DFT transform.
//!
//! For complex-to-real transform it is assumed that the source matrix is packed in CLFFT's format.
// supports src types CV_32FC1, CV_32FC2
// supports flags: DFT_INVERSE, DFT_REAL_OUTPUT, DFT_COMPLEX_OUTPUT, DFT_ROWS
// dft_size is the size of the original input, which is used for the transformation from complex to real.
// dft_size must be powers of 2, 3 and 5
// real to complex dft requires at least v1.8 clAmdFft
// real to complex dft output is not the same as the cpu version
// real to complex and complex to real do not support DFT_ROWS
CV_EXPORTS void dft(const oclMat &src, oclMat &dst, Size dft_size = Size(0, 0), int flags = 0);

//!
//! implements generalized matrix product algorithm GEMM from BLAS
// The functionality requires clAmdBlas library
// only supports type CV_32FC1
// flag GEMM_3_T is not supported
CV_EXPORTS void gemm(const oclMat &src1, const oclMat &src2, double alpha,
                     const oclMat &src3, double beta, oclMat &dst, int flags = 0);

//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////

// NOTE(review): the template argument lists in this block (e.g. "std::vector &detector",
// "template struct", "Accumulator::Type") appear to have been lost; restore them from the
// upstream header before compiling.

struct CV_EXPORTS HOGDescriptor
{
    enum { DEFAULT_WIN_SIGMA = -1 };
    enum { DEFAULT_NLEVELS = 64 };
    enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };

    // Defaults: 64x128 window, 16x16 blocks, 8x8 block stride, 8x8 cells, 9 bins.
    HOGDescriptor(Size win_size = Size(64, 128), Size block_size = Size(16, 16),
                  Size block_stride = Size(8, 8), Size cell_size = Size(8, 8),
                  int nbins = 9, double win_sigma = DEFAULT_WIN_SIGMA,
                  double threshold_L2hys = 0.2, bool gamma_correction = true,
                  int nlevels = DEFAULT_NLEVELS);

    size_t getDescriptorSize() const;
    size_t getBlockHistogramSize() const;

    void setSVMDetector(const std::vector &detector);

    static std::vector getDefaultPeopleDetector();
    static std::vector getPeopleDetector48x96();
    static std::vector getPeopleDetector64x128();

    void detect(const oclMat &img, std::vector &found_locations,
                double hit_threshold = 0, Size win_stride = Size(),
                Size padding = Size());

    void detectMultiScale(const oclMat &img, std::vector &found_locations,
                          double hit_threshold = 0, Size win_stride = Size(),
                          Size padding = Size(), double scale0 = 1.05,
                          int group_threshold = 2);

    void getDescriptors(const oclMat &img, Size win_stride,
                        oclMat &descriptors,
                        int descr_format = DESCR_FORMAT_COL_BY_COL);

    // Public configuration; the names match the constructor parameters.
    Size win_size;
    Size block_size;
    Size block_stride;
    Size cell_size;
    int nbins;
    double win_sigma;
    double threshold_L2hys;
    bool gamma_correction;
    int nlevels;

protected:
    // initialize buffers; only needs to be done once in case of multiscale detection
    void init_buffer(const oclMat &img, Size win_stride);

    void computeBlockHistograms(const oclMat &img);
    void computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle);

    double getWinSigma() const;
    bool checkDetectorSize() const;

    static int numPartsWithin(int size, int part_size, int stride);
    static Size numPartsWithin(Size size, Size part_size, Size stride);

    // Coefficients of the separating plane
    float free_coef;
    oclMat detector;

    // Results of the last classification step
    oclMat labels;
    Mat labels_host;

    // Results of the last histogram evaluation step
    oclMat block_hists;

    // Gradients computation results
    oclMat grad, qangle;

    // scaled image
    oclMat image_scale;

    // effective size of input image (might be different from original size after scaling)
    Size effect_size;
};

////////////////////////feature2d_ocl/////////////////

/****************************************************************************************\
*                                      Distance                                          *
\****************************************************************************************/

// Maps a value type to the type used for accumulated distances. The primary template uses
// the value type itself; the specializations below use float.
template
struct CV_EXPORTS Accumulator
{
    typedef T Type;
};
template<> struct Accumulator
{
    typedef float Type;
};
template<> struct Accumulator
{
    typedef float Type;
};
template<> struct Accumulator
{
    typedef float Type;
};
template<> struct Accumulator
{
    typedef float Type;
};

/*
 * Manhattan distance (city block distance) functor
 */
template
struct CV_EXPORTS L1
{
    enum { normType = NORM_L1 };
    typedef T ValueType;
    typedef typename Accumulator::Type ResultType;

    // Returns normL1(a, b, size).
    ResultType operator()( const T *a, const T *b, int size ) const
    {
        return normL1(a, b, size);
    }
};

/*
 * Euclidean distance functor
 */
template
struct CV_EXPORTS L2
{
    enum { normType = NORM_L2 };
    typedef T ValueType;
    typedef typename Accumulator::Type ResultType;

    // Returns the square root of normL2Sqr(a, b, size), computed in double and cast to ResultType.
    ResultType operator()( const T *a, const T *b, int size ) const
    {
        return (ResultType)std::sqrt((double)normL2Sqr(a, b, size));
    }
};

/*
 * Hamming distance functor - counts the bit differences between two strings - useful for the Brief descriptor
 * bit count of A exclusive XOR'ed with B
 */
struct CV_EXPORTS Hamming
{
    enum { normType = NORM_HAMMING };
    typedef unsigned char ValueType;
    typedef int ResultType;

    /** this will count the bits in a ^ b
     */
    ResultType operator()( const unsigned char *a, const unsigned char *b, int size ) const
    {
        return normHamming(a, b, size);
    }
};

////////////////////////////////// BruteForceMatcher //////////////////////////////////

class CV_EXPORTS BruteForceMatcher_OCL_base
{
public:
    enum DistType {L1Dist = 0, L2Dist, HammingDist};

    // The distance type defaults to L2Dist.
    explicit BruteForceMatcher_OCL_base(DistType distType = L2Dist);

    // Add descriptors to train descriptor collection
    void add(const std::vector &descCollection);

    // Get train descriptors collection
    const std::vector &getTrainDescriptors() const;

    // Clear train descriptors collection
    void clear();

    // Return true if there are no train descriptors in the collection
    bool empty() const;

    // Return true if the matcher supports mask in match methods
    bool isMaskSupported() const;

    // Find one best match for each query descriptor
    void matchSingle(const oclMat &query, const oclMat &train,
                     oclMat &trainIdx, oclMat &distance,
                     const oclMat &mask = oclMat());

    // Download trainIdx and distance and convert it to CPU vector with DMatch
    static void matchDownload(const oclMat &trainIdx, const oclMat &distance, std::vector &matches);

    // Convert trainIdx and distance to vector with DMatch
    static void matchConvert(const Mat &trainIdx, const Mat &distance, std::vector &matches);

    // Find one best match for each query descriptor
    void match(const oclMat &query, const oclMat &train, std::vector &matches, const oclMat &mask = oclMat());

    // Make gpu collection of trains and masks in suitable format for matchCollection function
    void makeGpuCollection(oclMat &trainCollection, oclMat &maskCollection, const std::vector &masks = std::vector());

    // Find one best match from train collection for each query descriptor
    void matchCollection(const oclMat &query, const oclMat &trainCollection,
                         oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
                         const oclMat &masks = oclMat());

    // Download trainIdx, imgIdx and distance and convert it to vector with DMatch
    static void matchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, std::vector &matches);

    // Convert trainIdx, imgIdx and distance to vector with DMatch
    static void matchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, std::vector &matches);

    // Find one best match from train collection for each query descriptor.
    void match(const oclMat &query, std::vector &matches, const std::vector &masks = std::vector());

    // Find k best matches for each query descriptor (in increasing order of distances)
    void knnMatchSingle(const oclMat &query, const oclMat &train,
                        oclMat &trainIdx, oclMat &distance, oclMat &allDist, int k,
                        const oclMat &mask = oclMat());

    // Download trainIdx and distance and convert it to vector with DMatch
    // compactResult is used when mask is not empty. If compactResult is false matches
    // vector will have the same size as queryDescriptors rows. If compactResult is true
    // matches vector will not contain matches for fully masked out query descriptors.
    static void knnMatchDownload(const oclMat &trainIdx, const oclMat &distance,
                                 std::vector< std::vector > &matches, bool compactResult = false);

    // Convert trainIdx and distance to vector with DMatch
    static void knnMatchConvert(const Mat &trainIdx, const Mat &distance,
                                std::vector< std::vector > &matches, bool compactResult = false);

    // Find k best matches for each query descriptor (in increasing order of distances).
    // compactResult is used when mask is not empty. If compactResult is false matches
    // vector will have the same size as queryDescriptors rows. If compactResult is true
    // matches vector will not contain matches for fully masked out query descriptors.
void knnMatch(const oclMat &query, const oclMat &train,
              std::vector< std::vector > &matches, int k, const oclMat &mask = oclMat(),
              bool compactResult = false);

// Find k best matches from train collection for each query descriptor (in increasing order of distances)
// NOTE(review): unlike knnMatchSingle there is no k parameter here - presumably k is fixed at 2, as the name suggests; confirm.
void knnMatch2Collection(const oclMat &query, const oclMat &trainCollection,
                         oclMat &trainIdx, oclMat &imgIdx, oclMat &distance,
                         const oclMat &maskCollection = oclMat());

// Download trainIdx, imgIdx and distance and convert it to vector with DMatch
// compactResult is used when mask is not empty. If compactResult is false matches
// vector will have the same size as queryDescriptors rows. If compactResult is true
// matches vector will not contain matches for fully masked out query descriptors.
static void knnMatch2Download(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance,
                              std::vector< std::vector > &matches, bool compactResult = false);

// Convert trainIdx, imgIdx and distance to vector with DMatch
static void knnMatch2Convert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance,
                             std::vector< std::vector > &matches, bool compactResult = false);

// Find k best matches for each query descriptor (in increasing order of distances).
// compactResult is used when mask is not empty. If compactResult is false matches
// vector will have the same size as queryDescriptors rows. If compactResult is true
// matches vector will not contain matches for fully masked out query descriptors.
void knnMatch(const oclMat &query, std::vector< std::vector > &matches, int k,
              const std::vector &masks = std::vector(), bool compactResult = false);

// Find best matches for each query descriptor which have distance less than maxDistance.
// nMatches.at(0, queryIdx) will contain the match count for queryIdx.
// Careful: nMatches can be greater than trainIdx.cols - it means that the matcher didn't find all matches,
// because it didn't have enough memory.
// If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nTrain / 100), 10), // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches // Matches doesn't sorted. void radiusMatchSingle(const oclMat &query, const oclMat &train, oclMat &trainIdx, oclMat &distance, oclMat &nMatches, float maxDistance, const oclMat &mask = oclMat()); // Download trainIdx, nMatches and distance and convert it to vector with DMatch. // matches will be sorted in increasing order of distances. // compactResult is used when mask is not empty. If compactResult is false matches // vector will have the same size as queryDescriptors rows. If compactResult is true // matches vector will not contain matches for fully masked out query descriptors. static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &distance, const oclMat &nMatches, std::vector< std::vector > &matches, bool compactResult = false); // Convert trainIdx, nMatches and distance to vector with DMatch. static void radiusMatchConvert(const Mat &trainIdx, const Mat &distance, const Mat &nMatches, std::vector< std::vector > &matches, bool compactResult = false); // Find best matches for each query descriptor which have distance less than maxDistance // in increasing order of distances). void radiusMatch(const oclMat &query, const oclMat &train, std::vector< std::vector > &matches, float maxDistance, const oclMat &mask = oclMat(), bool compactResult = false); // Find best matches for each query descriptor which have distance less than maxDistance. // If trainIdx is empty, then trainIdx and distance will be created with size nQuery x max((nQuery / 100), 10), // otherwize user can pass own allocated trainIdx and distance with size nQuery x nMaxMatches // Matches doesn't sorted. 
void radiusMatchCollection(const oclMat &query, oclMat &trainIdx, oclMat &imgIdx, oclMat &distance, oclMat &nMatches, float maxDistance,
                           const std::vector &masks = std::vector());

// Download trainIdx, imgIdx, nMatches and distance and convert it to vector with DMatch.
// matches will be sorted in increasing order of distances.
// compactResult is used when mask is not empty. If compactResult is false matches
// vector will have the same size as queryDescriptors rows. If compactResult is true
// matches vector will not contain matches for fully masked out query descriptors.
static void radiusMatchDownload(const oclMat &trainIdx, const oclMat &imgIdx, const oclMat &distance, const oclMat &nMatches,
                                std::vector< std::vector > &matches, bool compactResult = false);

// Convert trainIdx, imgIdx, nMatches and distance to vector with DMatch.
static void radiusMatchConvert(const Mat &trainIdx, const Mat &imgIdx, const Mat &distance, const Mat &nMatches,
                               std::vector< std::vector > &matches, bool compactResult = false);

// Find best matches from train collection for each query descriptor which have distance less than
// maxDistance (in increasing order of distances).
void radiusMatch(const oclMat &query, std::vector< std::vector > &matches, float maxDistance,
                 const std::vector &masks = std::vector(), bool compactResult = false);

// Distance type used by this matcher (public data member).
DistType distType;

private:
std::vector trainDescCollection;
};

// NOTE(review): the template parameter/argument lists below ("template class ...",
// "BruteForceMatcher_OCL< L1 >") appear to have been lost; restore them from the upstream header.

// Typed front-ends: each specialization only forwards the matching DistType to the base
// class. The constructor overload taking a distance functor ignores its argument.
template
class CV_EXPORTS BruteForceMatcher_OCL;

template
class CV_EXPORTS BruteForceMatcher_OCL< L1 > : public BruteForceMatcher_OCL_base
{
public:
    explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L1Dist) {}
    explicit BruteForceMatcher_OCL(L1 /*d*/) : BruteForceMatcher_OCL_base(L1Dist) {}
};

template
class CV_EXPORTS BruteForceMatcher_OCL< L2 > : public BruteForceMatcher_OCL_base
{
public:
    explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(L2Dist) {}
    explicit BruteForceMatcher_OCL(L2 /*d*/) : BruteForceMatcher_OCL_base(L2Dist) {}
};

template <> class CV_EXPORTS BruteForceMatcher_OCL< Hamming > : public BruteForceMatcher_OCL_base
{
public:
    explicit BruteForceMatcher_OCL() : BruteForceMatcher_OCL_base(HammingDist) {}
    explicit BruteForceMatcher_OCL(Hamming /*d*/) : BruteForceMatcher_OCL_base(HammingDist) {}
};

// Matcher selected by a norm constant at run time: NORM_L1 maps to L1Dist, NORM_L2 maps to
// L2Dist, and every other value falls through to HammingDist.
class CV_EXPORTS BFMatcher_OCL : public BruteForceMatcher_OCL_base
{
public:
    explicit BFMatcher_OCL(int norm = NORM_L2) : BruteForceMatcher_OCL_base(norm == NORM_L1 ? L1Dist : norm == NORM_L2 ? L2Dist : HammingDist) {}
};

class CV_EXPORTS GoodFeaturesToTrackDetector_OCL
{
public:
    // Defaults: up to 1000 corners, quality level 0.01, no minimum distance, 3x3 block,
    // Harris detector disabled (harrisK = 0.04).
    explicit GoodFeaturesToTrackDetector_OCL(int maxCorners = 1000, double qualityLevel = 0.01, double minDistance = 0.0,
            int blockSize = 3, bool useHarrisDetector = false, double harrisK = 0.04);

    //! returns a 1-row matrix with CV_32FC2 type
    void operator ()(const oclMat& image, oclMat& corners, const oclMat& mask = oclMat());

    //! download points of type Point2f to a vector.
//! the vector's content will be erased
// NOTE(review): the template arguments of std::vector in this block appear to have been lost;
// restore them from the upstream header.
void downloadPoints(const oclMat &points, std::vector &points_v);

// Public configuration; set from the constructor arguments of the same name.
int maxCorners;
double qualityLevel;
double minDistance;

int blockSize;
bool useHarrisDetector;
double harrisK;

// Releases every internal oclMat buffer of the detector.
void releaseMemory()
{
    Dx_.release();
    Dy_.release();
    eig_.release();
    minMaxbuf_.release();
    tmpCorners_.release();
}

private:
oclMat Dx_;
oclMat Dy_;
oclMat eig_;
oclMat minMaxbuf_;
oclMat tmpCorners_;
};

// Copies each constructor argument into the public member of the same name.
inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
        int blockSize_, bool useHarrisDetector_, double harrisK_)
{
    maxCorners = maxCorners_;
    qualityLevel = qualityLevel_;
    minDistance = minDistance_;
    blockSize = blockSize_;
    useHarrisDetector = useHarrisDetector_;
    harrisK = harrisK_;
}

/////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////

class CV_EXPORTS PyrLKOpticalFlow
{
public:
    // Defaults: 21x21 window, maxLevel 3, 30 iterations, derivLambda 0.5,
    // minEigThreshold 1e-4; all boolean options off.
    PyrLKOpticalFlow()
    {
        winSize = Size(21, 21);
        maxLevel = 3;
        iters = 30;
        derivLambda = 0.5;
        useInitialFlow = false;
        minEigThreshold = 1e-4f;
        getMinEigenVals = false;
        isDeviceArch11_ = false;
    }

    // err is optional; it defaults to a null pointer.
    void sparse(const oclMat &prevImg, const oclMat &nextImg, const oclMat &prevPts, oclMat &nextPts,
                oclMat &status, oclMat *err = 0);

    // err is optional; it defaults to a null pointer.
    void dense(const oclMat &prevImg, const oclMat &nextImg, oclMat &u, oclMat &v, oclMat *err = 0);

    Size winSize;
    int maxLevel;
    int iters;
    double derivLambda;
    bool useInitialFlow;
    float minEigThreshold;
    bool getMinEigenVals;

    // Releases the derivative buffers and clears both image pyramids.
    // NOTE(review): uPyr_ and vPyr_ are not released here - confirm this is intentional.
    void releaseMemory()
    {
        dx_calcBuf_.release();
        dy_calcBuf_.release();

        prevPyr_.clear();
        nextPyr_.clear();

        dx_buf_.release();
        dy_buf_.release();
    }

private:
    void calcSharrDeriv(const oclMat &src, oclMat &dx, oclMat &dy);

    void buildImagePyramid(const oclMat &img0, std::vector &pyr, bool withBorder);

    oclMat dx_calcBuf_;
    oclMat dy_calcBuf_;

    std::vector prevPyr_;
    std::vector nextPyr_;

    oclMat dx_buf_;
    oclMat dy_buf_;

    oclMat uPyr_[2];
    oclMat vPyr_[2];

    bool isDeviceArch11_;
};

//////////////// build warping maps ////////////////////

//!
//! builds plane warping maps
CV_EXPORTS void buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T, float scale, oclMat &map_x, oclMat &map_y);

//! builds cylindrical warping maps
CV_EXPORTS void buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);

//! builds spherical warping maps
CV_EXPORTS void buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat &R, float scale, oclMat &map_x, oclMat &map_y);

//! builds Affine warping maps
CV_EXPORTS void buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);

//! builds Perspective warping maps
CV_EXPORTS void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap);

///////////////////////////////////// interpolate frames //////////////////////////////////////////////

//! Interpolate frames (images) using provided optical flow (displacement field).
//! frame0   - frame 0 (32-bit floating point images, single channel)
//! frame1   - frame 1 (the same type and size)
//! fu       - forward horizontal displacement
//! fv       - forward vertical displacement
//! bu       - backward horizontal displacement
//! bv       - backward vertical displacement
//! pos      - new frame position
//! newFrame - new frame
//! buf      - temporary buffer, will have width x 6*height size, CV_32FC1 type and contain 6 oclMat;
//!            occlusion masks            0, occlusion masks            1,
//!            interpolated forward flow  0, interpolated forward flow  1,
//!            interpolated backward flow 0, interpolated backward flow 1
//!
CV_EXPORTS void interpolateFrames(const oclMat &frame0, const oclMat &frame1,
                                  const oclMat &fu, const oclMat &fv,
                                  const oclMat &bu, const oclMat &bv,
                                  float pos, oclMat &newFrame, oclMat &buf);

//!
//! computes moments of the rasterized shape or a vector of points
CV_EXPORTS Moments ocl_moments(InputArray _array, bool binaryImage);

class CV_EXPORTS StereoBM_OCL
{
public:
    enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 };

    enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 };

    //! the default constructor
    StereoBM_OCL();

    //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size.
    //! ndisparities must be a multiple of 8.
    StereoBM_OCL(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ);

    //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
    //! Output disparity has CV_8U type.
    void operator() ( const oclMat &left, const oclMat &right, oclMat &disparity);

    //! Some heuristics that try to estimate
    // whether the current GPU will be faster than the CPU in this algorithm.
    // It queries the current active device.
    static bool checkIfGpuCallReasonable();

    int preset;
    int ndisp;
    int winSize;

    // If avergeTexThreshold == 0 => post processing is disabled
    // If avergeTexThreshold != 0 then disparity is set to 0 in each point (x,y) where for the left image
    // SumOfHorizontalGradientsInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
    // i.e. the input left image is low textured.
float avergeTexThreshold; private: oclMat minSSD, leBuf, riBuf; }; class CV_EXPORTS StereoBeliefPropagation { public: enum { DEFAULT_NDISP = 64 }; enum { DEFAULT_ITERS = 5 }; enum { DEFAULT_LEVELS = 5 }; static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels); explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int msg_type = CV_16S); StereoBeliefPropagation(int ndisp, int iters, int levels, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int msg_type = CV_32F); void operator()(const oclMat &left, const oclMat &right, oclMat &disparity); void operator()(const oclMat &data, oclMat &disparity); int ndisp; int iters; int levels; float max_data_term; float data_weight; float max_disc_term; float disc_single_jump; int msg_type; private: oclMat u, d, l, r, u2, d2, l2, r2; std::vector datas; oclMat out; }; class CV_EXPORTS StereoConstantSpaceBP { public: enum { DEFAULT_NDISP = 128 }; enum { DEFAULT_ITERS = 8 }; enum { DEFAULT_LEVELS = 4 }; enum { DEFAULT_NR_PLANE = 4 }; static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane); explicit StereoConstantSpaceBP( int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int nr_plane = DEFAULT_NR_PLANE, int msg_type = CV_32F); StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th = 0, int msg_type = CV_32F); void operator()(const oclMat &left, const oclMat &right, oclMat &disparity); int ndisp; int iters; int levels; int nr_plane; float max_data_term; float data_weight; float max_disc_term; float disc_single_jump; int min_disp_th; int msg_type; bool use_local_init_data_cost; private: oclMat u[2], d[2], l[2], r[2]; oclMat disp_selected_pyr[2]; oclMat data_cost; oclMat data_cost_selected; oclMat 
temp; oclMat out; }; // Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method // // see reference: // [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow". // [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation". class CV_EXPORTS OpticalFlowDual_TVL1_OCL { public: OpticalFlowDual_TVL1_OCL(); void operator ()(const oclMat& I0, const oclMat& I1, oclMat& flowx, oclMat& flowy); void collectGarbage(); /** * Time step of the numerical scheme. */ double tau; /** * Weight parameter for the data term, attachment parameter. * This is the most relevant parameter, which determines the smoothness of the output. * The smaller this parameter is, the smoother the solutions we obtain. * It depends on the range of motions of the images, so its value should be adapted to each image sequence. */ double lambda; /** * Weight parameter for (u - v)^2, tightness parameter. * It serves as a link between the attachment and the regularization terms. * In theory, it should have a small value in order to maintain both parts in correspondence. * The method is stable for a large range of values of this parameter. */ double theta; /** * Number of scales used to create the pyramid of images. */ int nscales; /** * Number of warpings per scale. * Represents the number of times that I1(x+u0) and grad( I1(x+u0) ) are computed per scale. * This is a parameter that assures the stability of the method. * It also affects the running time, so it is a compromise between speed and accuracy. */ int warps; /** * Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time. * A small value will yield more accurate solutions at the expense of a slower convergence. */ double epsilon; /** * Stopping criterion iterations number used in the numerical scheme. 
*/ int iterations; bool useInitialFlow; private: void procOneScale(const oclMat& I0, const oclMat& I1, oclMat& u1, oclMat& u2); std::vector I0s; std::vector I1s; std::vector u1s; std::vector u2s; oclMat I1x_buf; oclMat I1y_buf; oclMat I1w_buf; oclMat I1wx_buf; oclMat I1wy_buf; oclMat grad_buf; oclMat rho_c_buf; oclMat p11_buf; oclMat p12_buf; oclMat p21_buf; oclMat p22_buf; oclMat diff_buf; oclMat norm_buf; }; } } #if defined _MSC_VER && _MSC_VER >= 1200 # pragma warning( push) # pragma warning( disable: 4267) #endif #include "opencv2/ocl/matrix_operations.hpp" #if defined _MSC_VER && _MSC_VER >= 1200 # pragma warning( pop) #endif #endif /* __OPENCV_OCL_HPP__ */