diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 168433d20..1d2bb701d 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -47,8 +47,8 @@ #include "opencv2/core/core.hpp" #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/objdetect/objdetect.hpp" -#include "opencv2/gpu/devmem2d.hpp" #include "opencv2/features2d/features2d.hpp" +#include "opencv2/gpu/gpumat.hpp" namespace cv { @@ -143,182 +143,6 @@ namespace cv CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func); CV_EXPORTS void nppError( int err, const char *file, const int line, const char *func); - //////////////////////////////// GpuMat //////////////////////////////// - class Stream; - class CudaMem; - - //! Smart pointer for GPU memory with reference counting. Its interface is mostly similar with cv::Mat. - class CV_EXPORTS GpuMat - { - public: - //! default constructor - GpuMat(); - //! constructs GpuMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.) - GpuMat(int rows, int cols, int type); - GpuMat(Size size, int type); - //! constucts GpuMatrix and fills it with the specified value _s. - GpuMat(int rows, int cols, int type, const Scalar& s); - GpuMat(Size size, int type, const Scalar& s); - //! copy constructor - GpuMat(const GpuMat& m); - - //! constructor for GpuMatrix headers pointing to user-allocated data - GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP); - GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP); - - //! creates a matrix header for a part of the bigger matrix - GpuMat(const GpuMat& m, const Range& rowRange, const Range& colRange); - GpuMat(const GpuMat& m, const Rect& roi); - - //! builds GpuMat from Mat. Perfom blocking upload to device. - explicit GpuMat (const Mat& m); - - //! destructor - calls release() - ~GpuMat(); - - //! assignment operators - GpuMat& operator = (const GpuMat& m); - //! assignment operator. Perfom blocking upload to device. - GpuMat& operator = (const Mat& m); - - //! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code. - // Contains just image size, data ptr and step. - template operator DevMem2D_() const; - template operator PtrStep_() const; - - //! pefroms blocking upload data to GpuMat. - void upload(const cv::Mat& m); - - //! upload async - void upload(const CudaMem& m, Stream& stream); - - //! downloads data from device to host memory. Blocking calls. - operator Mat() const; - void download(cv::Mat& m) const; - - //! download async - void download(CudaMem& m, Stream& stream) const; - - //! returns a new GpuMatrix header for the specified row - GpuMat row(int y) const; - //! returns a new GpuMatrix header for the specified column - GpuMat col(int x) const; - //! ... for the specified row span - GpuMat rowRange(int startrow, int endrow) const; - GpuMat rowRange(const Range& r) const; - //! ... for the specified column span - GpuMat colRange(int startcol, int endcol) const; - GpuMat colRange(const Range& r) const; - - //! returns deep copy of the GpuMatrix, i.e. the data is copied - GpuMat clone() const; - //! copies the GpuMatrix content to "m". - // It calls m.create(this->size(), this->type()). - void copyTo( GpuMat& m ) const; - //! copies those GpuMatrix elements to "m" that are marked with non-zero mask elements. - void copyTo( GpuMat& m, const GpuMat& mask ) const; - //! 
converts GpuMatrix to another datatype with optional scalng. See cvConvertScale. - void convertTo( GpuMat& m, int rtype, double alpha=1, double beta=0 ) const; - - void assignTo( GpuMat& m, int type=-1 ) const; - - //! sets every GpuMatrix element to s - GpuMat& operator = (const Scalar& s); - //! sets some of the GpuMatrix elements to s, according to the mask - GpuMat& setTo(const Scalar& s, const GpuMat& mask = GpuMat()); - //! creates alternative GpuMatrix header for the same data, with different - // number of channels and/or different number of rows. see cvReshape. - GpuMat reshape(int cn, int rows = 0) const; - - //! allocates new GpuMatrix data unless the GpuMatrix already has specified size and type. - // previous data is unreferenced if needed. - void create(int rows, int cols, int type); - void create(Size size, int type); - //! decreases reference counter; - // deallocate the data when reference counter reaches 0. - void release(); - - //! swaps with other smart pointer - void swap(GpuMat& mat); - - //! locates GpuMatrix header within a parent GpuMatrix. See below - void locateROI( Size& wholeSize, Point& ofs ) const; - //! moves/resizes the current GpuMatrix ROI inside the parent GpuMatrix. - GpuMat& adjustROI( int dtop, int dbottom, int dleft, int dright ); - //! extracts a rectangular sub-GpuMatrix - // (this is a generalized form of row, rowRange etc.) - GpuMat operator()( Range rowRange, Range colRange ) const; - GpuMat operator()( const Rect& roi ) const; - - //! returns true iff the GpuMatrix data is continuous - // (i.e. when there are no gaps between successive rows). - // similar to CV_IS_GpuMat_CONT(cvGpuMat->type) - bool isContinuous() const; - //! returns element size in bytes, - // similar to CV_ELEM_SIZE(cvMat->type) - size_t elemSize() const; - //! returns the size of element channel in bytes. - size_t elemSize1() const; - //! returns element type, similar to CV_MAT_TYPE(cvMat->type) - int type() const; - //! returns element type, similar to CV_MAT_DEPTH(cvMat->type) - int depth() const; - //! returns element type, similar to CV_MAT_CN(cvMat->type) - int channels() const; - //! returns step/elemSize1() - size_t step1() const; - //! returns GpuMatrix size: - // width == number of columns, height == number of rows - Size size() const; - //! returns true if GpuMatrix data is NULL - bool empty() const; - - //! returns pointer to y-th row - uchar* ptr(int y = 0); - const uchar* ptr(int y = 0) const; - - //! template version of the above method - template _Tp* ptr(int y = 0); - template const _Tp* ptr(int y = 0) const; - - //! matrix transposition - GpuMat t() const; - - /*! includes several bit-fields: - - the magic signature - - continuity flag - - depth - - number of channels - */ - int flags; - //! the number of rows and columns - int rows, cols; - //! a distance between successive rows in bytes; includes the gap if any - size_t step; - //! pointer to the data - uchar* data; - - //! pointer to the reference counter; - // when GpuMatrix points to user-allocated data, the pointer is NULL - int* refcount; - - //! helper fields used in locateROI and adjustROI - uchar* datastart; - uchar* dataend; - }; - -//#define TemplatedGpuMat // experimental now, deprecated to use -#ifdef TemplatedGpuMat - #include "GpuMat_BetaDeprecated.hpp" -#endif - - //! Creates continuous GPU matrix - CV_EXPORTS void createContinuous(int rows, int cols, int type, GpuMat& m); - - //! Ensures that size of the given matrix is not less than (rows, cols) size - //! 
and matrix type is match specified one too - CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m); - //////////////////////////////// CudaMem //////////////////////////////// // CudaMem is limited cv::Mat with page locked memory allocation. // Page locked memory is only needed for async and faster coping to GPU. diff --git a/modules/gpu/include/opencv2/gpu/gpumat.hpp b/modules/gpu/include/opencv2/gpu/gpumat.hpp new file mode 100644 index 000000000..8ce456a93 --- /dev/null +++ b/modules/gpu/include/opencv2/gpu/gpumat.hpp @@ -0,0 +1,274 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __OPENCV_GPUMAT_HPP__ +#define __OPENCV_GPUMAT_HPP__ + +#include "opencv2/core/core.hpp" +#include "opencv2/gpu/devmem2d.hpp" + +namespace cv { namespace gpu +{ + class Stream; + class CudaMem; + + //! Smart pointer for GPU memory with reference counting. Its interface is mostly similar with cv::Mat. + class CV_EXPORTS GpuMat + { + public: + //! default constructor + GpuMat(); + //! constructs GpuMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.) + GpuMat(int rows, int cols, int type); + GpuMat(Size size, int type); + //! constucts GpuMatrix and fills it with the specified value _s. + GpuMat(int rows, int cols, int type, const Scalar& s); + GpuMat(Size size, int type, const Scalar& s); + //! copy constructor + GpuMat(const GpuMat& m); + + //! 
constructor for GpuMatrix headers pointing to user-allocated data
+        GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
+        GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
+
+        //! creates a matrix header for a part of the bigger matrix
+        GpuMat(const GpuMat& m, const Range& rowRange, const Range& colRange);
+        GpuMat(const GpuMat& m, const Rect& roi);
+
+        //! builds GpuMat from Mat. Performs blocking upload to device.
+        explicit GpuMat (const Mat& m);
+
+        //! destructor - calls release()
+        ~GpuMat();
+
+        //! assignment operators
+        GpuMat& operator = (const GpuMat& m);
+        //! assignment operator. Performs blocking upload to device.
+        GpuMat& operator = (const Mat& m);
+
+        //! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code.
+        // Contains just image size, data ptr and step.
+        template <class T> operator DevMem2D_<T>() const;
+        template <class T> operator PtrStep_<T>() const;
+
+        //! performs blocking upload of data to GpuMat.
+        void upload(const cv::Mat& m);
+
+        //! upload async
+        void upload(const CudaMem& m, Stream& stream);
+
+        //! downloads data from device to host memory. Blocking calls.
+        operator Mat() const;
+        void download(cv::Mat& m) const;
+
+        //! download async
+        void download(CudaMem& m, Stream& stream) const;
+
+        //! returns a new GpuMatrix header for the specified row
+        GpuMat row(int y) const;
+        //! returns a new GpuMatrix header for the specified column
+        GpuMat col(int x) const;
+        //! ... for the specified row span
+        GpuMat rowRange(int startrow, int endrow) const;
+        GpuMat rowRange(const Range& r) const;
+        //! ... for the specified column span
+        GpuMat colRange(int startcol, int endcol) const;
+        GpuMat colRange(const Range& r) const;
+
+        //! returns deep copy of the GpuMatrix, i.e. the data is copied
+        GpuMat clone() const;
+        //! copies the GpuMatrix content to "m".
+        // It calls m.create(this->size(), this->type()).
+        void copyTo( GpuMat& m ) const;
+        //! copies those GpuMatrix elements to "m" that are marked with non-zero mask elements.
+        void copyTo( GpuMat& m, const GpuMat& mask ) const;
+        //! converts GpuMatrix to another datatype with optional scaling. See cvConvertScale.
+        void convertTo( GpuMat& m, int rtype, double alpha=1, double beta=0 ) const;
+
+        void assignTo( GpuMat& m, int type=-1 ) const;
+
+        //! sets every GpuMatrix element to s
+        GpuMat& operator = (const Scalar& s);
+        //! sets some of the GpuMatrix elements to s, according to the mask
+        GpuMat& setTo(const Scalar& s, const GpuMat& mask = GpuMat());
+        //! creates alternative GpuMatrix header for the same data, with different
+        // number of channels and/or different number of rows. see cvReshape.
+        GpuMat reshape(int cn, int rows = 0) const;
+
+        //! allocates new GpuMatrix data unless the GpuMatrix already has specified size and type.
+        // previous data is unreferenced if needed.
+        void create(int rows, int cols, int type);
+        void create(Size size, int type);
+        //! decreases reference counter;
+        // deallocate the data when reference counter reaches 0.
+        void release();
+
+        //! swaps with other smart pointer
+        void swap(GpuMat& mat);
+
+        //! locates GpuMatrix header within a parent GpuMatrix. See below
+        void locateROI( Size& wholeSize, Point& ofs ) const;
+        //! moves/resizes the current GpuMatrix ROI inside the parent GpuMatrix.
+        GpuMat& adjustROI( int dtop, int dbottom, int dleft, int dright );
+        //! extracts a rectangular sub-GpuMatrix
+        // (this is a generalized form of row, rowRange etc.)
+ GpuMat operator()( Range rowRange, Range colRange ) const; + GpuMat operator()( const Rect& roi ) const; + + //! returns true iff the GpuMatrix data is continuous + // (i.e. when there are no gaps between successive rows). + // similar to CV_IS_GpuMat_CONT(cvGpuMat->type) + bool isContinuous() const; + //! returns element size in bytes, + // similar to CV_ELEM_SIZE(cvMat->type) + size_t elemSize() const; + //! returns the size of element channel in bytes. + size_t elemSize1() const; + //! returns element type, similar to CV_MAT_TYPE(cvMat->type) + int type() const; + //! returns element type, similar to CV_MAT_DEPTH(cvMat->type) + int depth() const; + //! returns element type, similar to CV_MAT_CN(cvMat->type) + int channels() const; + //! returns step/elemSize1() + size_t step1() const; + //! returns GpuMatrix size: + // width == number of columns, height == number of rows + Size size() const; + //! returns true if GpuMatrix data is NULL + bool empty() const; + + //! returns pointer to y-th row + uchar* ptr(int y = 0); + const uchar* ptr(int y = 0) const; + + //! template version of the above method + template _Tp* ptr(int y = 0); + template const _Tp* ptr(int y = 0) const; + + //! matrix transposition + GpuMat t() const; + + /*! includes several bit-fields: + - the magic signature + - continuity flag + - depth + - number of channels + */ + int flags; + //! the number of rows and columns + int rows, cols; + //! a distance between successive rows in bytes; includes the gap if any + size_t step; + //! pointer to the data + uchar* data; + + //! pointer to the reference counter; + // when GpuMatrix points to user-allocated data, the pointer is NULL + int* refcount; + + //! helper fields used in locateROI and adjustROI + uchar* datastart; + uchar* dataend; + }; + + //! Creates continuous GPU matrix + CV_EXPORTS void createContinuous(int rows, int cols, int type, GpuMat& m); + CV_EXPORTS GpuMat createContinuous(int rows, int cols, int type); + CV_EXPORTS void createContinuous(Size size, int type, GpuMat& m); + CV_EXPORTS GpuMat createContinuous(Size size, int type); + + //! Ensures that size of the given matrix is not less than (rows, cols) size + //! 
and matrix type matches the specified one
+    CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);
+    CV_EXPORTS void ensureSizeIsEnough(Size size, int type, GpuMat& m);
+
+    ////////////////////////////////////////////////////////////////////////
+    //////////////////////////////// GpuMat ////////////////////////////////
+    ////////////////////////////////////////////////////////////////////////
+
+    inline GpuMat::GpuMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}
+
+    inline GpuMat::GpuMat(int rows_, int cols_, int type_) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+    {
+        if (rows_ > 0 && cols_ > 0)
+            create(rows_, cols_, type_);
+    }
+
+    inline GpuMat::~GpuMat() { release(); }
+
+    template <class T> inline GpuMat::operator DevMem2D_<T>() const { return DevMem2D_<T>(rows, cols, (T*)data, step); }
+    template <class T> inline GpuMat::operator PtrStep_<T>() const { return PtrStep_<T>(static_cast< DevMem2D_<T> >(*this)); }
+
+    inline GpuMat GpuMat::clone() const
+    {
+        GpuMat m;
+        copyTo(m);
+        return m;
+    }
+
+    inline void GpuMat::assignTo(GpuMat& m, int type) const
+    {
+        if( type < 0 )
+            m = *this;
+        else
+            convertTo(m, type);
+    }
+
+    inline size_t GpuMat::step1() const { return step/elemSize1(); }
+
+    inline bool GpuMat::empty() const { return data == 0; }
+
+    template <typename _Tp> inline _Tp* GpuMat::ptr(int y)
+    {
+        return (_Tp*)ptr(y);
+    }
+
+    template <typename _Tp> inline const _Tp* GpuMat::ptr(int y) const
+    {
+        return (const _Tp*)ptr(y);
+    }
+
+    inline void swap(GpuMat& a, GpuMat& b) { a.swap(b); }
+}}
+
+#endif // __OPENCV_GPUMAT_HPP__
diff --git a/modules/gpu/include/opencv2/gpu/matrix_operations.hpp b/modules/gpu/include/opencv2/gpu/matrix_operations.hpp
index 7af090782..e8bb9167b 100644
--- a/modules/gpu/include/opencv2/gpu/matrix_operations.hpp
+++ b/modules/gpu/include/opencv2/gpu/matrix_operations.hpp
@@ -48,328 +48,6 @@ namespace cv
 namespace gpu
 {
-
-////////////////////////////////////////////////////////////////////////
-//////////////////////////////// GpuMat ////////////////////////////////
-////////////////////////////////////////////////////////////////////////
-
-inline GpuMat::GpuMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}
-
-inline GpuMat::GpuMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
-{
-    if( _rows > 0 && _cols > 0 )
-        create( _rows, _cols, _type );
-}
-
-inline GpuMat::GpuMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
-{
-    if( _size.height > 0 && _size.width > 0 )
-        create( _size.height, _size.width, _type );
-}
-
-inline GpuMat::GpuMat(int _rows, int _cols, int _type, const Scalar& _s)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
-{
-    if(_rows > 0 && _cols > 0)
-    {
-        create(_rows, _cols, _type);
-        *this = _s;
-    }
-}
-
-inline GpuMat::GpuMat(Size _size, int _type, const Scalar& _s)
-    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
-{
-    if( _size.height > 0 && _size.width > 0 )
-    {
-        create( _size.height, _size.width, _type );
-        *this = _s;
-    }
-}
-
-inline GpuMat::GpuMat(const GpuMat& m)
-    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
-{
-    if( refcount )
-        CV_XADD(refcount, 1);
-}
-
-inline GpuMat::GpuMat(int _rows, int _cols, int _type, void* _data, size_t
_step) - : flags(Mat::MAGIC_VAL + (_type & TYPE_MASK)), rows(_rows), cols(_cols), step(_step), data((uchar*)_data), refcount(0), - datastart((uchar*)_data), dataend((uchar*)_data) -{ - size_t minstep = cols*elemSize(); - if( step == Mat::AUTO_STEP ) - { - step = minstep; - flags |= Mat::CONTINUOUS_FLAG; - } - else - { - if( rows == 1 ) step = minstep; - CV_DbgAssert( step >= minstep ); - flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0; - } - dataend += step*(rows-1) + minstep; -} - -inline GpuMat::GpuMat(Size _size, int _type, void* _data, size_t _step) - : flags(Mat::MAGIC_VAL + (_type & TYPE_MASK)), rows(_size.height), cols(_size.width), - step(_step), data((uchar*)_data), refcount(0), - datastart((uchar*)_data), dataend((uchar*)_data) -{ - size_t minstep = cols*elemSize(); - if( step == Mat::AUTO_STEP ) - { - step = minstep; - flags |= Mat::CONTINUOUS_FLAG; - } - else - { - if( rows == 1 ) step = minstep; - CV_DbgAssert( step >= minstep ); - flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0; - } - dataend += step*(rows-1) + minstep; -} - - -inline GpuMat::GpuMat(const GpuMat& m, const Range& rowRange, const Range& colRange) -{ - flags = m.flags; - step = m.step; refcount = m.refcount; - data = m.data; datastart = m.datastart; dataend = m.dataend; - - if( rowRange == Range::all() ) - rows = m.rows; - else - { - CV_Assert( 0 <= rowRange.start && rowRange.start <= rowRange.end && rowRange.end <= m.rows ); - rows = rowRange.size(); - data += step*rowRange.start; - } - - if( colRange == Range::all() ) - cols = m.cols; - else - { - CV_Assert( 0 <= colRange.start && colRange.start <= colRange.end && colRange.end <= m.cols ); - cols = colRange.size(); - data += colRange.start*elemSize(); - flags &= cols < m.cols ? ~Mat::CONTINUOUS_FLAG : -1; - } - - if( rows == 1 ) - flags |= Mat::CONTINUOUS_FLAG; - - if( refcount ) - CV_XADD(refcount, 1); - if( rows <= 0 || cols <= 0 ) - rows = cols = 0; -} - -inline GpuMat::GpuMat(const GpuMat& m, const Rect& roi) - : flags(m.flags), rows(roi.height), cols(roi.width), - step(m.step), data(m.data + roi.y*step), refcount(m.refcount), - datastart(m.datastart), dataend(m.dataend) -{ - flags &= roi.width < m.cols ? 
~Mat::CONTINUOUS_FLAG : -1; - data += roi.x*elemSize(); - CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && - 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows ); - if( refcount ) - CV_XADD(refcount, 1); - if( rows <= 0 || cols <= 0 ) - rows = cols = 0; -} - -inline GpuMat::GpuMat(const Mat& m) -: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) { upload(m); } - -inline GpuMat::~GpuMat() { release(); } - -inline GpuMat& GpuMat::operator = (const GpuMat& m) -{ - if( this != &m ) - { - if( m.refcount ) - CV_XADD(m.refcount, 1); - release(); - flags = m.flags; - rows = m.rows; cols = m.cols; - step = m.step; data = m.data; - datastart = m.datastart; dataend = m.dataend; - refcount = m.refcount; - } - return *this; -} - -inline GpuMat& GpuMat::operator = (const Mat& m) { upload(m); return *this; } - -template inline GpuMat::operator DevMem2D_() const { return DevMem2D_(rows, cols, (T*)data, step); } -template inline GpuMat::operator PtrStep_() const { return PtrStep_(static_cast< DevMem2D_ >(*this)); } - -//CPP: void GpuMat::upload(const Mat& m); - - inline GpuMat::operator Mat() const - { - Mat m; - download(m); - return m; - } - -//CPP void GpuMat::download(cv::Mat& m) const; - -inline GpuMat GpuMat::row(int y) const { return GpuMat(*this, Range(y, y+1), Range::all()); } -inline GpuMat GpuMat::col(int x) const { return GpuMat(*this, Range::all(), Range(x, x+1)); } -inline GpuMat GpuMat::rowRange(int startrow, int endrow) const { return GpuMat(*this, Range(startrow, endrow), Range::all()); } -inline GpuMat GpuMat::rowRange(const Range& r) const { return GpuMat(*this, r, Range::all()); } -inline GpuMat GpuMat::colRange(int startcol, int endcol) const { return GpuMat(*this, Range::all(), Range(startcol, endcol)); } -inline GpuMat GpuMat::colRange(const Range& r) const { return GpuMat(*this, Range::all(), r); } - -inline GpuMat GpuMat::clone() const -{ - GpuMat m; - copyTo(m); - return m; -} - -//CPP void GpuMat::copyTo( GpuMat& m ) const; -//CPP void GpuMat::copyTo( GpuMat& m, const GpuMat& mask ) const; -//CPP void GpuMat::convertTo( GpuMat& m, int rtype, double alpha=1, double beta=0 ) const; - -inline void GpuMat::assignTo( GpuMat& m, int type ) const -{ - if( type < 0 ) - m = *this; - else - convertTo(m, type); -} - -//CPP GpuMat& GpuMat::operator = (const Scalar& s); -//CPP GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask=GpuMat()); -//CPP GpuMat GpuMat::reshape(int _cn, int _rows=0) const; -inline void GpuMat::create(Size _size, int _type) { create(_size.height, _size.width, _type); } -//CPP void GpuMat::create(int _rows, int _cols, int _type); -//CPP void GpuMat::release(); - -inline void GpuMat::swap(GpuMat& b) -{ - std::swap( flags, b.flags ); - std::swap( rows, b.rows ); std::swap( cols, b.cols ); - std::swap( step, b.step ); std::swap( data, b.data ); - std::swap( datastart, b.datastart ); - std::swap( dataend, b.dataend ); - std::swap( refcount, b.refcount ); -} - -inline void GpuMat::locateROI( Size& wholeSize, Point& ofs ) const -{ - size_t esz = elemSize(), minstep; - ptrdiff_t delta1 = data - datastart, delta2 = dataend - datastart; - CV_DbgAssert( step > 0 ); - if( delta1 == 0 ) - ofs.x = ofs.y = 0; - else - { - ofs.y = (int)(delta1/step); - ofs.x = (int)((delta1 - step*ofs.y)/esz); - CV_DbgAssert( data == datastart + ofs.y*step + ofs.x*esz ); - } - minstep = (ofs.x + cols)*esz; - wholeSize.height = (int)((delta2 - minstep)/step + 1); - wholeSize.height = std::max(wholeSize.height, ofs.y + rows); - 
wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz); - wholeSize.width = std::max(wholeSize.width, ofs.x + cols); -} - -inline GpuMat& GpuMat::adjustROI( int dtop, int dbottom, int dleft, int dright ) -{ - Size wholeSize; Point ofs; - size_t esz = elemSize(); - locateROI( wholeSize, ofs ); - int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height); - int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width); - data += (row1 - ofs.y)*step + (col1 - ofs.x)*esz; - rows = row2 - row1; cols = col2 - col1; - if( esz*cols == step || rows == 1 ) - flags |= Mat::CONTINUOUS_FLAG; - else - flags &= ~Mat::CONTINUOUS_FLAG; - return *this; -} - -inline GpuMat GpuMat::operator()( Range rowRange, Range colRange ) const { return GpuMat(*this, rowRange, colRange); } -inline GpuMat GpuMat::operator()( const Rect& roi ) const { return GpuMat(*this, roi); } - -inline bool GpuMat::isContinuous() const { return (flags & Mat::CONTINUOUS_FLAG) != 0; } -inline size_t GpuMat::elemSize() const { return CV_ELEM_SIZE(flags); } -inline size_t GpuMat::elemSize1() const { return CV_ELEM_SIZE1(flags); } -inline int GpuMat::type() const { return CV_MAT_TYPE(flags); } -inline int GpuMat::depth() const { return CV_MAT_DEPTH(flags); } -inline int GpuMat::channels() const { return CV_MAT_CN(flags); } -inline size_t GpuMat::step1() const { return step/elemSize1(); } -inline Size GpuMat::size() const { return Size(cols, rows); } -inline bool GpuMat::empty() const { return data == 0; } - -inline uchar* GpuMat::ptr(int y) -{ - CV_DbgAssert( (unsigned)y < (unsigned)rows ); - return data + step*y; -} - -inline const uchar* GpuMat::ptr(int y) const -{ - CV_DbgAssert( (unsigned)y < (unsigned)rows ); - return data + step*y; -} - -template inline _Tp* GpuMat::ptr(int y) -{ - CV_DbgAssert( (unsigned)y < (unsigned)rows ); - return (_Tp*)(data + step*y); -} - -template inline const _Tp* GpuMat::ptr(int y) const -{ - CV_DbgAssert( (unsigned)y < (unsigned)rows ); - return (const _Tp*)(data + step*y); -} - -inline GpuMat GpuMat::t() const -{ - GpuMat tmp; - transpose(*this, tmp); - return tmp; -} - -static inline void swap( GpuMat& a, GpuMat& b ) { a.swap(b); } - -inline GpuMat createContinuous(int rows, int cols, int type) -{ - GpuMat m; - createContinuous(rows, cols, type, m); - return m; -} - -inline void createContinuous(Size size, int type, GpuMat& m) -{ - createContinuous(size.height, size.width, type, m); -} - -inline GpuMat createContinuous(Size size, int type) -{ - GpuMat m; - createContinuous(size, type, m); - return m; -} - -inline void ensureSizeIsEnough(Size size, int type, GpuMat& m) -{ - ensureSizeIsEnough(size.height, size.width, type, m); -} - - /////////////////////////////////////////////////////////////////////// //////////////////////////////// CudaMem //////////////////////////////// /////////////////////////////////////////////////////////////////////// @@ -457,41 +135,6 @@ inline size_t CudaMem::step1() const { return step/elemSize1(); } inline Size CudaMem::size() const { return Size(cols, rows); } inline bool CudaMem::empty() const { return data == 0; } -////////////////////////////////////////////////////////////////////////////// -// Arithmetical operations - -inline GpuMat operator ~ (const GpuMat& src) -{ - GpuMat dst; - bitwise_not(src, dst); - return dst; -} - - -inline GpuMat operator | (const GpuMat& src1, const GpuMat& src2) -{ - GpuMat dst; - bitwise_or(src1, src2, dst); - return dst; -} - - -inline GpuMat operator & 
(const GpuMat& src1, const GpuMat& src2) -{ - GpuMat dst; - bitwise_and(src1, src2, dst); - return dst; -} - - -inline GpuMat operator ^ (const GpuMat& src1, const GpuMat& src2) -{ - GpuMat dst; - bitwise_xor(src1, src2, dst); - return dst; -} - - } /* end of namespace gpu */ } /* end of namespace cv */ diff --git a/modules/gpu/src/gpumat.cpp b/modules/gpu/src/gpumat.cpp new file mode 100644 index 000000000..095957f9d --- /dev/null +++ b/modules/gpu/src/gpumat.cpp @@ -0,0 +1,910 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#include "precomp.hpp" + +using namespace cv; +using namespace cv::gpu; + +//////////////////////////////////////////////////////////////////////// +//////////////////////////////// GpuMat //////////////////////////////// +//////////////////////////////////////////////////////////////////////// + +cv::gpu::GpuMat::GpuMat(Size size_, int type_) : + flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) +{ + if (size_.height > 0 && size_.width > 0) + create(size_.height, size_.width, type_); +} + +cv::gpu::GpuMat::GpuMat(int rows_, int cols_, int type_, const Scalar& s_) : + flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) +{ + if (rows_ > 0 && cols_ > 0) + { + create(rows_, cols_, type_); + *this = s_; + } +} + +cv::gpu::GpuMat::GpuMat(Size size_, int type_, const Scalar& s_) : + flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) +{ + if (size_.height > 0 && size_.width > 0) + { + create(size_.height, size_.width, type_); + *this = s_; + } +} + +cv::gpu::GpuMat::GpuMat(const GpuMat& m) : + flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend) +{ + if (refcount) + CV_XADD(refcount, 1); +} + +cv::gpu::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) : + flags(Mat::MAGIC_VAL + (type_ & TYPE_MASK)), rows(rows_), cols(cols_), step(step_), data((uchar*)data_), refcount(0), + datastart((uchar*)data_), dataend((uchar*)data_) +{ + size_t minstep = cols * elemSize(); + if (step == Mat::AUTO_STEP) + { + step = minstep; + flags |= Mat::CONTINUOUS_FLAG; + } + else + { + if (rows == 1) step = minstep; + CV_DbgAssert( step >= minstep ); + flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0; + } + dataend += step * (rows - 1) + minstep; +} + +cv::gpu::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) : + flags(Mat::MAGIC_VAL + (type_ & TYPE_MASK)), rows(size_.height), cols(size_.width), + step(step_), data((uchar*)data_), refcount(0), + datastart((uchar*)data_), dataend((uchar*)data_) +{ + size_t minstep = cols * elemSize(); + if (step == Mat::AUTO_STEP) + { + step = minstep; + flags |= Mat::CONTINUOUS_FLAG; + } + else + { + if (rows == 1) step = minstep; + CV_DbgAssert( step >= minstep ); + flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0; + } + dataend += step * (rows - 1) + minstep; +} + +cv::gpu::GpuMat::GpuMat(const GpuMat& m, const Range& rowRange, const Range& colRange) +{ + flags = m.flags; + step = m.step; refcount = m.refcount; + data = m.data; datastart = m.datastart; dataend = m.dataend; + + if (rowRange == Range::all()) + rows = m.rows; + else + { + CV_Assert( 0 <= rowRange.start && rowRange.start <= rowRange.end && rowRange.end <= m.rows ); + rows = rowRange.size(); + data += step*rowRange.start; + } + + if (colRange == Range::all()) + cols = m.cols; + else + { + CV_Assert( 0 <= colRange.start && colRange.start <= colRange.end && colRange.end <= m.cols ); + cols = colRange.size(); + data += colRange.start*elemSize(); + flags &= cols < m.cols ? 
~Mat::CONTINUOUS_FLAG : -1; + } + + if( rows == 1 ) + flags |= Mat::CONTINUOUS_FLAG; + + if( refcount ) + CV_XADD(refcount, 1); + if( rows <= 0 || cols <= 0 ) + rows = cols = 0; +} + +cv::gpu::GpuMat::GpuMat(const GpuMat& m, const Rect& roi) : + flags(m.flags), rows(roi.height), cols(roi.width), + step(m.step), data(m.data + roi.y*step), refcount(m.refcount), + datastart(m.datastart), dataend(m.dataend) +{ + flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1; + data += roi.x*elemSize(); + CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && + 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows ); + if( refcount ) + CV_XADD(refcount, 1); + if( rows <= 0 || cols <= 0 ) + rows = cols = 0; +} + +cv::gpu::GpuMat::GpuMat(const Mat& m) : + flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) +{ + upload(m); +} + +GpuMat& cv::gpu::GpuMat::operator = (const GpuMat& m) +{ + if( this != &m ) + { + if( m.refcount ) + CV_XADD(m.refcount, 1); + release(); + flags = m.flags; + rows = m.rows; cols = m.cols; + step = m.step; data = m.data; + datastart = m.datastart; dataend = m.dataend; + refcount = m.refcount; + } + return *this; +} + +GpuMat& cv::gpu::GpuMat::operator = (const Mat& m) +{ + upload(m); return *this; +} + +cv::gpu::GpuMat::operator Mat() const +{ + Mat m; + download(m); + return m; +} + +GpuMat cv::gpu::GpuMat::row(int y) const +{ + return GpuMat(*this, Range(y, y+1), Range::all()); +} + +GpuMat cv::gpu::GpuMat::col(int x) const +{ + return GpuMat(*this, Range::all(), Range(x, x+1)); +} + +GpuMat cv::gpu::GpuMat::rowRange(int startrow, int endrow) const +{ + return GpuMat(*this, Range(startrow, endrow), Range::all()); +} + +GpuMat cv::gpu::GpuMat::rowRange(const Range& r) const +{ + return GpuMat(*this, r, Range::all()); +} + +GpuMat cv::gpu::GpuMat::colRange(int startcol, int endcol) const +{ + return GpuMat(*this, Range::all(), Range(startcol, endcol)); +} + +GpuMat cv::gpu::GpuMat::colRange(const Range& r) const +{ + return GpuMat(*this, Range::all(), r); +} + +void cv::gpu::GpuMat::create(Size size_, int type_) +{ + create(size_.height, size_.width, type_); +} + +void cv::gpu::GpuMat::swap(GpuMat& b) +{ + std::swap( flags, b.flags ); + std::swap( rows, b.rows ); + std::swap( cols, b.cols ); + std::swap( step, b.step ); + std::swap( data, b.data ); + std::swap( datastart, b.datastart ); + std::swap( dataend, b.dataend ); + std::swap( refcount, b.refcount ); +} + +void cv::gpu::GpuMat::locateROI(Size& wholeSize, Point& ofs) const +{ + size_t esz = elemSize(), minstep; + ptrdiff_t delta1 = data - datastart, delta2 = dataend - datastart; + CV_DbgAssert( step > 0 ); + if( delta1 == 0 ) + ofs.x = ofs.y = 0; + else + { + ofs.y = (int)(delta1/step); + ofs.x = (int)((delta1 - step*ofs.y)/esz); + CV_DbgAssert( data == datastart + ofs.y*step + ofs.x*esz ); + } + minstep = (ofs.x + cols)*esz; + wholeSize.height = (int)((delta2 - minstep)/step + 1); + wholeSize.height = std::max(wholeSize.height, ofs.y + rows); + wholeSize.width = (int)((delta2 - step*(wholeSize.height-1))/esz); + wholeSize.width = std::max(wholeSize.width, ofs.x + cols); +} + +GpuMat& cv::gpu::GpuMat::adjustROI(int dtop, int dbottom, int dleft, int dright) +{ + Size wholeSize; Point ofs; + size_t esz = elemSize(); + locateROI( wholeSize, ofs ); + int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height); + int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width); + data += (row1 - 
ofs.y)*step + (col1 - ofs.x)*esz; + rows = row2 - row1; cols = col2 - col1; + if( esz*cols == step || rows == 1 ) + flags |= Mat::CONTINUOUS_FLAG; + else + flags &= ~Mat::CONTINUOUS_FLAG; + return *this; +} + +cv::gpu::GpuMat GpuMat::operator()(Range rowRange, Range colRange) const +{ + return GpuMat(*this, rowRange, colRange); +} + +cv::gpu::GpuMat GpuMat::operator()(const Rect& roi) const +{ + return GpuMat(*this, roi); +} + +bool cv::gpu::GpuMat::isContinuous() const +{ + return (flags & Mat::CONTINUOUS_FLAG) != 0; +} + +size_t cv::gpu::GpuMat::elemSize() const +{ + return CV_ELEM_SIZE(flags); +} + +size_t cv::gpu::GpuMat::elemSize1() const +{ + return CV_ELEM_SIZE1(flags); +} + +int cv::gpu::GpuMat::type() const +{ + return CV_MAT_TYPE(flags); +} + +int cv::gpu::GpuMat::depth() const +{ + return CV_MAT_DEPTH(flags); +} + +int cv::gpu::GpuMat::channels() const +{ + return CV_MAT_CN(flags); +} + +Size cv::gpu::GpuMat::size() const +{ + return Size(cols, rows); +} + +unsigned char* cv::gpu::GpuMat::ptr(int y) +{ + CV_DbgAssert( (unsigned)y < (unsigned)rows ); + return data + step*y; +} + +const unsigned char* cv::gpu::GpuMat::ptr(int y) const +{ + CV_DbgAssert( (unsigned)y < (unsigned)rows ); + return data + step*y; +} + +GpuMat cv::gpu::GpuMat::t() const +{ + GpuMat tmp; + transpose(*this, tmp); + return tmp; +} + +GpuMat cv::gpu::createContinuous(int rows, int cols, int type) +{ + GpuMat m; + createContinuous(rows, cols, type, m); + return m; +} + +void cv::gpu::createContinuous(Size size, int type, GpuMat& m) +{ + createContinuous(size.height, size.width, type, m); +} + +GpuMat cv::gpu::createContinuous(Size size, int type) +{ + GpuMat m; + createContinuous(size, type, m); + return m; +} + +void cv::gpu::ensureSizeIsEnough(Size size, int type, GpuMat& m) +{ + ensureSizeIsEnough(size.height, size.width, type, m); +} + +#if !defined (HAVE_CUDA) + +void cv::gpu::GpuMat::upload(const Mat&) { throw_nogpu(); } +void cv::gpu::GpuMat::download(cv::Mat&) const { throw_nogpu(); } +void cv::gpu::GpuMat::copyTo(GpuMat&) const { throw_nogpu(); } +void cv::gpu::GpuMat::copyTo(GpuMat&, const GpuMat&) const { throw_nogpu(); } +void cv::gpu::GpuMat::convertTo(GpuMat&, int, double, double) const { throw_nogpu(); } +GpuMat& cv::gpu::GpuMat::operator = (const Scalar&) { throw_nogpu(); return *this; } +GpuMat& cv::gpu::GpuMat::setTo(const Scalar&, const GpuMat&) { throw_nogpu(); return *this; } +GpuMat cv::gpu::GpuMat::reshape(int, int) const { throw_nogpu(); return GpuMat(); } +void cv::gpu::GpuMat::create(int, int, int) { throw_nogpu(); } +void cv::gpu::GpuMat::release() {} +void cv::gpu::createContinuous(int, int, int, GpuMat&) { throw_nogpu(); } + +#else /* !defined (HAVE_CUDA) */ + +namespace cv { namespace gpu { namespace matrix_operations +{ + void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0); + + template + void set_to_gpu(const DevMem2D& mat, const T* scalar, int channels, cudaStream_t stream); + template + void set_to_gpu(const DevMem2D& mat, const T* scalar, const DevMem2D& mask, int channels, cudaStream_t stream); + + void convert_gpu(const DevMem2D& src, int sdepth, const DevMem2D& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0); +}}} + + +void cv::gpu::GpuMat::upload(const Mat& m) +{ + CV_DbgAssert(!m.empty()); + create(m.size(), m.type()); + cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) ); +} + +void 
cv::gpu::GpuMat::upload(const CudaMem& m, Stream& stream) +{ + CV_DbgAssert(!m.empty()); + stream.enqueueUpload(m, *this); +} + +void cv::gpu::GpuMat::download(cv::Mat& m) const +{ + CV_DbgAssert(!this->empty()); + m.create(size(), type()); + cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) ); +} + +void cv::gpu::GpuMat::download(CudaMem& m, Stream& stream) const +{ + CV_DbgAssert(!m.empty()); + stream.enqueueDownload(*this, m); +} + +void cv::gpu::GpuMat::copyTo(GpuMat& m) const +{ + CV_DbgAssert(!this->empty()); + m.create(size(), type()); + cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) ); + cudaSafeCall( cudaDeviceSynchronize() ); +} + +void cv::gpu::GpuMat::copyTo(GpuMat& mat, const GpuMat& mask) const +{ + if (mask.empty()) + { + copyTo(mat); + } + else + { + mat.create(size(), type()); + cv::gpu::matrix_operations::copy_to_with_mask(*this, mat, depth(), mask, channels()); + } +} + +namespace +{ + template struct NPPTypeTraits; + template<> struct NPPTypeTraits { typedef Npp8u npp_type; }; + template<> struct NPPTypeTraits { typedef Npp16u npp_type; }; + template<> struct NPPTypeTraits { typedef Npp16s npp_type; }; + template<> struct NPPTypeTraits { typedef Npp32s npp_type; }; + template<> struct NPPTypeTraits { typedef Npp32f npp_type; }; + + template struct NppConvertFunc + { + typedef typename NPPTypeTraits::npp_type src_t; + typedef typename NPPTypeTraits::npp_type dst_t; + + typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI); + }; + template struct NppConvertFunc + { + typedef typename NPPTypeTraits::npp_type dst_t; + + typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode); + }; + + template::func_ptr func> struct NppCvt + { + typedef typename NPPTypeTraits::npp_type src_t; + typedef typename NPPTypeTraits::npp_type dst_t; + + static void cvt(const GpuMat& src, GpuMat& dst) + { + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz) ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + template::func_ptr func> struct NppCvt + { + typedef typename NPPTypeTraits::npp_type dst_t; + + static void cvt(const GpuMat& src, GpuMat& dst) + { + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz, NPP_RND_NEAR) ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + + void convertToKernelCaller(const GpuMat& src, GpuMat& dst) + { + matrix_operations::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0); + } +} + +void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double beta ) const +{ + CV_Assert((depth() != CV_64F && CV_MAT_DEPTH(rtype) != CV_64F) || + (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE))); + + bool noScale = fabs(alpha-1) < std::numeric_limits::epsilon() && fabs(beta) < std::numeric_limits::epsilon(); + + if( rtype < 0 ) + rtype = type(); + else + rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels()); + + int scn = channels(); + int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype); + if( sdepth == ddepth && noScale ) + { + copyTo(dst); + return; + } + + GpuMat temp; + const GpuMat* psrc = this; + if( sdepth != ddepth && psrc == &dst ) + psrc = &(temp = 
*this); + + dst.create( size(), rtype ); + + if (!noScale) + matrix_operations::convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta); + else + { + typedef void (*convert_caller_t)(const GpuMat& src, GpuMat& dst); + static const convert_caller_t convert_callers[8][8][4] = + { + { + {0,0,0,0}, + {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt::cvt}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt::cvt}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {0,0,0,0} + }, + { + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {0,0,0,0}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {0,0,0,0} + }, + { + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt::cvt}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {0,0,0,0}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {0,0,0,0} + }, + { + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt::cvt}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {0,0,0,0}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {0,0,0,0} + }, + { + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {0,0,0,0}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {0,0,0,0} + }, + { + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + 
{convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {0,0,0,0}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {0,0,0,0} + }, + { + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, + {0,0,0,0}, + {0,0,0,0} + }, + { + {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0} + } + }; + + convert_callers[sdepth][ddepth][scn-1](*psrc, dst); + } +} + +GpuMat& GpuMat::operator = (const Scalar& s) +{ + setTo(s); + return *this; +} + +namespace +{ + template struct NppSetFunc + { + typedef typename NPPTypeTraits::npp_type src_t; + + typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI); + }; + template struct NppSetFunc + { + typedef typename NPPTypeTraits::npp_type src_t; + + typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI); + }; + + template::func_ptr func> struct NppSet + { + typedef typename NPPTypeTraits::npp_type src_t; + + static void set(GpuMat& src, const Scalar& s) + { + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + Scalar_ nppS = s; + nppSafeCall( func(nppS.val, src.ptr(), static_cast(src.step), sz) ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + template::func_ptr func> struct NppSet + { + typedef typename NPPTypeTraits::npp_type src_t; + + static void set(GpuMat& src, const Scalar& s) + { + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + Scalar_ nppS = s; + nppSafeCall( func(nppS[0], src.ptr(), static_cast(src.step), sz) ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + + template + void kernelSet(GpuMat& src, const Scalar& s) + { + Scalar_ sf = s; + matrix_operations::set_to_gpu(src, sf.val, src.channels(), 0); + } + + template struct NppSetMaskFunc + { + typedef typename NPPTypeTraits::npp_type src_t; + + typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep); + }; + template struct NppSetMaskFunc + { + typedef typename NPPTypeTraits::npp_type src_t; + + typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep); + }; + + template::func_ptr func> struct NppSetMask + { + typedef typename NPPTypeTraits::npp_type src_t; + + static void set(GpuMat& src, const Scalar& s, const GpuMat& mask) + { + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + Scalar_ nppS = s; + nppSafeCall( func(nppS.val, src.ptr(), static_cast(src.step), sz, mask.ptr(), static_cast(mask.step)) ); + + cudaSafeCall( cudaDeviceSynchronize() ); + } + }; + template::func_ptr func> struct NppSetMask + { + typedef typename NPPTypeTraits::npp_type src_t; + + static void set(GpuMat& src, const Scalar& s, const GpuMat& mask) + { + NppiSize sz; + sz.width = src.cols; + sz.height = src.rows; + Scalar_ nppS = s; + nppSafeCall( func(nppS[0], src.ptr(), static_cast(src.step), sz, mask.ptr(), static_cast(mask.step)) ); + + cudaSafeCall( 
cudaDeviceSynchronize() ); + } + }; + + template + void kernelSetMask(GpuMat& src, const Scalar& s, const GpuMat& mask) + { + Scalar_ sf = s; + matrix_operations::set_to_gpu(src, sf.val, mask, src.channels(), 0); + } +} + +GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask) +{ + CV_Assert(mask.type() == CV_8UC1); + + CV_Assert((depth() != CV_64F) || + (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE))); + + CV_DbgAssert(!this->empty()); + + NppiSize sz; + sz.width = cols; + sz.height = rows; + + if (mask.empty()) + { + if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0) + { + cudaSafeCall( cudaMemset2D(data, step, 0, cols * elemSize(), rows) ); + return *this; + } + if (depth() == CV_8U) + { + int cn = channels(); + + if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3])) + { + int val = saturate_cast(s[0]); + cudaSafeCall( cudaMemset2D(data, step, val, cols * elemSize(), rows) ); + return *this; + } + } + typedef void (*set_caller_t)(GpuMat& src, const Scalar& s); + static const set_caller_t set_callers[8][4] = + { + {NppSet::set,kernelSet,kernelSet,NppSet::set}, + {kernelSet,kernelSet,kernelSet,kernelSet}, + {NppSet::set,NppSet::set,kernelSet,NppSet::set}, + {NppSet::set,NppSet::set,kernelSet,NppSet::set}, + {NppSet::set,kernelSet,kernelSet,NppSet::set}, + {NppSet::set,kernelSet,kernelSet,NppSet::set}, + {kernelSet,kernelSet,kernelSet,kernelSet}, + {0,0,0,0} + }; + set_callers[depth()][channels()-1](*this, s); + } + else + { + typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, const GpuMat& mask); + static const set_caller_t set_callers[8][4] = + { + {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, + {kernelSetMask,kernelSetMask,kernelSetMask,kernelSetMask}, + {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, + {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, + {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, + {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, + {kernelSetMask,kernelSetMask,kernelSetMask,kernelSetMask}, + {0,0,0,0} + }; + set_callers[depth()][channels()-1](*this, s, mask); + } + + return *this; +} + + +GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const +{ + GpuMat hdr = *this; + + int cn = channels(); + if( new_cn == 0 ) + new_cn = cn; + + int total_width = cols * cn; + + if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 ) + new_rows = rows * total_width / new_cn; + + if( new_rows != 0 && new_rows != rows ) + { + int total_size = total_width * rows; + if( !isContinuous() ) + CV_Error( CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed" ); + + if( (unsigned)new_rows > (unsigned)total_size ) + CV_Error( CV_StsOutOfRange, "Bad new number of rows" ); + + total_width = total_size / new_rows; + + if( total_width * new_rows != total_size ) + CV_Error( CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows" ); + + hdr.rows = new_rows; + hdr.step = total_width * elemSize1(); + } + + int new_width = total_width / new_cn; + + if( new_width * new_cn != total_width ) + CV_Error( CV_BadNumChannels, "The total width is not divisible by the new number of channels" ); + + hdr.cols = new_width; + hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT); + return hdr; +} + +void cv::gpu::GpuMat::create(int _rows, int _cols, int _type) +{ + _type &= 
TYPE_MASK; + if( rows == _rows && cols == _cols && type() == _type && data ) + return; + if( data ) + release(); + CV_DbgAssert( _rows >= 0 && _cols >= 0 ); + if( _rows > 0 && _cols > 0 ) + { + flags = Mat::MAGIC_VAL + _type; + rows = _rows; + cols = _cols; + + size_t esz = elemSize(); + + void *dev_ptr; + cudaSafeCall( cudaMallocPitch(&dev_ptr, &step, esz * cols, rows) ); + + // Single row must be continuous + if (rows == 1) + step = esz * cols; + + if (esz * cols == step) + flags |= Mat::CONTINUOUS_FLAG; + + int64 _nettosize = (int64)step*rows; + size_t nettosize = (size_t)_nettosize; + + datastart = data = (uchar*)dev_ptr; + dataend = data + nettosize; + + refcount = (int*)fastMalloc(sizeof(*refcount)); + *refcount = 1; + } +} + +void cv::gpu::GpuMat::release() +{ + if( refcount && CV_XADD(refcount, -1) == 1 ) + { + fastFree(refcount); + cudaSafeCall( cudaFree(datastart) ); + } + data = datastart = dataend = 0; + step = rows = cols = 0; + refcount = 0; +} + +void cv::gpu::createContinuous(int rows, int cols, int type, GpuMat& m) +{ + int area = rows * cols; + if (!m.isContinuous() || m.type() != type || m.size().area() != area) + m.create(1, area, type); + m = m.reshape(0, rows); +} + +void cv::gpu::ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m) +{ + if (m.type() == type && m.rows >= rows && m.cols >= cols) + m = m(Rect(0, 0, cols, rows)); + else + m.create(rows, cols, type); +} + +#endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpu/src/matrix_operations.cpp b/modules/gpu/src/matrix_operations.cpp index 453658718..a49c8a556 100644 --- a/modules/gpu/src/matrix_operations.cpp +++ b/modules/gpu/src/matrix_operations.cpp @@ -52,554 +52,13 @@ using namespace cv::gpu; #if !defined (HAVE_CUDA) -namespace cv -{ - namespace gpu - { - void GpuMat::upload(const Mat& /*m*/) { throw_nogpu(); } - void GpuMat::download(cv::Mat& /*m*/) const { throw_nogpu(); } - void GpuMat::copyTo( GpuMat& /*m*/ ) const { throw_nogpu(); } - void GpuMat::copyTo( GpuMat& /*m*/, const GpuMat&/* mask */) const { throw_nogpu(); } - void GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const { throw_nogpu(); } - GpuMat& GpuMat::operator = (const Scalar& /*s*/) { throw_nogpu(); return *this; } - GpuMat& GpuMat::setTo(const Scalar& /*s*/, const GpuMat& /*mask*/) { throw_nogpu(); return *this; } - GpuMat GpuMat::reshape(int /*new_cn*/, int /*new_rows*/) const { throw_nogpu(); return GpuMat(); } - void GpuMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); } - void GpuMat::release() {} - - void createContinuous(int /*rows*/, int /*cols*/, int /*type*/, GpuMat& /*m*/) { throw_nogpu(); } - - void CudaMem::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/) { throw_nogpu(); } - bool CudaMem::canMapHostMemory() { throw_nogpu(); return false; } - void CudaMem::release() { throw_nogpu(); } - GpuMat CudaMem::createGpuMatHeader () const { throw_nogpu(); return GpuMat(); } - } - -} +void cv::gpu::CudaMem::create(int /*_rows*/, int /*_cols*/, int /*_type*/, int /*type_alloc*/) { throw_nogpu(); } +bool cv::gpu::CudaMem::canMapHostMemory() { throw_nogpu(); return false; } +void cv::gpu::CudaMem::release() { throw_nogpu(); } +GpuMat cv::gpu::CudaMem::createGpuMatHeader () const { throw_nogpu(); return GpuMat(); } #else /* !defined (HAVE_CUDA) */ -namespace cv -{ - namespace gpu - { - namespace matrix_operations - { - void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & 
stream = 0); - - template - void set_to_gpu(const DevMem2D& mat, const T* scalar, int channels, cudaStream_t stream); - template - void set_to_gpu(const DevMem2D& mat, const T* scalar, const DevMem2D& mask, int channels, cudaStream_t stream); - - void convert_gpu(const DevMem2D& src, int sdepth, const DevMem2D& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0); - } - } -} - -void cv::gpu::GpuMat::upload(const Mat& m) -{ - CV_DbgAssert(!m.empty()); - create(m.size(), m.type()); - cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) ); -} - -void cv::gpu::GpuMat::upload(const CudaMem& m, Stream& stream) -{ - CV_DbgAssert(!m.empty()); - stream.enqueueUpload(m, *this); -} - -void cv::gpu::GpuMat::download(cv::Mat& m) const -{ - CV_DbgAssert(!this->empty()); - m.create(size(), type()); - cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) ); -} - -void cv::gpu::GpuMat::download(CudaMem& m, Stream& stream) const -{ - CV_DbgAssert(!m.empty()); - stream.enqueueDownload(*this, m); -} - -void cv::gpu::GpuMat::copyTo( GpuMat& m ) const -{ - CV_DbgAssert(!this->empty()); - m.create(size(), type()); - cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) ); - cudaSafeCall( cudaDeviceSynchronize() ); -} - -void cv::gpu::GpuMat::copyTo( GpuMat& mat, const GpuMat& mask ) const -{ - if (mask.empty()) - { - copyTo(mat); - } - else - { - mat.create(size(), type()); - cv::gpu::matrix_operations::copy_to_with_mask(*this, mat, depth(), mask, channels()); - } -} - -namespace -{ - template struct NPPTypeTraits; - template<> struct NPPTypeTraits { typedef Npp8u npp_type; }; - template<> struct NPPTypeTraits { typedef Npp16u npp_type; }; - template<> struct NPPTypeTraits { typedef Npp16s npp_type; }; - template<> struct NPPTypeTraits { typedef Npp32s npp_type; }; - template<> struct NPPTypeTraits { typedef Npp32f npp_type; }; - - template struct NppConvertFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - typedef typename NPPTypeTraits::npp_type dst_t; - - typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI); - }; - template struct NppConvertFunc - { - typedef typename NPPTypeTraits::npp_type dst_t; - - typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode); - }; - - template::func_ptr func> struct NppCvt - { - typedef typename NPPTypeTraits::npp_type src_t; - typedef typename NPPTypeTraits::npp_type dst_t; - - static void cvt(const GpuMat& src, GpuMat& dst) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz) ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - template::func_ptr func> struct NppCvt - { - typedef typename NPPTypeTraits::npp_type dst_t; - - static void cvt(const GpuMat& src, GpuMat& dst) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz, NPP_RND_NEAR) ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - - void convertToKernelCaller(const GpuMat& src, GpuMat& dst) - { - matrix_operations::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0); - } -} - -void cv::gpu::GpuMat::convertTo( GpuMat& dst, int rtype, double alpha, double beta ) 
const -{ - CV_Assert((depth() != CV_64F && CV_MAT_DEPTH(rtype) != CV_64F) || - (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE))); - - bool noScale = fabs(alpha-1) < std::numeric_limits::epsilon() && fabs(beta) < std::numeric_limits::epsilon(); - - if( rtype < 0 ) - rtype = type(); - else - rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels()); - - int scn = channels(); - int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype); - if( sdepth == ddepth && noScale ) - { - copyTo(dst); - return; - } - - GpuMat temp; - const GpuMat* psrc = this; - if( sdepth != ddepth && psrc == &dst ) - psrc = &(temp = *this); - - dst.create( size(), rtype ); - - if (!noScale) - matrix_operations::convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta); - else - { - typedef void (*convert_caller_t)(const GpuMat& src, GpuMat& dst); - static const convert_caller_t convert_callers[8][8][4] = - { - { - {0,0,0,0}, - {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt::cvt}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt::cvt}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {0,0,0,0} - }, - { - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {0,0,0,0}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {0,0,0,0} - }, - { - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt::cvt}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {0,0,0,0}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {0,0,0,0} - }, - { - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt::cvt}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {0,0,0,0}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {0,0,0,0} - }, - { - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - 
{0,0,0,0}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {0,0,0,0} - }, - { - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {NppCvt::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {0,0,0,0}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {0,0,0,0} - }, - { - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}, - {0,0,0,0}, - {0,0,0,0} - }, - { - {0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0} - } - }; - - convert_callers[sdepth][ddepth][scn-1](*psrc, dst); - } -} - -GpuMat& GpuMat::operator = (const Scalar& s) -{ - setTo(s); - return *this; -} - -namespace -{ - template struct NppSetFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI); - }; - template struct NppSetFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI); - }; - - template::func_ptr func> struct NppSet - { - typedef typename NPPTypeTraits::npp_type src_t; - - static void set(GpuMat& src, const Scalar& s) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - Scalar_ nppS = s; - nppSafeCall( func(nppS.val, src.ptr(), static_cast(src.step), sz) ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - template::func_ptr func> struct NppSet - { - typedef typename NPPTypeTraits::npp_type src_t; - - static void set(GpuMat& src, const Scalar& s) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - Scalar_ nppS = s; - nppSafeCall( func(nppS[0], src.ptr(), static_cast(src.step), sz) ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - - template - void kernelSet(GpuMat& src, const Scalar& s) - { - Scalar_ sf = s; - matrix_operations::set_to_gpu(src, sf.val, src.channels(), 0); - } - - template struct NppSetMaskFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep); - }; - template struct NppSetMaskFunc - { - typedef typename NPPTypeTraits::npp_type src_t; - - typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep); - }; - - template::func_ptr func> struct NppSetMask - { - typedef typename NPPTypeTraits::npp_type src_t; - - static void set(GpuMat& src, const Scalar& s, const GpuMat& mask) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - 
Scalar_ nppS = s; - nppSafeCall( func(nppS.val, src.ptr(), static_cast(src.step), sz, mask.ptr(), static_cast(mask.step)) ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - template::func_ptr func> struct NppSetMask - { - typedef typename NPPTypeTraits::npp_type src_t; - - static void set(GpuMat& src, const Scalar& s, const GpuMat& mask) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - Scalar_ nppS = s; - nppSafeCall( func(nppS[0], src.ptr(), static_cast(src.step), sz, mask.ptr(), static_cast(mask.step)) ); - - cudaSafeCall( cudaDeviceSynchronize() ); - } - }; - - template - void kernelSetMask(GpuMat& src, const Scalar& s, const GpuMat& mask) - { - Scalar_ sf = s; - matrix_operations::set_to_gpu(src, sf.val, mask, src.channels(), 0); - } -} - -GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask) -{ - CV_Assert(mask.type() == CV_8UC1); - - CV_Assert((depth() != CV_64F) || - (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE))); - - CV_DbgAssert(!this->empty()); - - NppiSize sz; - sz.width = cols; - sz.height = rows; - - if (mask.empty()) - { - if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0) - { - cudaSafeCall( cudaMemset2D(data, step, 0, cols * elemSize(), rows) ); - return *this; - } - if (depth() == CV_8U) - { - int cn = channels(); - - if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3])) - { - int val = saturate_cast(s[0]); - cudaSafeCall( cudaMemset2D(data, step, val, cols * elemSize(), rows) ); - return *this; - } - } - typedef void (*set_caller_t)(GpuMat& src, const Scalar& s); - static const set_caller_t set_callers[8][4] = - { - {NppSet::set,kernelSet,kernelSet,NppSet::set}, - {kernelSet,kernelSet,kernelSet,kernelSet}, - {NppSet::set,NppSet::set,kernelSet,NppSet::set}, - {NppSet::set,NppSet::set,kernelSet,NppSet::set}, - {NppSet::set,kernelSet,kernelSet,NppSet::set}, - {NppSet::set,kernelSet,kernelSet,NppSet::set}, - {kernelSet,kernelSet,kernelSet,kernelSet}, - {0,0,0,0} - }; - set_callers[depth()][channels()-1](*this, s); - } - else - { - typedef void (*set_caller_t)(GpuMat& src, const Scalar& s, const GpuMat& mask); - static const set_caller_t set_callers[8][4] = - { - {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, - {kernelSetMask,kernelSetMask,kernelSetMask,kernelSetMask}, - {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, - {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, - {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, - {NppSetMask::set,kernelSetMask,kernelSetMask,NppSetMask::set}, - {kernelSetMask,kernelSetMask,kernelSetMask,kernelSetMask}, - {0,0,0,0} - }; - set_callers[depth()][channels()-1](*this, s, mask); - } - - return *this; -} - - -GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const -{ - GpuMat hdr = *this; - - int cn = channels(); - if( new_cn == 0 ) - new_cn = cn; - - int total_width = cols * cn; - - if( (new_cn > total_width || total_width % new_cn != 0) && new_rows == 0 ) - new_rows = rows * total_width / new_cn; - - if( new_rows != 0 && new_rows != rows ) - { - int total_size = total_width * rows; - if( !isContinuous() ) - CV_Error( CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed" ); - - if( (unsigned)new_rows > (unsigned)total_size ) - CV_Error( CV_StsOutOfRange, "Bad new number of rows" ); - - total_width = total_size / new_rows; - - if( total_width * new_rows != total_size ) - 
CV_Error( CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows" ); - - hdr.rows = new_rows; - hdr.step = total_width * elemSize1(); - } - - int new_width = total_width / new_cn; - - if( new_width * new_cn != total_width ) - CV_Error( CV_BadNumChannels, "The total width is not divisible by the new number of channels" ); - - hdr.cols = new_width; - hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn-1) << CV_CN_SHIFT); - return hdr; -} - -void cv::gpu::GpuMat::create(int _rows, int _cols, int _type) -{ - _type &= TYPE_MASK; - if( rows == _rows && cols == _cols && type() == _type && data ) - return; - if( data ) - release(); - CV_DbgAssert( _rows >= 0 && _cols >= 0 ); - if( _rows > 0 && _cols > 0 ) - { - flags = Mat::MAGIC_VAL + _type; - rows = _rows; - cols = _cols; - - size_t esz = elemSize(); - - void *dev_ptr; - cudaSafeCall( cudaMallocPitch(&dev_ptr, &step, esz * cols, rows) ); - - // Single row must be continuous - if (rows == 1) - step = esz * cols; - - if (esz * cols == step) - flags |= Mat::CONTINUOUS_FLAG; - - int64 _nettosize = (int64)step*rows; - size_t nettosize = (size_t)_nettosize; - - datastart = data = (uchar*)dev_ptr; - dataend = data + nettosize; - - refcount = (int*)fastMalloc(sizeof(*refcount)); - *refcount = 1; - } -} - -void cv::gpu::GpuMat::release() -{ - if( refcount && CV_XADD(refcount, -1) == 1 ) - { - fastFree(refcount); - cudaSafeCall( cudaFree(datastart) ); - } - data = datastart = dataend = 0; - step = rows = cols = 0; - refcount = 0; -} - -void cv::gpu::createContinuous(int rows, int cols, int type, GpuMat& m) -{ - int area = rows * cols; - if (!m.isContinuous() || m.type() != type || m.size().area() != area) - m.create(1, area, type); - m = m.reshape(0, rows); -} - -void cv::gpu::ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m) -{ - if (m.type() == type && m.rows >= rows && m.cols >= cols) - m = m(Rect(0, 0, cols, rows)); - else - m.create(rows, cols, type); -} - - /////////////////////////////////////////////////////////////////////// //////////////////////////////// CudaMem ////////////////////////////// ///////////////////////////////////////////////////////////////////////
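
Annotation on the setTo() fast paths in this hunk: when the scalar is all zeros, or when the matrix is 8-bit and every channel carries the same value, the implementation bypasses both NPP and the custom kernels and clears the pitched allocation with a single cudaMemset2D over the payload bytes of each row. The sketch below reproduces that idea in isolation; the sizes and the 4-byte element are arbitrary assumptions, and only CUDA runtime calls that already appear in the hunk (cudaMallocPitch, cudaMemset2D, cudaFree) are used.

    #include <cuda_runtime.h>

    int main()
    {
        const int rows = 480, cols = 640;
        const size_t elemSize = 4;                    // e.g. one CV_8UC4 pixel

        void*  devPtr = 0;
        size_t step   = 0;                            // pitch chosen by the driver
        if (cudaMallocPitch(&devPtr, &step, cols * elemSize, rows) != cudaSuccess)
            return 1;

        // Zero-scalar fast path: clear cols*elemSize payload bytes of every row;
        // the padding introduced by the pitch is left untouched.
        cudaMemset2D(devPtr, step, 0, cols * elemSize, rows);

        // 8-bit uniform-scalar fast path: the same call with a non-zero byte.
        cudaMemset2D(devPtr, step, 0x7F, cols * elemSize, rows);

        cudaFree(devPtr);
        return 0;
    }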
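When no fast path applies, setTo() and convertTo() dispatch through static two-dimensional tables of function pointers indexed by [depth][channels-1] (and [sdepth][ddepth][channels-1] for conversions); NULL cells mark combinations that never reach the table, such as the unused eighth depth and same-depth conversions already handled as plain copies. A stripped-down illustration of that pattern with purely hypothetical handlers, not the OpenCV callers themselves:

    #include <cassert>
    #include <cstdio>

    typedef void (*set_caller_t)(int depth, int channels);

    static void specialisedSet(int depth, int cn) { std::printf("fast path   %d/%d\n", depth, cn); }
    static void genericSet(int depth, int cn)     { std::printf("kernel path %d/%d\n", depth, cn); }

    static void dispatchSet(int depth, int channels)
    {
        // 8 depths x 4 channel counts, mirroring the tables in this hunk;
        // which cells get the specialised routine is illustrative here.
        static const set_caller_t callers[8][4] =
        {
            { specialisedSet, genericSet,     genericSet, specialisedSet },
            { genericSet,     genericSet,     genericSet, genericSet     },
            { specialisedSet, specialisedSet, genericSet, specialisedSet },
            { specialisedSet, specialisedSet, genericSet, specialisedSet },
            { specialisedSet, genericSet,     genericSet, specialisedSet },
            { specialisedSet, genericSet,     genericSet, specialisedSet },
            { genericSet,     genericSet,     genericSet, genericSet     },
            { 0, 0, 0, 0 }                       // unreachable depth index
        };

        set_caller_t fn = callers[depth][channels - 1];
        assert(fn != 0);                         // NULL marks an unsupported combination
        fn(depth, channels);
    }

    int main()
    {
        dispatchSet(0, 1);                       // e.g. CV_8U, one channel
        dispatchSet(5, 4);                       // e.g. CV_32F, four channels
        return 0;
    }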
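The convertTo() being moved treats alpha/beta as a no-op only when they sit within machine epsilon of 1 and 0, resolves a negative rtype to the source type, and otherwise keeps the source channel count while taking just the requested depth; a plain copy is used only when the depth also matches. The helpers below restate that decision logic with OpenCV's core macros (the function names are illustrative, not part of the patch):

    #include <opencv2/core/core.hpp>
    #include <cmath>
    #include <limits>

    // True when alpha*x + beta is an identity transform, using the same
    // epsilon test as the patch.
    static bool isIdentityScale(double alpha, double beta)
    {
        return std::fabs(alpha - 1.0) < std::numeric_limits<double>::epsilon() &&
               std::fabs(beta)        < std::numeric_limits<double>::epsilon();
    }

    // Resolve the destination type the way convertTo() does: rtype < 0 keeps
    // the source type, otherwise the requested depth is combined with the
    // source channel count.
    static int resolveDestType(int rtype, int srcType)
    {
        if (rtype < 0)
            return srcType;
        return CV_MAKETYPE(CV_MAT_DEPTH(rtype), CV_MAT_CN(srcType));
    }

    // A plain copy suffices only when both tests pass.
    static bool canJustCopy(int rtype, int srcType, double alpha, double beta)
    {
        return isIdentityScale(alpha, beta) &&
               CV_MAT_DEPTH(resolveDestType(rtype, srcType)) == CV_MAT_DEPTH(srcType);
    }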
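The blocking upload()/download() pair removed from matrix_operations.cpp is a pitched 2D copy: cudaMemcpy2D is given the host step and the device step separately and copies exactly cols*elemSize bytes per row, so host and device layouts may differ. A standalone sketch with arbitrary sizes:

    #include <cuda_runtime.h>
    #include <vector>

    int main()
    {
        const int rows = 240, cols = 320;
        const size_t elemSize = 1;                       // e.g. CV_8UC1
        std::vector<unsigned char> host(rows * cols, 127);

        void*  dev = 0;
        size_t devStep = 0;
        cudaMallocPitch(&dev, &devStep, cols * elemSize, rows);

        // upload: tightly packed host rows -> pitched device rows
        cudaMemcpy2D(dev, devStep, &host[0], cols * elemSize,
                     cols * elemSize, rows, cudaMemcpyHostToDevice);

        // download: pitched device rows -> tightly packed host rows
        cudaMemcpy2D(&host[0], cols * elemSize, dev, devStep,
                     cols * elemSize, rows, cudaMemcpyDeviceToHost);

        cudaFree(dev);
        return 0;
    }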
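GpuMat::create() in this patch allocates with cudaMallocPitch, forces a tight step for single-row matrices, and raises the continuity flag only when the pitch equals cols*elemSize. A reduced, self-contained version of that allocation policy; the struct and member names are illustrative stand-ins, not OpenCV API:

    #include <cuda_runtime.h>
    #include <cstddef>

    struct DeviceBuffer                       // illustrative stand-in for GpuMat
    {
        unsigned char* data;
        size_t step;                          // bytes between successive rows
        int rows, cols;
        bool continuous;

        DeviceBuffer() : data(0), step(0), rows(0), cols(0), continuous(false) {}

        bool create(int r, int c, size_t elemSize)
        {
            void* p = 0;
            size_t pitch = 0;
            if (cudaMallocPitch(&p, &pitch, elemSize * c, r) != cudaSuccess)
                return false;

            // A single row is continuous by definition, so the pitch is ignored.
            if (r == 1)
                pitch = elemSize * c;

            data = static_cast<unsigned char*>(p);
            step = pitch;
            rows = r;
            cols = c;
            continuous = (step == elemSize * c);   // no per-row padding
            return true;
        }

        void release()
        {
            if (data)
                cudaFree(data);
            data = 0; step = 0; rows = cols = 0; continuous = false;
        }
    };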
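GpuMat::release() frees the device memory only when CV_XADD reports that the counter it just decremented was 1, i.e. the caller held the last reference; headers over user-allocated data, whose refcount pointer is NULL, are never freed. A minimal sketch of the same protocol with std::atomic standing in for CV_XADD (the surrounding names are hypothetical):

    #include <atomic>
    #include <cstdlib>

    struct SharedBlock                         // hypothetical shared payload
    {
        void* payload;
        std::atomic<int>* refcount;            // NULL for user-provided memory
    };

    static void releaseBlock(SharedBlock& b)
    {
        // fetch_sub returns the previous value, mirroring CV_XADD(refcount, -1):
        // only the owner that drops the count from 1 to 0 deallocates.
        if (b.refcount && b.refcount->fetch_sub(1) == 1)
        {
            std::free(b.payload);
            delete b.refcount;
        }
        b.payload = 0;
        b.refcount = 0;
    }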
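createContinuous() and ensureSizeIsEnough(), both relocated by this patch, have different reuse contracts: the former guarantees a padding-free layout by allocating one long row and reshaping it, the latter reuses an existing allocation by cropping a top-left ROI whenever the type matches and the buffer is already large enough, and otherwise falls back to create(). A small usage sketch assuming a CUDA-enabled build of the gpu module; the sizes are arbitrary:

    #include <opencv2/gpu/gpu.hpp>

    void bufferHelpersDemo()
    {
        cv::gpu::GpuMat buf;

        // Continuous 480x640 CV_32FC1 buffer: allocated as one 1x(480*640) row,
        // then reshaped, so step is exactly cols * sizeof(float).
        cv::gpu::createContinuous(480, 640, CV_32FC1, buf);
        CV_Assert(buf.isContinuous());

        // Same type, smaller size: no reallocation, the existing buffer is
        // narrowed to a 240x320 ROI at the top-left corner.
        cv::gpu::ensureSizeIsEnough(240, 320, CV_32FC1, buf);
        CV_Assert(buf.rows == 240 && buf.cols == 320);

        // A different type (or a larger size) triggers a fresh create().
        cv::gpu::ensureSizeIsEnough(240, 320, CV_8UC1, buf);
        CV_Assert(buf.type() == CV_8UC1);
    }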