moved GpuMat and DevMem2D to core module, some code refactoring
This commit is contained in:
parent
8a148e39f0
commit
fcfa72081e
@ -90,6 +90,10 @@ class Mat;
|
|||||||
class SparseMat;
|
class SparseMat;
|
||||||
typedef Mat MatND;
|
typedef Mat MatND;
|
||||||
|
|
||||||
|
namespace gpu {
|
||||||
|
class GpuMat;
|
||||||
|
}
|
||||||
|
|
||||||
class CV_EXPORTS MatExpr;
|
class CV_EXPORTS MatExpr;
|
||||||
class CV_EXPORTS MatOp_Base;
|
class CV_EXPORTS MatOp_Base;
|
||||||
class CV_EXPORTS MatArg;
|
class CV_EXPORTS MatArg;
|
||||||
@ -1627,6 +1631,10 @@ public:
|
|||||||
template<typename _Tp> explicit Mat(const Point3_<_Tp>& pt, bool copyData=true);
|
template<typename _Tp> explicit Mat(const Point3_<_Tp>& pt, bool copyData=true);
|
||||||
//! builds matrix from comma initializer
|
//! builds matrix from comma initializer
|
||||||
template<typename _Tp> explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer);
|
template<typename _Tp> explicit Mat(const MatCommaInitializer_<_Tp>& commaInitializer);
|
||||||
|
|
||||||
|
//! download data from GpuMat
|
||||||
|
explicit Mat(const gpu::GpuMat& m);
|
||||||
|
|
||||||
//! destructor - calls release()
|
//! destructor - calls release()
|
||||||
~Mat();
|
~Mat();
|
||||||
//! assignment operators
|
//! assignment operators
|
||||||
|
157
modules/core/include/opencv2/core/devmem2d.hpp
Normal file
157
modules/core/include/opencv2/core/devmem2d.hpp
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef __OPENCV_CORE_DevMem2D_HPP__
|
||||||
|
#define __OPENCV_CORE_DevMem2D_HPP__
|
||||||
|
|
||||||
|
// When compiled by nvcc, the helpers below are usable from both host and
// device code and are force-inlined; for any other compiler the macro
// expands to nothing.
#ifdef __CUDACC__
    #define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
#else
    #define __CV_GPU_HOST_DEVICE__
#endif

namespace cv
{
    namespace gpu
    {
        // Simple lightweight structures that encapsulate information about an image on device.
        // They are intended to be passed to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile.

        //! Compile-time assertion helper: only the <true> specialization is
        //! defined, so StaticAssert<cond>::check() fails to compile when cond is false.
        template <bool expr> struct StaticAssert;
        template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};

        //! Thin wrapper around a raw typed pointer.
        template<typename T> struct DevPtr
        {
            typedef T elem_type;
            typedef int index_type;

            //! size of one element in bytes
            enum { elem_size = sizeof(elem_type) };

            T* data;

            __CV_GPU_HOST_DEVICE__ DevPtr() : data(0) {}
            __CV_GPU_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}

            __CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }

            //! implicit access to the wrapped pointer
            __CV_GPU_HOST_DEVICE__ operator       T*()       { return data; }
            __CV_GPU_HOST_DEVICE__ operator const T*() const { return data; }
        };

        //! Pointer plus a size value.
        //! NOTE(review): the unit of `size` (elements vs. bytes) is not established in this header - confirm with callers.
        template<typename T> struct PtrSz : public DevPtr<T>
        {
            __CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {}
            __CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}

            size_t size;
        };

        //! Pointer plus row stride, giving 2D (row, column) addressing.
        template<typename T> struct PtrStep : public DevPtr<T>
        {
            __CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
            __CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}

            /** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */
            size_t step;

            //! returns pointer to the first element of row y (the offset is computed in bytes)
            __CV_GPU_HOST_DEVICE__       T* ptr(int y = 0)       { return (      T*)( (      char*)DevPtr<T>::data + y * step); }
            __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }

            //! returns reference to the element at row y, column x (no bounds checking)
            __CV_GPU_HOST_DEVICE__       T& operator ()(int y, int x)       { return ptr(y)[x]; }
            __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
        };

        //! PtrStep extended with the number of columns and rows.
        template <typename T> struct PtrStepSz : public PtrStep<T>
        {
            __CV_GPU_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
            __CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
                : PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}

            int cols;
            int rows;
        };

        //! Host-side flavour of PtrStepSz (its constructors are not marked
        //! __CV_GPU_HOST_DEVICE__) that can additionally be re-typed explicitly.
        template <typename T> struct DevMem2D_ : public PtrStepSz<T>
        {
            DevMem2D_() {}
            DevMem2D_(int rows_, int cols_, T* data_, size_t step_) : PtrStepSz<T>(rows_, cols_, data_, step_) {}

            //! reinterprets the same memory as elements of type T;
            //! rows, cols and the byte step are copied unchanged
            template <typename U>
            explicit DevMem2D_(const DevMem2D_<U>& d) : PtrStepSz<T>(d.rows, d.cols, (T*)d.data, d.step) {}
        };

        //! Variant of PtrStep whose `step` is expressed in elements rather than bytes.
        template<typename T> struct PtrElemStep_ : public PtrStep<T>
        {
            PtrElemStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step)
            {
                // sizeof(T) must divide 256.
                // NOTE(review): presumably this guarantees the byte step is a whole
                // number of elements for the pitch alignment in use - confirm.
                StaticAssert<256 % sizeof(T) == 0>::check();

                // convert the inherited byte stride into an element stride
                PtrStep<T>::step /= PtrStep<T>::elem_size;
            }

            //! row pointer computed with element (not byte) arithmetic; hides PtrStep<T>::ptr
            __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep<T>::data + y * PtrStep<T>::step; }
            __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep<T>::data + y * PtrStep<T>::step; }

            __CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
            __CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
        };

        //! PtrStep that can be built from a DevMem2D_ (drops rows/cols, keeps data and byte step).
        template<typename T> struct PtrStep_ : public PtrStep<T>
        {
            PtrStep_() {}
            PtrStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step) {}
        };

        typedef DevMem2D_<unsigned char> DevMem2Db;
        typedef DevMem2Db DevMem2D;
        typedef DevMem2D_<float> DevMem2Df;
        typedef DevMem2D_<int> DevMem2Di;

        typedef PtrStep<unsigned char> PtrStepb;
        typedef PtrStep<float> PtrStepf;
        typedef PtrStep<int> PtrStepi;

        typedef PtrElemStep_<unsigned char> PtrElemStep;
        typedef PtrElemStep_<float> PtrElemStepf;
        typedef PtrElemStep_<int> PtrElemStepi;
    }
}
|
||||||
|
|
||||||
|
#endif /* __OPENCV_CORE_DevMem2D_HPP__ */
|
471
modules/core/include/opencv2/core/gpumat.hpp
Normal file
471
modules/core/include/opencv2/core/gpumat.hpp
Normal file
@ -0,0 +1,471 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef __OPENCV_GPUMAT_HPP__
|
||||||
|
#define __OPENCV_GPUMAT_HPP__
|
||||||
|
|
||||||
|
#include "opencv2/core/core.hpp"
|
||||||
|
#include "opencv2/core/devmem2d.hpp"
|
||||||
|
|
||||||
|
namespace cv { namespace gpu
|
||||||
|
{
|
||||||
|
//! Smart pointer for GPU memory with reference counting. Its interface is mostly similar to cv::Mat.
|
||||||
|
class CV_EXPORTS GpuMat
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
//! default constructor
|
||||||
|
GpuMat();
|
||||||
|
|
||||||
|
//! constructs GpuMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
|
||||||
|
GpuMat(int rows, int cols, int type);
|
||||||
|
GpuMat(Size size, int type);
|
||||||
|
|
||||||
|
//! constucts GpuMatrix and fills it with the specified value _s.
|
||||||
|
GpuMat(int rows, int cols, int type, Scalar s);
|
||||||
|
GpuMat(Size size, int type, Scalar s);
|
||||||
|
|
||||||
|
//! copy constructor
|
||||||
|
GpuMat(const GpuMat& m);
|
||||||
|
|
||||||
|
//! constructor for GpuMatrix headers pointing to user-allocated data
|
||||||
|
GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
|
||||||
|
GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
|
||||||
|
|
||||||
|
//! creates a matrix header for a part of the bigger matrix
|
||||||
|
GpuMat(const GpuMat& m, Range rowRange, Range colRange);
|
||||||
|
GpuMat(const GpuMat& m, Rect roi);
|
||||||
|
|
||||||
|
//! builds GpuMat from Mat. Perfom blocking upload to device.
|
||||||
|
explicit GpuMat(const Mat& m);
|
||||||
|
|
||||||
|
//! destructor - calls release()
|
||||||
|
~GpuMat();
|
||||||
|
|
||||||
|
//! assignment operators
|
||||||
|
GpuMat& operator = (const GpuMat& m);
|
||||||
|
|
||||||
|
//! pefroms blocking upload data to GpuMat.
|
||||||
|
void upload(const Mat& m);
|
||||||
|
|
||||||
|
//! downloads data from device to host memory. Blocking calls.
|
||||||
|
void download(Mat& m) const;
|
||||||
|
|
||||||
|
//! returns a new GpuMatrix header for the specified row
|
||||||
|
GpuMat row(int y) const;
|
||||||
|
//! returns a new GpuMatrix header for the specified column
|
||||||
|
GpuMat col(int x) const;
|
||||||
|
//! ... for the specified row span
|
||||||
|
GpuMat rowRange(int startrow, int endrow) const;
|
||||||
|
GpuMat rowRange(Range r) const;
|
||||||
|
//! ... for the specified column span
|
||||||
|
GpuMat colRange(int startcol, int endcol) const;
|
||||||
|
GpuMat colRange(Range r) const;
|
||||||
|
|
||||||
|
//! returns deep copy of the GpuMatrix, i.e. the data is copied
|
||||||
|
GpuMat clone() const;
|
||||||
|
//! copies the GpuMatrix content to "m".
|
||||||
|
// It calls m.create(this->size(), this->type()).
|
||||||
|
void copyTo(GpuMat& m) const;
|
||||||
|
//! copies those GpuMatrix elements to "m" that are marked with non-zero mask elements.
|
||||||
|
void copyTo(GpuMat& m, const GpuMat& mask) const;
|
||||||
|
//! converts GpuMatrix to another datatype with optional scalng. See cvConvertScale.
|
||||||
|
void convertTo(GpuMat& m, int rtype, double alpha = 1, double beta = 0) const;
|
||||||
|
|
||||||
|
void assignTo(GpuMat& m, int type=-1) const;
|
||||||
|
|
||||||
|
//! sets every GpuMatrix element to s
|
||||||
|
GpuMat& operator = (Scalar s);
|
||||||
|
//! sets some of the GpuMatrix elements to s, according to the mask
|
||||||
|
GpuMat& setTo(Scalar s, const GpuMat& mask = GpuMat());
|
||||||
|
//! creates alternative GpuMatrix header for the same data, with different
|
||||||
|
// number of channels and/or different number of rows. see cvReshape.
|
||||||
|
GpuMat reshape(int cn, int rows = 0) const;
|
||||||
|
|
||||||
|
//! allocates new GpuMatrix data unless the GpuMatrix already has specified size and type.
|
||||||
|
// previous data is unreferenced if needed.
|
||||||
|
void create(int rows, int cols, int type);
|
||||||
|
void create(Size size, int type);
|
||||||
|
//! decreases reference counter;
|
||||||
|
// deallocate the data when reference counter reaches 0.
|
||||||
|
void release();
|
||||||
|
|
||||||
|
//! swaps with other smart pointer
|
||||||
|
void swap(GpuMat& mat);
|
||||||
|
|
||||||
|
//! locates GpuMatrix header within a parent GpuMatrix. See below
|
||||||
|
void locateROI(Size& wholeSize, Point& ofs) const;
|
||||||
|
//! moves/resizes the current GpuMatrix ROI inside the parent GpuMatrix.
|
||||||
|
GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
|
||||||
|
//! extracts a rectangular sub-GpuMatrix
|
||||||
|
// (this is a generalized form of row, rowRange etc.)
|
||||||
|
GpuMat operator()(Range rowRange, Range colRange) const;
|
||||||
|
GpuMat operator()(Rect roi) const;
|
||||||
|
|
||||||
|
//! returns true iff the GpuMatrix data is continuous
|
||||||
|
// (i.e. when there are no gaps between successive rows).
|
||||||
|
// similar to CV_IS_GpuMat_CONT(cvGpuMat->type)
|
||||||
|
bool isContinuous() const;
|
||||||
|
//! returns element size in bytes,
|
||||||
|
// similar to CV_ELEM_SIZE(cvMat->type)
|
||||||
|
size_t elemSize() const;
|
||||||
|
//! returns the size of element channel in bytes.
|
||||||
|
size_t elemSize1() const;
|
||||||
|
//! returns element type, similar to CV_MAT_TYPE(cvMat->type)
|
||||||
|
int type() const;
|
||||||
|
//! returns element type, similar to CV_MAT_DEPTH(cvMat->type)
|
||||||
|
int depth() const;
|
||||||
|
//! returns element type, similar to CV_MAT_CN(cvMat->type)
|
||||||
|
int channels() const;
|
||||||
|
//! returns step/elemSize1()
|
||||||
|
size_t step1() const;
|
||||||
|
//! returns GpuMatrix size:
|
||||||
|
// width == number of columns, height == number of rows
|
||||||
|
Size size() const;
|
||||||
|
//! returns true if GpuMatrix data is NULL
|
||||||
|
bool empty() const;
|
||||||
|
|
||||||
|
//! returns pointer to y-th row
|
||||||
|
uchar* ptr(int y = 0);
|
||||||
|
const uchar* ptr(int y = 0) const;
|
||||||
|
|
||||||
|
//! template version of the above method
|
||||||
|
template<typename _Tp> _Tp* ptr(int y = 0);
|
||||||
|
template<typename _Tp> const _Tp* ptr(int y = 0) const;
|
||||||
|
|
||||||
|
template <typename _Tp> operator DevMem2D_<_Tp>() const;
|
||||||
|
template <typename _Tp> operator PtrStep_<_Tp>() const;
|
||||||
|
|
||||||
|
/*! includes several bit-fields:
|
||||||
|
- the magic signature
|
||||||
|
- continuity flag
|
||||||
|
- depth
|
||||||
|
- number of channels
|
||||||
|
*/
|
||||||
|
int flags;
|
||||||
|
|
||||||
|
//! the number of rows and columns
|
||||||
|
int rows, cols;
|
||||||
|
|
||||||
|
//! a distance between successive rows in bytes; includes the gap if any
|
||||||
|
size_t step;
|
||||||
|
|
||||||
|
//! pointer to the data
|
||||||
|
uchar* data;
|
||||||
|
|
||||||
|
//! pointer to the reference counter;
|
||||||
|
// when GpuMatrix points to user-allocated data, the pointer is NULL
|
||||||
|
int* refcount;
|
||||||
|
|
||||||
|
//! helper fields used in locateROI and adjustROI
|
||||||
|
uchar* datastart;
|
||||||
|
uchar* dataend;
|
||||||
|
};
|
||||||
|
|
||||||
|
//! Creates continuous GPU matrix
|
||||||
|
CV_EXPORTS void createContinuous(int rows, int cols, int type, GpuMat& m);
|
||||||
|
CV_EXPORTS GpuMat createContinuous(int rows, int cols, int type);
|
||||||
|
CV_EXPORTS void createContinuous(Size size, int type, GpuMat& m);
|
||||||
|
CV_EXPORTS GpuMat createContinuous(Size size, int type);
|
||||||
|
|
||||||
|
//! Ensures that size of the given matrix is not less than (rows, cols) size
|
||||||
|
//! and that the matrix type matches the specified one
|
||||||
|
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);
|
||||||
|
CV_EXPORTS void ensureSizeIsEnough(Size size, int type, GpuMat& m);
|
||||||
|
|
||||||
|
class CV_EXPORTS GpuFuncTable
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~GpuFuncTable() {}
|
||||||
|
|
||||||
|
virtual void copy(const Mat& src, GpuMat& dst) const = 0;
|
||||||
|
virtual void copy(const GpuMat& src, Mat& dst) const = 0;
|
||||||
|
virtual void copy(const GpuMat& src, GpuMat& dst) const = 0;
|
||||||
|
|
||||||
|
virtual void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const = 0;
|
||||||
|
|
||||||
|
virtual void convert(const GpuMat& src, GpuMat& dst) const = 0;
|
||||||
|
virtual void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const = 0;
|
||||||
|
|
||||||
|
virtual void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const = 0;
|
||||||
|
|
||||||
|
virtual void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const = 0;
|
||||||
|
virtual void free(void* devPtr) const = 0;
|
||||||
|
};
|
||||||
|
|
||||||
|
CV_EXPORTS void setGpuFuncTable(const GpuFuncTable* funcTbl);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
//! Default constructor: every field is zero and nothing is allocated.
inline GpuMat::GpuMat()
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
}

//! Starts from an empty header and calls create() only when both
//! dimensions are positive.
inline GpuMat::GpuMat(int rows_, int cols_, int type_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    if (rows_ <= 0 || cols_ <= 0)
        return;

    create(rows_, cols_, type_);
}

//! Size flavour of the constructor above (height -> rows, width -> cols).
inline GpuMat::GpuMat(Size size_, int type_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    if (size_.height <= 0 || size_.width <= 0)
        return;

    create(size_.height, size_.width, type_);
}

//! Allocates like the (rows, cols, type) constructor and then fills the
//! matrix with s_ through setTo(); does neither for non-positive dimensions.
inline GpuMat::GpuMat(int rows_, int cols_, int type_, Scalar s_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    if (rows_ <= 0 || cols_ <= 0)
        return;

    create(rows_, cols_, type_);
    setTo(s_);
}

//! Size flavour of the filling constructor above.
inline GpuMat::GpuMat(Size size_, int type_, Scalar s_)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    if (size_.height <= 0 || size_.width <= 0)
        return;

    create(size_.height, size_.width, type_);
    setTo(s_);
}
|
||||||
|
|
||||||
|
//! Destructor: hands the cleanup over to release().
inline GpuMat::~GpuMat() { release(); }
|
||||||
|
|
||||||
|
//! Returns a new matrix obtained by copyTo()-ing this one into an
//! initially empty GpuMat.
inline GpuMat GpuMat::clone() const
{
    GpuMat duplicate;
    this->copyTo(duplicate);
    return duplicate;
}
|
||||||
|
|
||||||
|
//! Stores this matrix into m: a non-negative type requests a conversion
//! through convertTo(), a negative one a plain assignment.
inline void GpuMat::assignTo(GpuMat& m, int type) const
{
    if (type >= 0)
    {
        convertTo(m, type);
        return;
    }

    m = *this;
}
|
||||||
|
|
||||||
|
//! Row step divided by elemSize1().
inline size_t GpuMat::step1() const
{
    const size_t channelBytes = elemSize1();
    return step / channelBytes;
}
|
||||||
|
|
||||||
|
inline bool GpuMat::empty() const
|
||||||
|
{
|
||||||
|
return data == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp> inline _Tp* GpuMat::ptr(int y)
|
||||||
|
{
|
||||||
|
return (_Tp*)ptr(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp> inline const _Tp* GpuMat::ptr(int y) const
|
||||||
|
{
|
||||||
|
return (const _Tp*)ptr(y);
|
||||||
|
}
|
||||||
|
|
||||||
|
//! Free-function form of GpuMat::swap, forwarding to a.swap(b).
inline void swap(GpuMat& a, GpuMat& b) { a.swap(b); }
|
||||||
|
|
||||||
|
//! Header for the single row y: the row range [y, y+1) over all columns.
inline GpuMat GpuMat::row(int y) const
{
    const Range oneRow(y, y+1);
    return GpuMat(*this, oneRow, Range::all());
}
|
||||||
|
|
||||||
|
//! Header for the single column x: all rows over the column range [x, x+1).
inline GpuMat GpuMat::col(int x) const
{
    const Range oneCol(x, x+1);
    return GpuMat(*this, Range::all(), oneCol);
}
|
||||||
|
|
||||||
|
//! Header for the rows given as Range(startrow, endrow), all columns kept.
inline GpuMat GpuMat::rowRange(int startrow, int endrow) const
{
    const Range span(startrow, endrow);
    return GpuMat(*this, span, Range::all());
}

//! Same as above with the row span already packed in a Range.
inline GpuMat GpuMat::rowRange(Range r) const
{
    return GpuMat(*this, r, Range::all());
}
|
||||||
|
|
||||||
|
//! Header for the columns given as Range(startcol, endcol), all rows kept.
inline GpuMat GpuMat::colRange(int startcol, int endcol) const
{
    const Range span(startcol, endcol);
    return GpuMat(*this, Range::all(), span);
}

//! Same as above with the column span already packed in a Range.
inline GpuMat GpuMat::colRange(Range r) const
{
    return GpuMat(*this, Range::all(), r);
}
|
||||||
|
|
||||||
|
//! Size flavour of create(): height is passed as rows, width as cols.
inline void GpuMat::create(Size size_, int type_)
{
    const int h = size_.height;
    const int w = size_.width;
    create(h, w, type_);
}
|
||||||
|
|
||||||
|
//! Sub-matrix selected by a row range and a column range; forwards to the
//! (matrix, Range, Range) constructor.
inline GpuMat GpuMat::operator()(Range rowRange, Range colRange) const
{
    return GpuMat(*this, rowRange, colRange);
}

//! Sub-matrix selected by a rectangle; forwards to the (matrix, Rect)
//! constructor.
inline GpuMat GpuMat::operator()(Rect roi) const
{
    return GpuMat(*this, roi);
}
|
||||||
|
|
||||||
|
inline bool GpuMat::isContinuous() const
|
||||||
|
{
|
||||||
|
return (flags & Mat::CONTINUOUS_FLAG) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
//! Element size as computed by CV_ELEM_SIZE from flags.
inline size_t GpuMat::elemSize() const
{
    const size_t bytes = CV_ELEM_SIZE(flags);
    return bytes;
}
|
||||||
|
|
||||||
|
//! Size of one element channel as computed by CV_ELEM_SIZE1 from flags.
inline size_t GpuMat::elemSize1() const
{
    const size_t bytes = CV_ELEM_SIZE1(flags);
    return bytes;
}
|
||||||
|
|
||||||
|
inline int GpuMat::type() const
|
||||||
|
{
|
||||||
|
return CV_MAT_TYPE(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int GpuMat::depth() const
|
||||||
|
{
|
||||||
|
return CV_MAT_DEPTH(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int GpuMat::channels() const
|
||||||
|
{
|
||||||
|
return CV_MAT_CN(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
//! Matrix dimensions as Size(width = cols, height = rows).
inline Size GpuMat::size() const
{
    const Size dims(cols, rows);
    return dims;
}
|
||||||
|
|
||||||
|
//! Pointer to the start of row y, i.e. data advanced by y steps.
//! The row index is range-checked by CV_DbgAssert.
inline uchar* GpuMat::ptr(int y)
{
    CV_DbgAssert((unsigned)y < (unsigned)rows);

    uchar* rowStart = data + step * y;
    return rowStart;
}

//! Const counterpart of the row accessor above.
inline const uchar* GpuMat::ptr(int y) const
{
    CV_DbgAssert((unsigned)y < (unsigned)rows);

    const uchar* rowStart = data + step * y;
    return rowStart;
}
|
||||||
|
|
||||||
|
//! Scalar assignment: calls setTo(s) and returns *this.
inline GpuMat& GpuMat::operator = (Scalar s)
{
    this->setTo(s);
    return *this;
}
|
||||||
|
|
||||||
|
//! Builds a DevMem2D_<T> descriptor from rows, cols, the data pointer cast
//! to T* and the byte step. T is not checked against type().
template <class T> inline GpuMat::operator DevMem2D_<T>() const
{
    T* typedData = (T*)data;
    return DevMem2D_<T>(rows, cols, typedData, step);
}
|
||||||
|
|
||||||
|
//! Builds a PtrStep_<T> by first converting *this to DevMem2D_<T>.
template <class T> inline GpuMat::operator PtrStep_<T>() const
{
    const DevMem2D_<T> view = static_cast< DevMem2D_<T> >(*this);
    return PtrStep_<T>(view);
}
|
||||||
|
|
||||||
|
inline GpuMat createContinuous(int rows, int cols, int type)
|
||||||
|
{
|
||||||
|
GpuMat m;
|
||||||
|
createContinuous(rows, cols, type, m);
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void createContinuous(Size size, int type, GpuMat& m)
|
||||||
|
{
|
||||||
|
createContinuous(size.height, size.width, type, m);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline GpuMat createContinuous(Size size, int type)
|
||||||
|
{
|
||||||
|
GpuMat m;
|
||||||
|
createContinuous(size, type, m);
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
|
//! Size flavour of ensureSizeIsEnough(): height is passed as rows,
//! width as cols.
inline void ensureSizeIsEnough(Size size, int type, GpuMat& m)
{
    const int h = size.height;
    const int w = size.width;
    ensureSizeIsEnough(h, w, type, m);
}
|
||||||
|
|
||||||
|
inline void createContinuous(int rows, int cols, int type, GpuMat& m)
|
||||||
|
{
|
||||||
|
int area = rows * cols;
|
||||||
|
if (!m.isContinuous() || m.type() != type || m.size().area() != area)
|
||||||
|
m.create(1, area, type);
|
||||||
|
m = m.reshape(0, rows);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m)
|
||||||
|
{
|
||||||
|
if (m.type() == type && m.rows >= rows && m.cols >= cols)
|
||||||
|
m = m(Rect(0, 0, cols, rows));
|
||||||
|
else
|
||||||
|
m.create(rows, cols, type);
|
||||||
|
}
|
||||||
|
}}
|
||||||
|
|
||||||
|
#endif // __OPENCV_GPUMAT_HPP__
|
460
modules/core/src/gpumat.cpp
Normal file
460
modules/core/src/gpumat.cpp
Normal file
@ -0,0 +1,460 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
#include "opencv2/core/gpumat.hpp"
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::gpu;
|
||||||
|
|
||||||
|
// Copy constructor: produces a second header over the same buffer as m.
// No pixel data is copied; only the header fields are duplicated and the
// shared reference counter (when there is one) is incremented.
cv::gpu::GpuMat::GpuMat(const GpuMat& m)
    : flags(m.flags),
      rows(m.rows),
      cols(m.cols),
      step(m.step),
      data(m.data),
      refcount(m.refcount),
      datastart(m.datastart),
      dataend(m.dataend)
{
    // Headers without a counter (refcount == 0) have nothing to increment.
    if (refcount != 0)
    {
        CV_XADD(refcount, 1);
    }
}
|
||||||
|
|
||||||
|
// Wraps caller-provided memory in a GpuMat header (rows/cols overload).
// The header gets no reference counter (refcount == 0).
//
// rows_, cols_ - matrix dimensions
// type_        - element type; only the bits covered by TYPE_MASK are kept
// data_        - pointer to the first element of the caller's buffer
// step_        - row stride in bytes, or Mat::AUTO_STEP for tightly packed rows
cv::gpu::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) :
    flags(Mat::MAGIC_VAL + (type_ & TYPE_MASK)), rows(rows_), cols(cols_),
    step(step_), data(static_cast<uchar*>(data_)), refcount(0),
    datastart(static_cast<uchar*>(data_)), dataend(static_cast<uchar*>(data_))
{
    // Number of bytes actually occupied by one row of elements.
    const size_t rowBytes = cols * elemSize();

    // An automatic step and a single-row matrix both collapse to a packed row.
    if (step == Mat::AUTO_STEP || rows == 1)
    {
        step = rowBytes;
    }

    CV_DbgAssert(step >= rowBytes);

    // Rows that follow each other without padding make the matrix continuous.
    if (step == rowBytes)
    {
        flags |= Mat::CONTINUOUS_FLAG;
    }

    // The last row contributes only its payload, not a full stride.
    dataend += step * (rows - 1) + rowBytes;
}
|
||||||
|
|
||||||
|
// Wraps caller-provided memory in a GpuMat header (Size overload).
// The header gets no reference counter (refcount == 0).
//
// size_ - matrix dimensions (height -> rows, width -> cols)
// type_ - element type; only the bits covered by TYPE_MASK are kept
// data_ - pointer to the first element of the caller's buffer
// step_ - row stride in bytes, or Mat::AUTO_STEP for tightly packed rows
cv::gpu::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
    flags(Mat::MAGIC_VAL + (type_ & TYPE_MASK)), rows(size_.height), cols(size_.width),
    step(step_), data(static_cast<uchar*>(data_)), refcount(0),
    datastart(static_cast<uchar*>(data_)), dataend(static_cast<uchar*>(data_))
{
    // Number of bytes actually occupied by one row of elements.
    const size_t rowBytes = cols * elemSize();

    // An automatic step and a single-row matrix both collapse to a packed row.
    if (step == Mat::AUTO_STEP || rows == 1)
    {
        step = rowBytes;
    }

    CV_DbgAssert(step >= rowBytes);

    // Rows that follow each other without padding make the matrix continuous.
    if (step == rowBytes)
    {
        flags |= Mat::CONTINUOUS_FLAG;
    }

    // The last row contributes only its payload, not a full stride.
    dataend += step * (rows - 1) + rowBytes;
}
|
||||||
|
|
||||||
|
// Builds a header for the sub-matrix of m selected by a row range and a
// column range. The new header shares m's buffer and reference counter.
// Range::all() selects every row (or column) of m.
// Raises through CV_Assert when a range does not fit inside m.
cv::gpu::GpuMat::GpuMat(const GpuMat& m, Range rowRange, Range colRange)
    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step),
      data(m.data), refcount(m.refcount),
      datastart(m.datastart), dataend(m.dataend)
{
    if (!(rowRange == Range::all()))
    {
        CV_Assert(0 <= rowRange.start && rowRange.start <= rowRange.end && rowRange.end <= m.rows);

        rows = rowRange.size();
        data += step*rowRange.start;   // skip the rows above the selection
    }

    if (!(colRange == Range::all()))
    {
        CV_Assert(0 <= colRange.start && colRange.start <= colRange.end && colRange.end <= m.cols);

        cols = colRange.size();
        data += colRange.start*elemSize();   // skip the columns left of the selection

        // Dropping columns leaves gaps between consecutive rows.
        if (cols < m.cols)
            flags &= ~Mat::CONTINUOUS_FLAG;
    }

    // A single row is always continuous, whatever the step is.
    if (rows == 1)
        flags |= Mat::CONTINUOUS_FLAG;

    if (refcount != 0)
        CV_XADD(refcount, 1);

    // Normalize degenerate selections to a 0x0 size.
    if (rows <= 0 || cols <= 0)
        rows = cols = 0;
}
|
||||||
|
|
||||||
|
// Builds a header for the rectangular region roi of m. The new header shares
// m's buffer and reference counter.
// Raises through CV_Assert when roi does not lie completely inside m.
cv::gpu::GpuMat::GpuMat(const GpuMat& m, Rect roi) :
    flags(m.flags), rows(roi.height), cols(roi.width),
    step(m.step), data(m.data), refcount(m.refcount),
    datastart(m.datastart), dataend(m.dataend)
{
    CV_Assert(0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows);

    // Move the data pointer to the top-left element of the region.
    data += roi.y*step;
    data += roi.x * elemSize();

    // A region narrower than m leaves gaps between consecutive rows.
    if (roi.width < m.cols)
        flags &= ~Mat::CONTINUOUS_FLAG;

    if (refcount != 0)
        CV_XADD(refcount, 1);

    // Normalize degenerate regions to a 0x0 size.
    if (rows <= 0 || cols <= 0)
        rows = cols = 0;
}
|
||||||
|
|
||||||
|
// Constructs a GpuMat from a host matrix: starts from an empty header and
// then delegates to upload(), which allocates storage and copies m into it.
cv::gpu::GpuMat::GpuMat(const Mat& m)
    : flags(0),
      rows(0),
      cols(0),
      step(0),
      data(0),
      refcount(0),
      datastart(0),
      dataend(0)
{
    upload(m);
}
|
||||||
|
|
||||||
|
// Assignment via copy-and-swap: a temporary header sharing m's buffer is
// created, exchanged with *this, and the previous contents of *this leave
// with the temporary when it goes out of scope. Self-assignment is a no-op.
GpuMat& cv::gpu::GpuMat::operator = (const GpuMat& m)
{
    if (this == &m)
        return *this;

    GpuMat shared(m);
    swap(shared);

    return *this;
}
|
||||||
|
|
||||||
|
// Exchanges every header field of *this with the corresponding field of b.
// Only the headers are touched; the reference counters themselves are not
// modified, they merely change owner.
void cv::gpu::GpuMat::swap(GpuMat& b)
{
    // Type/flags and geometry.
    std::swap(flags, b.flags);
    std::swap(rows,  b.rows);
    std::swap(cols,  b.cols);
    std::swap(step,  b.step);

    // Buffer pointers.
    std::swap(data,      b.data);
    std::swap(datastart, b.datastart);
    std::swap(dataend,   b.dataend);

    // Ownership bookkeeping.
    std::swap(refcount, b.refcount);
}
|
||||||
|
|
||||||
|
// Recovers where this header sits inside the buffer it refers to.
//
// wholeSize - receives the size computed for the whole buffer
//             [datastart, dataend)
// ofs       - receives the (x, y) position of this header's first element
//             relative to datastart
void cv::gpu::GpuMat::locateROI(Size& wholeSize, Point& ofs) const
{
    const size_t elemBytes = elemSize();
    const ptrdiff_t roiOffset = data - datastart;       // bytes from buffer start to our first element
    const ptrdiff_t bufferBytes = dataend - datastart;  // bytes spanned by the whole buffer

    CV_DbgAssert(step > 0);

    if (roiOffset != 0)
    {
        // Split the byte offset into whole rows and the remainder within a row.
        ofs.y = static_cast<int>(roiOffset / step);
        ofs.x = static_cast<int>((roiOffset - step * ofs.y) / elemBytes);

        CV_DbgAssert(data == datastart + ofs.y * step + ofs.x * elemBytes);
    }
    else
    {
        ofs.x = ofs.y = 0;
    }

    // Bytes of a row that are covered up to the right edge of this header.
    const size_t coveredRowBytes = (ofs.x + cols) * elemBytes;

    wholeSize.height = std::max(static_cast<int>((bufferBytes - coveredRowBytes) / step + 1), ofs.y + rows);
    wholeSize.width = std::max(static_cast<int>((bufferBytes - step * (wholeSize.height - 1)) / elemBytes), ofs.x + cols);
}
|
||||||
|
|
||||||
|
// Moves the borders of this header within the buffer it refers to.
//
// dtop, dbottom, dleft, dright - amount by which each border is pushed
//                                outwards (negative values pull it inwards)
// The resulting rectangle is clamped to the whole-buffer size reported by
// locateROI(). Returns *this.
GpuMat& cv::gpu::GpuMat::adjustROI(int dtop, int dbottom, int dleft, int dright)
{
    Size fullSize;
    Point origin;
    locateROI(fullSize, origin);

    const size_t elemBytes = elemSize();

    // New vertical extent, clamped to [0, fullSize.height].
    const int top = std::max(origin.y - dtop, 0);
    const int bottom = std::min(origin.y + rows + dbottom, fullSize.height);

    // New horizontal extent, clamped to [0, fullSize.width].
    const int left = std::max(origin.x - dleft, 0);
    const int right = std::min(origin.x + cols + dright, fullSize.width);

    // Shift the data pointer from the old top-left corner to the new one.
    data += (top - origin.y) * step + (left - origin.x) * elemBytes;
    rows = bottom - top;
    cols = right - left;

    // Continuous when rows are packed back to back, or when there is one row.
    const bool continuous = (elemBytes * cols == step) || (rows == 1);
    if (continuous)
        flags |= Mat::CONTINUOUS_FLAG;
    else
        flags &= ~Mat::CONTINUOUS_FLAG;

    return *this;
}
|
||||||
|
|
||||||
|
// Returns a new header over the same data with a different channel count
// and/or row count. No data is copied.
//
// new_cn   - requested number of channels; 0 keeps the current count
// new_rows - requested number of rows; 0 keeps the current count (unless the
//            channel change alone cannot be satisfied within a row, in which
//            case a row count is derived)
// Errors (via CV_Error):
//   CV_BadStep        - row count must change but the matrix is not continuous
//   CV_StsOutOfRange  - new_rows exceeds the total number of scalar values
//   CV_StsBadArg      - total size is not divisible by new_rows
//   CV_BadNumChannels - resulting row width is not divisible by new_cn
GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
{
    GpuMat result(*this);

    const int old_cn = channels();
    if (new_cn == 0)
        new_cn = old_cn;

    // Row width counted in single-channel values.
    int scalars_per_row = cols * old_cn;

    // The new channel count does not fit into one row: derive a row count.
    const bool cn_misfit = new_cn > scalars_per_row || scalars_per_row % new_cn != 0;
    if (cn_misfit && new_rows == 0)
        new_rows = rows * scalars_per_row / new_cn;

    if (new_rows != 0 && new_rows != rows)
    {
        const int total_scalars = scalars_per_row * rows;

        if (!isContinuous())
            CV_Error(CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed");

        if ((unsigned)new_rows > (unsigned)total_scalars)
            CV_Error(CV_StsOutOfRange, "Bad new number of rows");

        scalars_per_row = total_scalars / new_rows;

        if (scalars_per_row * new_rows != total_scalars)
            CV_Error(CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");

        result.rows = new_rows;
        result.step = scalars_per_row * elemSize1();
    }

    const int reshaped_cols = scalars_per_row / new_cn;

    if (reshaped_cols * new_cn != scalars_per_row)
        CV_Error(CV_BadNumChannels, "The total width is not divisible by the new number of channels");

    result.cols = reshaped_cols;
    // Replace the channel bits of the type while keeping every other flag.
    result.flags = (result.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);

    return result;
}
|
||||||
|
|
||||||
|
// Constructs a host matrix from a GpuMat: starts from an empty header and
// lets GpuMat::download() allocate the storage and fill it from m.
cv::Mat::Mat(const GpuMat& m)
    : flags(0), dims(0), rows(0), cols(0),
      data(0), refcount(0),
      datastart(0), dataend(0), datalimit(0),
      allocator(0), size(&rows)
{
    m.download(*this);
}
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
void throw_nogpu()
|
||||||
|
{
|
||||||
|
CV_Error(CV_GpuNotSupported, "The library is compiled without GPU support");
|
||||||
|
}
|
||||||
|
|
||||||
|
class EmptyFuncTable : public GpuFuncTable
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
void copy(const Mat&, GpuMat&) const { throw_nogpu(); }
|
||||||
|
void copy(const GpuMat&, Mat&) const { throw_nogpu(); }
|
||||||
|
void copy(const GpuMat&, GpuMat&) const { throw_nogpu(); }
|
||||||
|
|
||||||
|
void copyWithMask(const GpuMat&, GpuMat&, const GpuMat&) const { throw_nogpu(); }
|
||||||
|
|
||||||
|
void convert(const GpuMat&, GpuMat&) const { throw_nogpu(); }
|
||||||
|
void convert(const GpuMat&, GpuMat&, double, double) const { throw_nogpu(); }
|
||||||
|
|
||||||
|
void setTo(GpuMat&, Scalar, const GpuMat&) const { throw_nogpu(); }
|
||||||
|
|
||||||
|
void mallocPitch(void**, size_t*, size_t, size_t) const { throw_nogpu(); }
|
||||||
|
void free(void*) const {}
|
||||||
|
};
|
||||||
|
|
||||||
|
const GpuFuncTable* g_funcTbl = 0;
|
||||||
|
|
||||||
|
const GpuFuncTable* gpuFuncTable()
|
||||||
|
{
|
||||||
|
static EmptyFuncTable empty;
|
||||||
|
return g_funcTbl ? g_funcTbl : ∅
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::gpu::setGpuFuncTable(const GpuFuncTable* funcTbl)
|
||||||
|
{
|
||||||
|
g_funcTbl = funcTbl;
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::gpu::GpuMat::upload(const Mat& m)
|
||||||
|
{
|
||||||
|
CV_DbgAssert(!m.empty());
|
||||||
|
|
||||||
|
create(m.size(), m.type());
|
||||||
|
|
||||||
|
gpuFuncTable()->copy(m, *this);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copies this GpuMat into the host matrix m.
// m is (re)allocated via Mat::create() to match this matrix's size and type,
// then the active function table performs the GpuMat -> Mat copy.
// An empty source is rejected only by a debug-build assertion.
void cv::gpu::GpuMat::download(Mat& m) const
{
    CV_DbgAssert(!empty());

    m.create(size(), type());

    const GpuFuncTable* const table = gpuFuncTable();
    table->copy(*this, m);
}
|
||||||
|
|
||||||
|
// Copies this GpuMat into another GpuMat.
// m is (re)allocated via create() to match this matrix's size and type, then
// the active function table performs the GpuMat -> GpuMat copy.
// An empty source is rejected only by a debug-build assertion.
void cv::gpu::GpuMat::copyTo(GpuMat& m) const
{
    CV_DbgAssert(!empty());

    m.create(size(), type());

    const GpuFuncTable* const table = gpuFuncTable();
    table->copy(*this, m);
}
|
||||||
|
|
||||||
|
// Masked copy of this GpuMat into mat.
// With an empty mask this is exactly the unmasked copyTo(); otherwise mat is
// (re)allocated to this matrix's size and type and the active function
// table's copyWithMask() is invoked.
void cv::gpu::GpuMat::copyTo(GpuMat& mat, const GpuMat& mask) const
{
    if (mask.empty())
    {
        copyTo(mat);
        return;
    }

    mat.create(size(), type());

    gpuFuncTable()->copyWithMask(*this, mat, mask);
}
|
||||||
|
|
||||||
|
// Converts this matrix to another depth, optionally with scaling, writing
// the result into dst.
//
// dst   - destination; (re)allocated to this size with the resulting type
// rtype - requested type; negative means "same type as the source". Only its
//         depth is used, the channel count always comes from the source.
// alpha, beta - scale and shift handed to the function table's scaled
//               convert(); skipped when alpha is 1 and beta is 0 (within
//               machine epsilon)
void cv::gpu::GpuMat::convertTo(GpuMat& dst, int rtype, double alpha, double beta) const
{
    const double eps = numeric_limits<double>::epsilon();
    const bool identityScale = fabs(alpha - 1) < eps && fabs(beta) < eps;

    if (rtype >= 0)
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());
    else
        rtype = type();

    const int srcDepth = depth();
    const int dstDepth = CV_MAT_DEPTH(rtype);

    // Same depth and no scaling: a plain copy does the job.
    if (srcDepth == dstDepth && identityScale)
    {
        copyTo(dst);
        return;
    }

    // In-place conversion to a different depth: dst.create() below will
    // reallocate dst (== *this), so keep an extra header on the old buffer.
    GpuMat keepAlive;
    const GpuMat* source = this;
    if (srcDepth != dstDepth && source == &dst)
    {
        keepAlive = *this;
        source = &keepAlive;
    }

    dst.create(size(), rtype);

    const GpuFuncTable* const table = gpuFuncTable();
    if (identityScale)
        table->convert(*source, dst);
    else
        table->convert(*source, dst, alpha, beta);
}
|
||||||
|
|
||||||
|
// Assigns the value s to the elements of this matrix through the active
// function table.
//
// s    - value to assign
// mask - empty, or a CV_8UC1 matrix (anything else fails CV_Assert); it is
//        forwarded unchanged to the function table's setTo()
// An empty destination is rejected only by a debug-build assertion.
// Returns *this.
GpuMat& cv::gpu::GpuMat::setTo(Scalar s, const GpuMat& mask)
{
    CV_Assert(mask.empty() || mask.type() == CV_8UC1);
    CV_DbgAssert(!empty());

    const GpuFuncTable* const table = gpuFuncTable();
    table->setTo(*this, s, mask);

    return *this;
}
|
||||||
|
|
||||||
|
void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
|
||||||
|
{
|
||||||
|
_type &= TYPE_MASK;
|
||||||
|
|
||||||
|
if (rows == _rows && cols == _cols && type() == _type && data)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (data)
|
||||||
|
release();
|
||||||
|
|
||||||
|
CV_DbgAssert(_rows >= 0 && _cols >= 0);
|
||||||
|
|
||||||
|
if (_rows > 0 && _cols > 0)
|
||||||
|
{
|
||||||
|
flags = Mat::MAGIC_VAL + _type;
|
||||||
|
rows = _rows;
|
||||||
|
cols = _cols;
|
||||||
|
|
||||||
|
size_t esz = elemSize();
|
||||||
|
|
||||||
|
void* devPtr;
|
||||||
|
gpuFuncTable()->mallocPitch(&devPtr, &step, esz * cols, rows);
|
||||||
|
|
||||||
|
// Single row must be continuous
|
||||||
|
if (rows == 1)
|
||||||
|
step = esz * cols;
|
||||||
|
|
||||||
|
if (esz * cols == step)
|
||||||
|
flags |= Mat::CONTINUOUS_FLAG;
|
||||||
|
|
||||||
|
int64 _nettosize = static_cast<int64>(step) * rows;
|
||||||
|
size_t nettosize = static_cast<size_t>(_nettosize);
|
||||||
|
|
||||||
|
datastart = data = static_cast<uchar*>(devPtr);
|
||||||
|
dataend = data + nettosize;
|
||||||
|
|
||||||
|
refcount = static_cast<int*>(fastMalloc(sizeof(*refcount)));
|
||||||
|
*refcount = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::gpu::GpuMat::release()
|
||||||
|
{
|
||||||
|
if (refcount && CV_XADD(refcount, -1) == 1)
|
||||||
|
{
|
||||||
|
fastFree(refcount);
|
||||||
|
|
||||||
|
gpuFuncTable()->free(datastart);
|
||||||
|
}
|
||||||
|
|
||||||
|
data = datastart = dataend = 0;
|
||||||
|
step = rows = cols = 0;
|
||||||
|
refcount = 0;
|
||||||
|
}
|
@ -3,7 +3,8 @@ set(name "gpu")
|
|||||||
set(the_target "opencv_${name}")
|
set(the_target "opencv_${name}")
|
||||||
project(${the_target})
|
project(${the_target})
|
||||||
|
|
||||||
set(DEPS "opencv_core" "opencv_imgproc" "opencv_objdetect" "opencv_features2d" "opencv_flann" "opencv_calib3d") #"opencv_features2d" "opencv_flann" "opencv_objdetect" - only headers needed
|
set(DEPS "opencv_core" "opencv_imgproc" "opencv_calib3d" "opencv_objdetect")
|
||||||
|
set(DEPS_HEADER ${DEPS} "opencv_features2d" "opencv_flann")
|
||||||
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} opencv_gpu)
|
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} opencv_gpu)
|
||||||
|
|
||||||
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
|
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
|
||||||
@ -27,6 +28,13 @@ file(GLOB lib_device_hdrs_detail "src/opencv2/gpu/device/detail/*.h*")
|
|||||||
source_group("Device" FILES ${lib_device_hdrs})
|
source_group("Device" FILES ${lib_device_hdrs})
|
||||||
source_group("Device\\Detail" FILES ${lib_device_hdrs_detail})
|
source_group("Device\\Detail" FILES ${lib_device_hdrs_detail})
|
||||||
|
|
||||||
|
foreach(d ${DEPS_HEADER})
|
||||||
|
if(${d} MATCHES "opencv_")
|
||||||
|
string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})
|
||||||
|
include_directories("${d_dir}/include")
|
||||||
|
endif()
|
||||||
|
endforeach()
|
||||||
|
|
||||||
if (HAVE_CUDA)
|
if (HAVE_CUDA)
|
||||||
file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
|
file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
|
||||||
file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
|
file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
|
||||||
@ -51,7 +59,6 @@ if (HAVE_CUDA)
|
|||||||
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-fno-finite-math-only;")
|
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-fno-finite-math-only;")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|
||||||
string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||||
string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
||||||
string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
||||||
@ -60,7 +67,7 @@ if (HAVE_CUDA)
|
|||||||
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||||
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}")
|
||||||
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
#string(REPLACE "/W4" "/W3" CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG}")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4211 /wd4201 /wd4100 /wd4505 /wd4408")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4211 /wd4201 /wd4100 /wd4505 /wd4408 /wd4251")
|
||||||
|
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
||||||
string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
|
string(REPLACE "/EHsc-" "/EHs" CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE}")
|
||||||
@ -74,17 +81,14 @@ if (HAVE_CUDA)
|
|||||||
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-DCVAPI_EXPORTS")
|
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;-DCVAPI_EXPORTS")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(MSVC)
|
||||||
|
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;/wd4251")
|
||||||
|
endif()
|
||||||
|
|
||||||
CUDA_COMPILE(cuda_objs ${lib_cuda} ${ncv_cuda})
|
CUDA_COMPILE(cuda_objs ${lib_cuda} ${ncv_cuda})
|
||||||
#CUDA_BUILD_CLEAN_TARGET()
|
#CUDA_BUILD_CLEAN_TARGET()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
foreach(d ${DEPS})
|
|
||||||
if(${d} MATCHES "opencv_")
|
|
||||||
string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})
|
|
||||||
include_directories("${d_dir}/include")
|
|
||||||
endif()
|
|
||||||
endforeach()
|
|
||||||
|
|
||||||
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${lib_device_hdrs_detail} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
|
add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${lib_device_hdrs_detail} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
|
||||||
|
|
||||||
# For dynamic link numbering conventions
|
# For dynamic link numbering conventions
|
||||||
|
@ -40,122 +40,4 @@
|
|||||||
//
|
//
|
||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#ifndef __OPENCV_GPU_DevMem2D_HPP__
|
#include "opencv2/core/devmem2d.hpp"
|
||||||
#define __OPENCV_GPU_DevMem2D_HPP__
|
|
||||||
|
|
||||||
|
|
||||||
namespace cv
|
|
||||||
{
|
|
||||||
namespace gpu
|
|
||||||
{
|
|
||||||
// Simple lightweight structures that encapsulates information about an image on device.
|
|
||||||
// It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
|
|
||||||
|
|
||||||
#if defined(__CUDACC__)
|
|
||||||
#define __CV_GPU_HOST_DEVICE__ __host__ __device__ __forceinline__
|
|
||||||
#else
|
|
||||||
#define __CV_GPU_HOST_DEVICE__
|
|
||||||
#endif
|
|
||||||
|
|
||||||
template <bool expr> struct StaticAssert;
|
|
||||||
template <> struct StaticAssert<true> {static __CV_GPU_HOST_DEVICE__ void check(){}};
|
|
||||||
|
|
||||||
template<typename T> struct DevPtr
|
|
||||||
{
|
|
||||||
typedef T elem_type;
|
|
||||||
typedef int index_type;
|
|
||||||
|
|
||||||
enum { elem_size = sizeof(elem_type) };
|
|
||||||
|
|
||||||
T* data;
|
|
||||||
|
|
||||||
__CV_GPU_HOST_DEVICE__ DevPtr() : data(0) {}
|
|
||||||
__CV_GPU_HOST_DEVICE__ DevPtr(T* data_) : data(data_) {}
|
|
||||||
|
|
||||||
__CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
|
|
||||||
__CV_GPU_HOST_DEVICE__ operator T*() { return data; }
|
|
||||||
__CV_GPU_HOST_DEVICE__ operator const T*() const { return data; }
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename T> struct PtrSz : public DevPtr<T>
|
|
||||||
{
|
|
||||||
__CV_GPU_HOST_DEVICE__ PtrSz() : size(0) {}
|
|
||||||
__CV_GPU_HOST_DEVICE__ PtrSz(T* data_, size_t size_) : DevPtr<T>(data_), size(size_) {}
|
|
||||||
|
|
||||||
size_t size;
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename T> struct PtrStep : public DevPtr<T>
|
|
||||||
{
|
|
||||||
__CV_GPU_HOST_DEVICE__ PtrStep() : step(0) {}
|
|
||||||
__CV_GPU_HOST_DEVICE__ PtrStep(T* data_, size_t step_) : DevPtr<T>(data_), step(step_) {}
|
|
||||||
|
|
||||||
/** \brief stride between two consecutive rows in bytes. Step is stored always and everywhere in bytes!!! */
|
|
||||||
size_t step;
|
|
||||||
|
|
||||||
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return ( T*)( ( char*)DevPtr<T>::data + y * step); }
|
|
||||||
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)DevPtr<T>::data + y * step); }
|
|
||||||
|
|
||||||
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
|
|
||||||
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T> struct PtrStepSz : public PtrStep<T>
|
|
||||||
{
|
|
||||||
__CV_GPU_HOST_DEVICE__ PtrStepSz() : cols(0), rows(0) {}
|
|
||||||
__CV_GPU_HOST_DEVICE__ PtrStepSz(int rows_, int cols_, T* data_, size_t step_)
|
|
||||||
: PtrStep<T>(data_, step_), cols(cols_), rows(rows_) {}
|
|
||||||
|
|
||||||
int cols;
|
|
||||||
int rows;
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T> struct DevMem2D_ : public PtrStepSz<T>
|
|
||||||
{
|
|
||||||
DevMem2D_() {}
|
|
||||||
DevMem2D_(int rows_, int cols_, T *data_, size_t step_) : PtrStepSz<T>(rows_, cols_, data_, step_) {}
|
|
||||||
|
|
||||||
template <typename U>
|
|
||||||
explicit DevMem2D_(const DevMem2D_<U>& d) : PtrStepSz<T>(d.rows, d.cols, (T*)d.data, d.step) {}
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename T> struct PtrElemStep_ : public PtrStep<T>
|
|
||||||
{
|
|
||||||
PtrElemStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step)
|
|
||||||
{
|
|
||||||
StaticAssert<256 % sizeof(T) == 0>::check();
|
|
||||||
|
|
||||||
PtrStep<T>::step /= PtrStep<T>::elem_size;
|
|
||||||
}
|
|
||||||
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep<T>::data + y * PtrStep<T>::step; }
|
|
||||||
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep<T>::data + y * PtrStep<T>::step; }
|
|
||||||
|
|
||||||
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
|
|
||||||
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
|
|
||||||
};
|
|
||||||
|
|
||||||
template<typename T> struct PtrStep_ : public PtrStep<T>
|
|
||||||
{
|
|
||||||
PtrStep_() {}
|
|
||||||
PtrStep_(const DevMem2D_<T>& mem) : PtrStep<T>(mem.data, mem.step) {}
|
|
||||||
};
|
|
||||||
|
|
||||||
#undef __CV_GPU_HOST_DEVICE__
|
|
||||||
|
|
||||||
|
|
||||||
typedef DevMem2D_<unsigned char> DevMem2Db;
|
|
||||||
typedef DevMem2Db DevMem2D;
|
|
||||||
typedef DevMem2D_<float> DevMem2Df;
|
|
||||||
typedef DevMem2D_<int> DevMem2Di;
|
|
||||||
|
|
||||||
typedef PtrStep<unsigned char> PtrStepb;
|
|
||||||
typedef PtrStep<float> PtrStepf;
|
|
||||||
typedef PtrStep<int> PtrStepi;
|
|
||||||
|
|
||||||
typedef PtrElemStep_<unsigned char> PtrElemStep;
|
|
||||||
typedef PtrElemStep_<float> PtrElemStepf;
|
|
||||||
typedef PtrElemStep_<int> PtrElemStepi;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* __OPENCV_GPU_DevMem2D_HPP__ */
|
|
||||||
|
@ -43,17 +43,17 @@
|
|||||||
#ifndef __OPENCV_GPU_HPP__
|
#ifndef __OPENCV_GPU_HPP__
|
||||||
#define __OPENCV_GPU_HPP__
|
#define __OPENCV_GPU_HPP__
|
||||||
|
|
||||||
|
#ifndef SKIP_INCLUDES
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "opencv2/core/core.hpp"
|
#endif
|
||||||
|
|
||||||
|
#include "opencv2/core/gpumat.hpp"
|
||||||
#include "opencv2/imgproc/imgproc.hpp"
|
#include "opencv2/imgproc/imgproc.hpp"
|
||||||
#include "opencv2/objdetect/objdetect.hpp"
|
#include "opencv2/objdetect/objdetect.hpp"
|
||||||
#include "opencv2/features2d/features2d.hpp"
|
#include "opencv2/features2d/features2d.hpp"
|
||||||
#include "opencv2/gpu/gpumat.hpp"
|
|
||||||
|
|
||||||
namespace cv
|
namespace cv { namespace gpu {
|
||||||
{
|
|
||||||
namespace gpu
|
|
||||||
{
|
|
||||||
//////////////////////////////// Initialization & Info ////////////////////////
|
//////////////////////////////// Initialization & Info ////////////////////////
|
||||||
|
|
||||||
//! This is the only function that do not throw exceptions if the library is compiled without Cuda.
|
//! This is the only function that do not throw exceptions if the library is compiled without Cuda.
|
||||||
@ -108,7 +108,7 @@ namespace cv
|
|||||||
// Creates DeviceInfo object for the given GPU
|
// Creates DeviceInfo object for the given GPU
|
||||||
DeviceInfo(int device_id) : device_id_(device_id) { query(); }
|
DeviceInfo(int device_id) : device_id_(device_id) { query(); }
|
||||||
|
|
||||||
string name() const { return name_; }
|
std::string name() const { return name_; }
|
||||||
|
|
||||||
// Return compute capability versions
|
// Return compute capability versions
|
||||||
int majorVersion() const { return majorVersion_; }
|
int majorVersion() const { return majorVersion_; }
|
||||||
@ -133,7 +133,7 @@ namespace cv
|
|||||||
|
|
||||||
int device_id_;
|
int device_id_;
|
||||||
|
|
||||||
string name_;
|
std::string name_;
|
||||||
int multi_processor_count_;
|
int multi_processor_count_;
|
||||||
int majorVersion_;
|
int majorVersion_;
|
||||||
int minorVersion_;
|
int minorVersion_;
|
||||||
@ -433,19 +433,27 @@ namespace cv
|
|||||||
CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null());
|
CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! a synonym for normalized box filter
|
//! a synonym for normalized box filter
|
||||||
static inline void blur(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null()) { boxFilter(src, dst, -1, ksize, anchor, stream); }
|
static inline void blur(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null())
|
||||||
|
{
|
||||||
|
boxFilter(src, dst, -1, ksize, anchor, stream);
|
||||||
|
}
|
||||||
|
|
||||||
//! erodes the image (applies the local minimum operator)
|
//! erodes the image (applies the local minimum operator)
|
||||||
CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
|
CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
|
||||||
CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
|
CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf,
|
||||||
|
Point anchor = Point(-1, -1), int iterations = 1,
|
||||||
|
Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! dilates the image (applies the local maximum operator)
|
//! dilates the image (applies the local maximum operator)
|
||||||
CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
|
CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
|
||||||
CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
|
CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf,
|
||||||
|
Point anchor = Point(-1, -1), int iterations = 1,
|
||||||
|
Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! applies an advanced morphological operation to the image
|
//! applies an advanced morphological operation to the image
|
||||||
CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
|
CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1);
|
||||||
CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
|
CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2,
|
||||||
|
Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! applies non-separable 2D linear filter to the image
|
//! applies non-separable 2D linear filter to the image
|
||||||
CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), Stream& stream = Stream::Null());
|
CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), Stream& stream = Stream::Null());
|
||||||
@ -454,7 +462,8 @@ namespace cv
|
|||||||
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
|
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
|
||||||
Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
|
Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
|
||||||
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, GpuMat& buf,
|
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, GpuMat& buf,
|
||||||
Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
|
Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1,
|
||||||
|
Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! applies generalized Sobel operator to the image
|
//! applies generalized Sobel operator to the image
|
||||||
CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1,
|
CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1,
|
||||||
@ -631,11 +640,13 @@ namespace cv
|
|||||||
|
|
||||||
//! Does mean shift filtering on GPU.
|
//! Does mean shift filtering on GPU.
|
||||||
CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
|
CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
|
||||||
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream = Stream::Null());
|
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
|
||||||
|
Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! Does mean shift procedure on GPU.
|
//! Does mean shift procedure on GPU.
|
||||||
CV_EXPORTS void meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr,
|
CV_EXPORTS void meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr,
|
||||||
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1), Stream& stream = Stream::Null());
|
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
|
||||||
|
Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! Does mean shift segmentation with elimination of small regions.
|
//! Does mean shift segmentation with elimination of small regions.
|
||||||
CV_EXPORTS void meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize,
|
CV_EXPORTS void meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr, int minsize,
|
||||||
@ -720,10 +731,8 @@ namespace cv
|
|||||||
CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null());
|
CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! computes Harris cornerness criteria at each image pixel
|
//! computes Harris cornerness criteria at each image pixel
|
||||||
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k,
|
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
|
||||||
int borderType = BORDER_REFLECT101);
|
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k, int borderType = BORDER_REFLECT101);
|
||||||
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, double k,
|
|
||||||
int borderType = BORDER_REFLECT101);
|
|
||||||
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k,
|
CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k,
|
||||||
int borderType = BORDER_REFLECT101, Stream& stream = Stream::Null());
|
int borderType = BORDER_REFLECT101, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
@ -902,12 +911,13 @@ namespace cv
|
|||||||
CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
|
CV_EXPORTS void solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat,
|
||||||
const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false,
|
const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false,
|
||||||
int num_iters=100, float max_dist=8.0, int min_inlier_count=100,
|
int num_iters=100, float max_dist=8.0, int min_inlier_count=100,
|
||||||
vector<int>* inliers=NULL);
|
std::vector<int>* inliers=NULL);
|
||||||
|
|
||||||
//////////////////////////////// Image Labeling ////////////////////////////////
|
//////////////////////////////// Image Labeling ////////////////////////////////
|
||||||
|
|
||||||
//!performs labeling via graph cuts
|
//!performs labeling via graph cuts
|
||||||
CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels, GpuMat& buf, Stream& stream = Stream::Null());
|
CV_EXPORTS void graphcut(GpuMat& terminals, GpuMat& leftTransp, GpuMat& rightTransp, GpuMat& top, GpuMat& bottom, GpuMat& labels,
|
||||||
|
GpuMat& buf, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
////////////////////////////////// Histograms //////////////////////////////////
|
////////////////////////////////// Histograms //////////////////////////////////
|
||||||
|
|
||||||
@ -980,6 +990,7 @@ namespace cv
|
|||||||
// SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
|
// SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
|
||||||
// i.e. input left image is low textured.
|
// i.e. input left image is low textured.
|
||||||
float avergeTexThreshold;
|
float avergeTexThreshold;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
GpuMat minSSD, leBuf, riBuf;
|
GpuMat minSSD, leBuf, riBuf;
|
||||||
};
|
};
|
||||||
@ -1403,11 +1414,11 @@ namespace cv
|
|||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
CascadeClassifier_GPU();
|
CascadeClassifier_GPU();
|
||||||
CascadeClassifier_GPU(const string& filename);
|
CascadeClassifier_GPU(const std::string& filename);
|
||||||
~CascadeClassifier_GPU();
|
~CascadeClassifier_GPU();
|
||||||
|
|
||||||
bool empty() const;
|
bool empty() const;
|
||||||
bool load(const string& filename);
|
bool load(const std::string& filename);
|
||||||
void release();
|
void release();
|
||||||
|
|
||||||
/* returns number of detected objects */
|
/* returns number of detected objects */
|
||||||
@ -1549,14 +1560,13 @@ namespace cv
|
|||||||
float pos, GpuMat& newFrame, GpuMat& buf,
|
float pos, GpuMat& newFrame, GpuMat& buf,
|
||||||
Stream& stream = Stream::Null());
|
Stream& stream = Stream::Null());
|
||||||
|
|
||||||
}
|
} // namespace gpu
|
||||||
|
|
||||||
//! Speckle filtering - filters small connected components on diparity image.
|
//! Speckle filtering - filters small connected components on diparity image.
|
||||||
//! It sets pixel (x,y) to newVal if it coresponds to small CC with size < maxSpeckleSize.
|
//! It sets pixel (x,y) to newVal if it coresponds to small CC with size < maxSpeckleSize.
|
||||||
//! Threshold for border between CC is diffThreshold;
|
//! Threshold for border between CC is diffThreshold;
|
||||||
CV_EXPORTS void filterSpeckles(Mat& img, uchar newVal, int maxSpeckleSize, uchar diffThreshold, Mat& buf);
|
CV_EXPORTS void filterSpeckles(Mat& img, uchar newVal, int maxSpeckleSize, uchar diffThreshold, Mat& buf);
|
||||||
|
|
||||||
}
|
} // namespace cv
|
||||||
#include "opencv2/gpu/matrix_operations.hpp"
|
|
||||||
|
|
||||||
#endif /* __OPENCV_GPU_HPP__ */
|
#endif /* __OPENCV_GPU_HPP__ */
|
||||||
|
@ -40,427 +40,4 @@
|
|||||||
//
|
//
|
||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#ifndef __OPENCV_GPUMAT_HPP__
|
#include "opencv2/core/gpumat.hpp"
|
||||||
#define __OPENCV_GPUMAT_HPP__
|
|
||||||
|
|
||||||
#include "opencv2/core/core.hpp"
|
|
||||||
#include "opencv2/gpu/devmem2d.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace gpu
|
|
||||||
{
|
|
||||||
//! Smart pointer for GPU memory with reference counting. Its interface is mostly similar with cv::Mat.
|
|
||||||
class CV_EXPORTS GpuMat
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
//! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code.
|
|
||||||
// Contains just image size, data ptr and step.
|
|
||||||
template <class T> operator DevMem2D_<T>() const;
|
|
||||||
template <class T> operator PtrStep_<T>() const;
|
|
||||||
template <class T> operator PtrStep<T>() const;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//! builds GpuMat from Mat. Perfom blocking upload to device.
|
|
||||||
explicit GpuMat(const Mat& m);
|
|
||||||
|
|
||||||
//! pefroms blocking upload data to GpuMat.
|
|
||||||
void upload(const Mat& m);
|
|
||||||
|
|
||||||
//! downloads data from device to host memory. Blocking calls.
|
|
||||||
void download(Mat& m) const;
|
|
||||||
operator Mat() const
|
|
||||||
{
|
|
||||||
Mat m;
|
|
||||||
download(m);
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//! default constructor
|
|
||||||
GpuMat();
|
|
||||||
|
|
||||||
//! constructs GpuMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
|
|
||||||
GpuMat(int rows, int cols, int type);
|
|
||||||
GpuMat(Size size, int type);
|
|
||||||
|
|
||||||
//! constucts GpuMatrix and fills it with the specified value _s.
|
|
||||||
GpuMat(int rows, int cols, int type, const Scalar& s);
|
|
||||||
GpuMat(Size size, int type, const Scalar& s);
|
|
||||||
|
|
||||||
//! copy constructor
|
|
||||||
GpuMat(const GpuMat& m);
|
|
||||||
|
|
||||||
//! constructor for GpuMatrix headers pointing to user-allocated data
|
|
||||||
GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
|
|
||||||
GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);
|
|
||||||
|
|
||||||
//! creates a matrix header for a part of the bigger matrix
|
|
||||||
GpuMat(const GpuMat& m, const Range& rowRange, const Range& colRange);
|
|
||||||
GpuMat(const GpuMat& m, const Rect& roi);
|
|
||||||
|
|
||||||
//! destructor - calls release()
|
|
||||||
~GpuMat();
|
|
||||||
|
|
||||||
//! assignment operators
|
|
||||||
GpuMat& operator = (const GpuMat& m);
|
|
||||||
|
|
||||||
//! returns a new GpuMatrix header for the specified row
|
|
||||||
GpuMat row(int y) const;
|
|
||||||
//! returns a new GpuMatrix header for the specified column
|
|
||||||
GpuMat col(int x) const;
|
|
||||||
//! ... for the specified row span
|
|
||||||
GpuMat rowRange(int startrow, int endrow) const;
|
|
||||||
GpuMat rowRange(const Range& r) const;
|
|
||||||
//! ... for the specified column span
|
|
||||||
GpuMat colRange(int startcol, int endcol) const;
|
|
||||||
GpuMat colRange(const Range& r) const;
|
|
||||||
|
|
||||||
//! returns deep copy of the GpuMatrix, i.e. the data is copied
|
|
||||||
GpuMat clone() const;
|
|
||||||
//! copies the GpuMatrix content to "m".
|
|
||||||
// It calls m.create(this->size(), this->type()).
|
|
||||||
void copyTo(GpuMat& m) const;
|
|
||||||
//! copies those GpuMatrix elements to "m" that are marked with non-zero mask elements.
|
|
||||||
void copyTo(GpuMat& m, const GpuMat& mask) const;
|
|
||||||
//! converts GpuMatrix to another datatype with optional scalng. See cvConvertScale.
|
|
||||||
void convertTo(GpuMat& m, int rtype, double alpha = 1, double beta = 0) const;
|
|
||||||
|
|
||||||
void assignTo(GpuMat& m, int type=-1) const;
|
|
||||||
|
|
||||||
//! sets every GpuMatrix element to s
|
|
||||||
GpuMat& operator = (const Scalar& s);
|
|
||||||
//! sets some of the GpuMatrix elements to s, according to the mask
|
|
||||||
GpuMat& setTo(const Scalar& s, const GpuMat& mask = GpuMat());
|
|
||||||
//! creates alternative GpuMatrix header for the same data, with different
|
|
||||||
// number of channels and/or different number of rows. see cvReshape.
|
|
||||||
GpuMat reshape(int cn, int rows = 0) const;
|
|
||||||
|
|
||||||
//! allocates new GpuMatrix data unless the GpuMatrix already has specified size and type.
|
|
||||||
// previous data is unreferenced if needed.
|
|
||||||
void create(int rows, int cols, int type);
|
|
||||||
void create(Size size, int type);
|
|
||||||
//! decreases reference counter;
|
|
||||||
// deallocate the data when reference counter reaches 0.
|
|
||||||
void release();
|
|
||||||
|
|
||||||
//! swaps with other smart pointer
|
|
||||||
void swap(GpuMat& mat);
|
|
||||||
|
|
||||||
//! locates GpuMatrix header within a parent GpuMatrix. See below
|
|
||||||
void locateROI(Size& wholeSize, Point& ofs) const;
|
|
||||||
//! moves/resizes the current GpuMatrix ROI inside the parent GpuMatrix.
|
|
||||||
GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);
|
|
||||||
//! extracts a rectangular sub-GpuMatrix
|
|
||||||
// (this is a generalized form of row, rowRange etc.)
|
|
||||||
GpuMat operator()(Range rowRange, Range colRange) const;
|
|
||||||
GpuMat operator()(const Rect& roi) const;
|
|
||||||
|
|
||||||
//! returns true iff the GpuMatrix data is continuous
|
|
||||||
// (i.e. when there are no gaps between successive rows).
|
|
||||||
// similar to CV_IS_GpuMat_CONT(cvGpuMat->type)
|
|
||||||
bool isContinuous() const;
|
|
||||||
//! returns element size in bytes,
|
|
||||||
// similar to CV_ELEM_SIZE(cvMat->type)
|
|
||||||
size_t elemSize() const;
|
|
||||||
//! returns the size of element channel in bytes.
|
|
||||||
size_t elemSize1() const;
|
|
||||||
//! returns element type, similar to CV_MAT_TYPE(cvMat->type)
|
|
||||||
int type() const;
|
|
||||||
//! returns element type, similar to CV_MAT_DEPTH(cvMat->type)
|
|
||||||
int depth() const;
|
|
||||||
//! returns element type, similar to CV_MAT_CN(cvMat->type)
|
|
||||||
int channels() const;
|
|
||||||
//! returns step/elemSize1()
|
|
||||||
size_t step1() const;
|
|
||||||
//! returns GpuMatrix size:
|
|
||||||
// width == number of columns, height == number of rows
|
|
||||||
Size size() const;
|
|
||||||
//! returns true if GpuMatrix data is NULL
|
|
||||||
bool empty() const;
|
|
||||||
|
|
||||||
//! returns pointer to y-th row
|
|
||||||
uchar* ptr(int y = 0);
|
|
||||||
const uchar* ptr(int y = 0) const;
|
|
||||||
|
|
||||||
//! template version of the above method
|
|
||||||
template<typename _Tp> _Tp* ptr(int y = 0);
|
|
||||||
template<typename _Tp> const _Tp* ptr(int y = 0) const;
|
|
||||||
|
|
||||||
/*! includes several bit-fields:
|
|
||||||
- the magic signature
|
|
||||||
- continuity flag
|
|
||||||
- depth
|
|
||||||
- number of channels
|
|
||||||
*/
|
|
||||||
int flags;
|
|
||||||
|
|
||||||
//! the number of rows and columns
|
|
||||||
int rows, cols;
|
|
||||||
|
|
||||||
//! a distance between successive rows in bytes; includes the gap if any
|
|
||||||
size_t step;
|
|
||||||
|
|
||||||
//! pointer to the data
|
|
||||||
uchar* data;
|
|
||||||
|
|
||||||
//! pointer to the reference counter;
|
|
||||||
// when GpuMatrix points to user-allocated data, the pointer is NULL
|
|
||||||
int* refcount;
|
|
||||||
|
|
||||||
//! helper fields used in locateROI and adjustROI
|
|
||||||
uchar* datastart;
|
|
||||||
uchar* dataend;
|
|
||||||
};
|
|
||||||
|
|
||||||
//! Creates continuous GPU matrix
|
|
||||||
CV_EXPORTS void createContinuous(int rows, int cols, int type, GpuMat& m);
|
|
||||||
CV_EXPORTS GpuMat createContinuous(int rows, int cols, int type);
|
|
||||||
CV_EXPORTS void createContinuous(Size size, int type, GpuMat& m);
|
|
||||||
CV_EXPORTS GpuMat createContinuous(Size size, int type);
|
|
||||||
|
|
||||||
//! Ensures that size of the given matrix is not less than (rows, cols) size
|
|
||||||
//! and matrix type is match specified one too
|
|
||||||
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);
|
|
||||||
CV_EXPORTS void ensureSizeIsEnough(Size size, int type, GpuMat& m);
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
template <class T> inline GpuMat::operator DevMem2D_<T>() const { return DevMem2D_<T>(rows, cols, (T*)data, step); }
|
|
||||||
template <class T> inline GpuMat::operator PtrStep_<T>() const { return PtrStep_<T>(static_cast< DevMem2D_<T> >(*this)); }
|
|
||||||
template <class T> inline GpuMat::operator PtrStep<T>() const { return PtrStep<T>((T*)data, step); }
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
inline GpuMat::GpuMat()
|
|
||||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat::GpuMat(int rows_, int cols_, int type_)
|
|
||||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
|
||||||
{
|
|
||||||
if (rows_ > 0 && cols_ > 0)
|
|
||||||
create(rows_, cols_, type_);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat::GpuMat(Size size_, int type_)
|
|
||||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
|
||||||
{
|
|
||||||
if (size_.height > 0 && size_.width > 0)
|
|
||||||
create(size_.height, size_.width, type_);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat::GpuMat(int rows_, int cols_, int type_, const Scalar& s_)
|
|
||||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
|
||||||
{
|
|
||||||
if (rows_ > 0 && cols_ > 0)
|
|
||||||
{
|
|
||||||
create(rows_, cols_, type_);
|
|
||||||
setTo(s_);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat::GpuMat(Size size_, int type_, const Scalar& s_)
|
|
||||||
: flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
|
||||||
{
|
|
||||||
if (size_.height > 0 && size_.width > 0)
|
|
||||||
{
|
|
||||||
create(size_.height, size_.width, type_);
|
|
||||||
setTo(s_);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat::~GpuMat()
|
|
||||||
{
|
|
||||||
release();
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat GpuMat::clone() const
|
|
||||||
{
|
|
||||||
GpuMat m;
|
|
||||||
copyTo(m);
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void GpuMat::assignTo(GpuMat& m, int type) const
|
|
||||||
{
|
|
||||||
if (type < 0)
|
|
||||||
m = *this;
|
|
||||||
else
|
|
||||||
convertTo(m, type);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline size_t GpuMat::step1() const
|
|
||||||
{
|
|
||||||
return step / elemSize1();
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool GpuMat::empty() const
|
|
||||||
{
|
|
||||||
return data == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename _Tp> inline _Tp* GpuMat::ptr(int y)
|
|
||||||
{
|
|
||||||
return (_Tp*)ptr(y);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename _Tp> inline const _Tp* GpuMat::ptr(int y) const
|
|
||||||
{
|
|
||||||
return (const _Tp*)ptr(y);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void swap(GpuMat& a, GpuMat& b)
|
|
||||||
{
|
|
||||||
a.swap(b);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat GpuMat::row(int y) const
|
|
||||||
{
|
|
||||||
return GpuMat(*this, Range(y, y+1), Range::all());
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat GpuMat::col(int x) const
|
|
||||||
{
|
|
||||||
return GpuMat(*this, Range::all(), Range(x, x+1));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat GpuMat::rowRange(int startrow, int endrow) const
|
|
||||||
{
|
|
||||||
return GpuMat(*this, Range(startrow, endrow), Range::all());
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat GpuMat::rowRange(const Range& r) const
|
|
||||||
{
|
|
||||||
return GpuMat(*this, r, Range::all());
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat GpuMat::colRange(int startcol, int endcol) const
|
|
||||||
{
|
|
||||||
return GpuMat(*this, Range::all(), Range(startcol, endcol));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat GpuMat::colRange(const Range& r) const
|
|
||||||
{
|
|
||||||
return GpuMat(*this, Range::all(), r);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void GpuMat::create(Size size_, int type_)
|
|
||||||
{
|
|
||||||
create(size_.height, size_.width, type_);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat GpuMat::operator()(Range rowRange, Range colRange) const
|
|
||||||
{
|
|
||||||
return GpuMat(*this, rowRange, colRange);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat GpuMat::operator()(const Rect& roi) const
|
|
||||||
{
|
|
||||||
return GpuMat(*this, roi);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline bool GpuMat::isContinuous() const
|
|
||||||
{
|
|
||||||
return (flags & Mat::CONTINUOUS_FLAG) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline size_t GpuMat::elemSize() const
|
|
||||||
{
|
|
||||||
return CV_ELEM_SIZE(flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline size_t GpuMat::elemSize1() const
|
|
||||||
{
|
|
||||||
return CV_ELEM_SIZE1(flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int GpuMat::type() const
|
|
||||||
{
|
|
||||||
return CV_MAT_TYPE(flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int GpuMat::depth() const
|
|
||||||
{
|
|
||||||
return CV_MAT_DEPTH(flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int GpuMat::channels() const
|
|
||||||
{
|
|
||||||
return CV_MAT_CN(flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline Size GpuMat::size() const
|
|
||||||
{
|
|
||||||
return Size(cols, rows);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline unsigned char* GpuMat::ptr(int y)
|
|
||||||
{
|
|
||||||
CV_DbgAssert((unsigned)y < (unsigned)rows);
|
|
||||||
return data + step * y;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline const unsigned char* GpuMat::ptr(int y) const
|
|
||||||
{
|
|
||||||
CV_DbgAssert((unsigned)y < (unsigned)rows);
|
|
||||||
return data + step * y;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat& GpuMat::operator = (const Scalar& s)
|
|
||||||
{
|
|
||||||
setTo(s);
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat createContinuous(int rows, int cols, int type)
|
|
||||||
{
|
|
||||||
GpuMat m;
|
|
||||||
createContinuous(rows, cols, type, m);
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void createContinuous(Size size, int type, GpuMat& m)
|
|
||||||
{
|
|
||||||
createContinuous(size.height, size.width, type, m);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline GpuMat createContinuous(Size size, int type)
|
|
||||||
{
|
|
||||||
GpuMat m;
|
|
||||||
createContinuous(size, type, m);
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void ensureSizeIsEnough(Size size, int type, GpuMat& m)
|
|
||||||
{
|
|
||||||
ensureSizeIsEnough(size.height, size.width, type, m);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void createContinuous(int rows, int cols, int type, GpuMat& m)
|
|
||||||
{
|
|
||||||
int area = rows * cols;
|
|
||||||
if (!m.isContinuous() || m.type() != type || m.size().area() != area)
|
|
||||||
m.create(1, area, type);
|
|
||||||
m = m.reshape(0, rows);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m)
|
|
||||||
{
|
|
||||||
if (m.type() == type && m.rows >= rows && m.cols >= cols)
|
|
||||||
m = m(Rect(0, 0, cols, rows));
|
|
||||||
else
|
|
||||||
m.create(rows, cols, type);
|
|
||||||
}
|
|
||||||
}}
|
|
||||||
|
|
||||||
#endif // __OPENCV_GPUMAT_HPP__
|
|
||||||
|
@ -1,142 +0,0 @@
|
|||||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
||||||
//
|
|
||||||
// By downloading, copying, installing or using the software you agree to this license.
|
|
||||||
// If you do not agree to this license, do not download, install,
|
|
||||||
// copy or use the software.
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// License Agreement
|
|
||||||
// For Open Source Computer Vision Library
|
|
||||||
//
|
|
||||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
|
||||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
|
||||||
// Third party copyrights are property of their respective owners.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
// are permitted provided that the following conditions are met:
|
|
||||||
//
|
|
||||||
// * Redistribution's of source code must retain the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer in the documentation
|
|
||||||
// and/or other GpuMaterials provided with the distribution.
|
|
||||||
//
|
|
||||||
// * The name of the copyright holders may not be used to endorse or promote products
|
|
||||||
// derived from this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// This software is provided by the copyright holders and contributors "as is" and
|
|
||||||
// any express or implied warranties, including, but not limited to, the implied
|
|
||||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
||||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
||||||
// indirect, incidental, special, exemplary, or consequential damages
|
|
||||||
// (including, but not limited to, procurement of substitute goods or services;
|
|
||||||
// loss of use, data, or profits; or business interruption) however caused
|
|
||||||
// and on any theory of liability, whether in contract, strict liability,
|
|
||||||
// or tort (including negligence or otherwise) arising in any way out of
|
|
||||||
// the use of this software, even if advised of the possibility of such damage.
|
|
||||||
//
|
|
||||||
//M*/
|
|
||||||
|
|
||||||
#ifndef __OPENCV_GPU_MATRIX_OPERATIONS_HPP__
|
|
||||||
#define __OPENCV_GPU_MATRIX_OPERATIONS_HPP__
|
|
||||||
|
|
||||||
namespace cv
|
|
||||||
{
|
|
||||||
|
|
||||||
namespace gpu
|
|
||||||
{
|
|
||||||
///////////////////////////////////////////////////////////////////////
|
|
||||||
//////////////////////////////// CudaMem ////////////////////////////////
|
|
||||||
///////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
inline CudaMem::CudaMem() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0) {}
|
|
||||||
inline CudaMem::CudaMem(int _rows, int _cols, int _type, int _alloc_type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
|
|
||||||
{
|
|
||||||
if( _rows > 0 && _cols > 0 )
|
|
||||||
create( _rows, _cols, _type, _alloc_type);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline CudaMem::CudaMem(Size _size, int _type, int _alloc_type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
|
|
||||||
{
|
|
||||||
if( _size.height > 0 && _size.width > 0 )
|
|
||||||
create( _size.height, _size.width, _type, _alloc_type);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline CudaMem::CudaMem(const CudaMem& m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), alloc_type(m.alloc_type)
|
|
||||||
{
|
|
||||||
if( refcount )
|
|
||||||
CV_XADD(refcount, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline CudaMem::CudaMem(const Mat& m, int _alloc_type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0), alloc_type(0)
|
|
||||||
{
|
|
||||||
if( m.rows > 0 && m.cols > 0 )
|
|
||||||
create( m.size(), m.type(), _alloc_type);
|
|
||||||
|
|
||||||
Mat tmp = createMatHeader();
|
|
||||||
m.copyTo(tmp);
|
|
||||||
}
|
|
||||||
|
|
||||||
inline CudaMem::~CudaMem()
|
|
||||||
{
|
|
||||||
release();
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
inline CudaMem& CudaMem::operator = (const CudaMem& m)
|
|
||||||
{
|
|
||||||
if( this != &m )
|
|
||||||
{
|
|
||||||
if( m.refcount )
|
|
||||||
CV_XADD(m.refcount, 1);
|
|
||||||
release();
|
|
||||||
flags = m.flags;
|
|
||||||
rows = m.rows; cols = m.cols;
|
|
||||||
step = m.step; data = m.data;
|
|
||||||
datastart = m.datastart;
|
|
||||||
dataend = m.dataend;
|
|
||||||
refcount = m.refcount;
|
|
||||||
alloc_type = m.alloc_type;
|
|
||||||
}
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline CudaMem CudaMem::clone() const
|
|
||||||
{
|
|
||||||
CudaMem m(size(), type(), alloc_type);
|
|
||||||
Mat to = m;
|
|
||||||
Mat from = *this;
|
|
||||||
from.copyTo(to);
|
|
||||||
return m;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline void CudaMem::create(Size _size, int _type, int _alloc_type) { create(_size.height, _size.width, _type, _alloc_type); }
|
|
||||||
|
|
||||||
|
|
||||||
//CCP void CudaMem::create(int _rows, int _cols, int _type, int _alloc_type);
|
|
||||||
//CPP void CudaMem::release();
|
|
||||||
|
|
||||||
inline Mat CudaMem::createMatHeader() const { return Mat(size(), type(), data, step); }
|
|
||||||
inline CudaMem::operator Mat() const { return createMatHeader(); }
|
|
||||||
|
|
||||||
inline CudaMem::operator GpuMat() const { return createGpuMatHeader(); }
|
|
||||||
//CPP GpuMat CudaMem::createGpuMatHeader() const;
|
|
||||||
|
|
||||||
inline bool CudaMem::isContinuous() const { return (flags & Mat::CONTINUOUS_FLAG) != 0; }
|
|
||||||
inline size_t CudaMem::elemSize() const { return CV_ELEM_SIZE(flags); }
|
|
||||||
inline size_t CudaMem::elemSize1() const { return CV_ELEM_SIZE1(flags); }
|
|
||||||
inline int CudaMem::type() const { return CV_MAT_TYPE(flags); }
|
|
||||||
inline int CudaMem::depth() const { return CV_MAT_DEPTH(flags); }
|
|
||||||
inline int CudaMem::channels() const { return CV_MAT_CN(flags); }
|
|
||||||
inline size_t CudaMem::step1() const { return step/elemSize1(); }
|
|
||||||
inline Size CudaMem::size() const { return Size(cols, rows); }
|
|
||||||
inline bool CudaMem::empty() const { return data == 0; }
|
|
||||||
|
|
||||||
} /* end of namespace gpu */
|
|
||||||
|
|
||||||
} /* end of namespace cv */
|
|
||||||
|
|
||||||
#endif /* __OPENCV_GPU_MATRIX_OPERATIONS_HPP__ */
|
|
@ -24,7 +24,7 @@ PERF_TEST_P(DevInfo_Size_MatType, transpose, testing::Combine(testing::ValuesIn(
|
|||||||
transpose(src, dst);
|
transpose(src, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -55,7 +55,7 @@ PERF_TEST_P(DevInfo_Size_MatType_FlipCode, flip, testing::Combine(testing::Value
|
|||||||
flip(src, dst, flipCode);
|
flip(src, dst, flipCode);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -85,7 +85,7 @@ PERF_TEST_P(DevInfo_Size_MatType, LUT, testing::Combine(testing::ValuesIn(device
|
|||||||
LUT(src, lut, dst);
|
LUT(src, lut, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -115,8 +115,8 @@ PERF_TEST_P(DevInfo_Size, cartToPolar, testing::Combine(testing::ValuesIn(device
|
|||||||
cartToPolar(x, y, magnitude, angle);
|
cartToPolar(x, y, magnitude, angle);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat magnitude_host = magnitude;
|
Mat magnitude_host(magnitude);
|
||||||
Mat angle_host = angle;
|
Mat angle_host(angle);
|
||||||
|
|
||||||
SANITY_CHECK(magnitude_host);
|
SANITY_CHECK(magnitude_host);
|
||||||
SANITY_CHECK(angle_host);
|
SANITY_CHECK(angle_host);
|
||||||
@ -147,8 +147,8 @@ PERF_TEST_P(DevInfo_Size, polarToCart, testing::Combine(testing::ValuesIn(device
|
|||||||
polarToCart(magnitude, angle, x, y);
|
polarToCart(magnitude, angle, x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat x_host = x;
|
Mat x_host(x);
|
||||||
Mat y_host = angle;
|
Mat y_host(y);
|
||||||
|
|
||||||
SANITY_CHECK(x_host);
|
SANITY_CHECK(x_host);
|
||||||
SANITY_CHECK(y_host);
|
SANITY_CHECK(y_host);
|
||||||
@ -180,7 +180,7 @@ PERF_TEST_P(DevInfo_Size_MatType, addMat, testing::Combine(testing::ValuesIn(dev
|
|||||||
add(a, b, c);
|
add(a, b, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat c_host = c;
|
Mat c_host(c);
|
||||||
|
|
||||||
SANITY_CHECK(c_host);
|
SANITY_CHECK(c_host);
|
||||||
}
|
}
|
||||||
@ -210,7 +210,7 @@ PERF_TEST_P(DevInfo_Size_MatType, addScalar, testing::Combine(testing::ValuesIn(
|
|||||||
add(a, b, c);
|
add(a, b, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat c_host = c;
|
Mat c_host(c);
|
||||||
|
|
||||||
SANITY_CHECK(c_host);
|
SANITY_CHECK(c_host);
|
||||||
}
|
}
|
||||||
@ -241,7 +241,7 @@ PERF_TEST_P(DevInfo_Size_MatType, subtractMat, testing::Combine(testing::ValuesI
|
|||||||
subtract(a, b, c);
|
subtract(a, b, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat c_host = c;
|
Mat c_host(c);
|
||||||
|
|
||||||
SANITY_CHECK(c_host);
|
SANITY_CHECK(c_host);
|
||||||
}
|
}
|
||||||
@ -270,7 +270,7 @@ PERF_TEST_P(DevInfo_Size, multiplyMat, testing::Combine(testing::ValuesIn(device
|
|||||||
multiply(a, b, c);
|
multiply(a, b, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat c_host = c;
|
Mat c_host(c);
|
||||||
|
|
||||||
SANITY_CHECK(c_host);
|
SANITY_CHECK(c_host);
|
||||||
}
|
}
|
||||||
@ -300,7 +300,7 @@ PERF_TEST_P(DevInfo_Size_MatType, multiplyScalar, testing::Combine(testing::Valu
|
|||||||
multiply(a, b, c);
|
multiply(a, b, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat c_host = c;
|
Mat c_host(c);
|
||||||
|
|
||||||
SANITY_CHECK(c_host);
|
SANITY_CHECK(c_host);
|
||||||
}
|
}
|
||||||
@ -327,7 +327,7 @@ PERF_TEST_P(DevInfo_Size, exp, testing::Combine(testing::ValuesIn(devices()),
|
|||||||
exp(a, b);
|
exp(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat b_host = b;
|
Mat b_host(b);
|
||||||
|
|
||||||
SANITY_CHECK(b_host);
|
SANITY_CHECK(b_host);
|
||||||
}
|
}
|
||||||
@ -356,7 +356,7 @@ PERF_TEST_P(DevInfo_Size_MatType, pow, testing::Combine(testing::ValuesIn(device
|
|||||||
pow(src, 2.0, dst);
|
pow(src, 2.0, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -389,7 +389,7 @@ PERF_TEST_P(DevInfo_Size_MatType_CmpOp, compare, testing::Combine(testing::Value
|
|||||||
compare(src1, src2, dst, cmpop);
|
compare(src1, src2, dst, cmpop);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -418,7 +418,7 @@ PERF_TEST_P(DevInfo_Size_MatType, bitwise_not, testing::Combine(testing::ValuesI
|
|||||||
bitwise_not(src, dst);
|
bitwise_not(src, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -449,7 +449,7 @@ PERF_TEST_P(DevInfo_Size_MatType, bitwise_and, testing::Combine(testing::ValuesI
|
|||||||
bitwise_and(src1, src2, dst);
|
bitwise_and(src1, src2, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -480,7 +480,7 @@ PERF_TEST_P(DevInfo_Size_MatType, min, testing::Combine(testing::ValuesIn(device
|
|||||||
min(src1, src2, dst);
|
min(src1, src2, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -712,7 +712,7 @@ PERF_TEST_P(DevInfo_Size_MatType, addWeighted, testing::Combine(testing::ValuesI
|
|||||||
addWeighted(src1, 0.5, src2, 0.5, 0.0, dst);
|
addWeighted(src1, 0.5, src2, 0.5, 0.0, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -743,7 +743,7 @@ PERF_TEST_P(DevInfo_Size_MatType_FlipCode, reduce, testing::Combine(testing::Val
|
|||||||
reduce(src, dst, dim, CV_REDUCE_MIN);
|
reduce(src, dst, dim, CV_REDUCE_MIN);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -774,7 +774,7 @@ PERF_TEST_P(DevInfo_Size, gemm, testing::Combine(testing::ValuesIn(devices()),
|
|||||||
gemm(src1, src2, 1.0, src3, 1.0, dst);
|
gemm(src1, src2, 1.0, src3, 1.0, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
|
@ -20,7 +20,7 @@ PERF_TEST_P(DevInfo, transformPoints, testing::ValuesIn(devices()))
|
|||||||
transformPoints(src, Mat::ones(1, 3, CV_32FC1), Mat::ones(1, 3, CV_32FC1), dst);
|
transformPoints(src, Mat::ones(1, 3, CV_32FC1), Mat::ones(1, 3, CV_32FC1), dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -45,7 +45,7 @@ PERF_TEST_P(DevInfo, projectPoints, testing::ValuesIn(devices()))
|
|||||||
projectPoints(src, Mat::ones(1, 3, CV_32FC1), Mat::ones(1, 3, CV_32FC1), Mat::ones(3, 3, CV_32FC1), Mat(), dst);
|
projectPoints(src, Mat::ones(1, 3, CV_32FC1), Mat::ones(1, 3, CV_32FC1), Mat::ones(3, 3, CV_32FC1), Mat(), dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
|
@ -28,7 +28,7 @@ PERF_TEST_P(DevInfo_Size_MatType_KernelSize, boxFilter, testing::Combine(testing
|
|||||||
filter->apply(src, dst);
|
filter->apply(src, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -63,7 +63,7 @@ PERF_TEST_P(DevInfo_Size_MatType_MorphOp_KernelSize, morphologyFilter, testing::
|
|||||||
filter->apply(src, dst);
|
filter->apply(src, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -96,7 +96,7 @@ PERF_TEST_P(DevInfo_Size_MatType_KernelSize, linearFilter, testing::Combine(test
|
|||||||
filter->apply(src, dst);
|
filter->apply(src, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -130,7 +130,7 @@ PERF_TEST_P(DevInfo_Size_MatType_KernelSize, separableLinearFilter, testing::Com
|
|||||||
filter->apply(src, dst, Rect(0, 0, src.cols, src.rows));
|
filter->apply(src, dst, Rect(0, 0, src.cols, src.rows));
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
|
@ -36,7 +36,7 @@ PERF_TEST_P(DevInfo_Size_MatType_Interpolation_BorderMode, remap, testing::Combi
|
|||||||
remap(src, dst, xmap, ymap, interpolation, borderMode);
|
remap(src, dst, xmap, ymap, interpolation, borderMode);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -63,7 +63,7 @@ PERF_TEST_P(DevInfo, meanShiftFiltering, testing::ValuesIn(devices()))
|
|||||||
meanShiftFiltering(src, dst, 50, 50);
|
meanShiftFiltering(src, dst, 50, 50);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -91,8 +91,8 @@ PERF_TEST_P(DevInfo, meanShiftProc, testing::ValuesIn(devices()))
|
|||||||
meanShiftProc(src, dstr, dstsp, 50, 50);
|
meanShiftProc(src, dstr, dstsp, 50, 50);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dstr_host = dstr;
|
Mat dstr_host(dstr);
|
||||||
Mat dstsp_host = dstsp;
|
Mat dstsp_host(dstsp);
|
||||||
|
|
||||||
SANITY_CHECK(dstr_host);
|
SANITY_CHECK(dstr_host);
|
||||||
SANITY_CHECK(dstsp_host);
|
SANITY_CHECK(dstsp_host);
|
||||||
|
@ -25,7 +25,7 @@ PERF_TEST_P(DevInfo_Size_MatType, merge, testing::Combine(testing::ValuesIn(devi
|
|||||||
merge(src, dst);
|
merge(src, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -82,7 +82,7 @@ PERF_TEST_P(DevInfo_Size_MatType, setTo, testing::Combine(testing::ValuesIn(devi
|
|||||||
src.setTo(val);
|
src.setTo(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat src_host = src;
|
Mat src_host(src);
|
||||||
|
|
||||||
SANITY_CHECK(src_host);
|
SANITY_CHECK(src_host);
|
||||||
}
|
}
|
||||||
@ -115,7 +115,7 @@ PERF_TEST_P(DevInfo_Size_MatType, setToMasked, testing::Combine(testing::ValuesI
|
|||||||
src.setTo(val, mask);
|
src.setTo(val, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
src_host = src;
|
src.download(src_host);
|
||||||
|
|
||||||
SANITY_CHECK(src_host);
|
SANITY_CHECK(src_host);
|
||||||
}
|
}
|
||||||
@ -148,7 +148,7 @@ PERF_TEST_P(DevInfo_Size_MatType, copyToMasked, testing::Combine(testing::Values
|
|||||||
src.copyTo(dst, mask);
|
src.copyTo(dst, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
@ -182,7 +182,7 @@ PERF_TEST_P(DevInfo_Size_MatType_MatType, convertTo, testing::Combine(testing::V
|
|||||||
src.convertTo(dst, type2, a, b);
|
src.convertTo(dst, type2, a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
Mat dst_host = dst;
|
Mat dst_host(dst);
|
||||||
|
|
||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
|
@ -425,16 +425,22 @@ void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst, Stream& stream)
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Polar <-> Cart
|
// Polar <-> Cart
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace mathfunc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace mathfunc
|
||||||
{
|
{
|
||||||
void cartToPolar_gpu(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, bool magSqr, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream);
|
void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
|
||||||
void polarToCart_gpu(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream);
|
void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
inline void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream)
|
inline void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
|
||||||
|
|
||||||
CV_DbgAssert(x.size() == y.size() && x.type() == y.type());
|
CV_DbgAssert(x.size() == y.size() && x.type() == y.type());
|
||||||
CV_Assert(x.depth() == CV_32F);
|
CV_Assert(x.depth() == CV_32F);
|
||||||
|
|
||||||
@ -448,11 +454,13 @@ namespace
|
|||||||
GpuMat mag1cn = mag ? mag->reshape(1) : GpuMat();
|
GpuMat mag1cn = mag ? mag->reshape(1) : GpuMat();
|
||||||
GpuMat angle1cn = angle ? angle->reshape(1) : GpuMat();
|
GpuMat angle1cn = angle ? angle->reshape(1) : GpuMat();
|
||||||
|
|
||||||
mathfunc::cartToPolar_gpu(x1cn, y1cn, mag1cn, magSqr, angle1cn, angleInDegrees, stream);
|
cartToPolar_gpu(x1cn, y1cn, mag1cn, magSqr, angle1cn, angleInDegrees, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream)
|
inline void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ mathfunc;
|
||||||
|
|
||||||
CV_DbgAssert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
|
CV_DbgAssert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
|
||||||
CV_Assert(mag.depth() == CV_32F);
|
CV_Assert(mag.depth() == CV_32F);
|
||||||
|
|
||||||
@ -464,34 +472,33 @@ namespace
|
|||||||
GpuMat x1cn = x.reshape(1);
|
GpuMat x1cn = x.reshape(1);
|
||||||
GpuMat y1cn = y.reshape(1);
|
GpuMat y1cn = y.reshape(1);
|
||||||
|
|
||||||
mathfunc::polarToCart_gpu(mag1cn, angle1cn, x1cn, y1cn, angleInDegrees, stream);
|
polarToCart_gpu(mag1cn, angle1cn, x1cn, y1cn, angleInDegrees, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
|
void cv::gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
::cartToPolar_caller(x, y, &dst, false, 0, false, StreamAccessor::getStream(stream));
|
cartToPolar_caller(x, y, &dst, false, 0, false, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
|
void cv::gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
::cartToPolar_caller(x, y, &dst, true, 0, false, StreamAccessor::getStream(stream));
|
cartToPolar_caller(x, y, &dst, true, 0, false, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, Stream& stream)
|
void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, Stream& stream)
|
||||||
{
|
{
|
||||||
::cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
|
cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& mag, GpuMat& angle, bool angleInDegrees, Stream& stream)
|
void cv::gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& mag, GpuMat& angle, bool angleInDegrees, Stream& stream)
|
||||||
{
|
{
|
||||||
::cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
|
cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, Stream& stream)
|
void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, Stream& stream)
|
||||||
{
|
{
|
||||||
::polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream));
|
polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif /* !defined (HAVE_CUDA) */
|
#endif /* !defined (HAVE_CUDA) */
|
||||||
|
@ -55,13 +55,19 @@ void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&,
|
|||||||
|
|
||||||
#else /* !defined (HAVE_CUDA) */
|
#else /* !defined (HAVE_CUDA) */
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bf
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc);
|
|
||||||
|
|
||||||
void bilateral_filter_gpu(const DevMem2Db& disp, const DevMem2Db& img, int channels, int iters, cudaStream_t stream);
|
namespace bilateral_filter
|
||||||
void bilateral_filter_gpu(const DevMem2D_<short>& disp, const DevMem2Db& img, int channels, int iters, cudaStream_t stream);
|
{
|
||||||
}}}
|
void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc);
|
||||||
|
|
||||||
|
void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
|
||||||
|
void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ bilateral_filter;
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
@ -105,7 +111,7 @@ namespace
|
|||||||
short edge_disc = max<short>(short(1), short(ndisp * edge_threshold + 0.5));
|
short edge_disc = max<short>(short(1), short(ndisp * edge_threshold + 0.5));
|
||||||
short max_disc = short(ndisp * max_disc_threshold + 0.5);
|
short max_disc = short(ndisp * max_disc_threshold + 0.5);
|
||||||
|
|
||||||
bf::load_constants(table_color.ptr<float>(), table_space, ndisp, radius, edge_disc, max_disc);
|
load_constants(table_color.ptr<float>(), table_space, ndisp, radius, edge_disc, max_disc);
|
||||||
|
|
||||||
if (&dst != &disp)
|
if (&dst != &disp)
|
||||||
{
|
{
|
||||||
@ -115,7 +121,7 @@ namespace
|
|||||||
disp.copyTo(dst);
|
disp.copyTo(dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
bf::bilateral_filter_gpu((DevMem2D_<T>)dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
|
bilateral_filter_gpu((DevMem2D_<T>)dst, img, img.channels(), iters, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
|
typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold,
|
||||||
|
@ -52,15 +52,19 @@ void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const Gpu
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
namespace cv { namespace gpu
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace blend
|
||||||
{
|
{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void blendLinearCaller(int rows, int cols, int cn, const PtrStep<T>& img1, const PtrStep<T>& img2,
|
void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream);
|
||||||
const PtrStepf& weights1, const PtrStepf& weights2, PtrStep<T> result, cudaStream_t stream);
|
|
||||||
|
|
||||||
void blendLinearCaller8UC4(int rows, int cols, const PtrStepb& img1, const PtrStepb& img2,
|
void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream);
|
||||||
const PtrStepf& weights1, const PtrStepf& weights2, PtrStepb result, cudaStream_t stream);
|
}
|
||||||
}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ blend;
|
||||||
|
|
||||||
void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
|
void cv::gpu::blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
|
||||||
GpuMat& result, Stream& stream)
|
GpuMat& result, Stream& stream)
|
||||||
|
@ -82,7 +82,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatch(const GpuMat&, vector< vec
|
|||||||
|
|
||||||
#else /* !defined (HAVE_CUDA) */
|
#else /* !defined (HAVE_CUDA) */
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bf_match
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace bf_match
|
||||||
{
|
{
|
||||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
||||||
@ -103,9 +105,9 @@ namespace cv { namespace gpu { namespace bf_match
|
|||||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bf_knnmatch
|
namespace bf_knnmatch
|
||||||
{
|
{
|
||||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||||
@ -126,9 +128,9 @@ namespace cv { namespace gpu { namespace bf_knnmatch
|
|||||||
template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
template <typename T> void match2Hamming_gpu(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bf_radius_match
|
namespace bf_radius_match
|
||||||
{
|
{
|
||||||
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
template <typename T> void matchL1_gpu(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||||
@ -151,15 +153,17 @@ namespace cv { namespace gpu { namespace bf_radius_match
|
|||||||
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
template <typename T> void matchHamming_gpu(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||||
int cc, cudaStream_t stream);
|
int cc, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////
|
||||||
|
// Train collection
|
||||||
|
|
||||||
cv::gpu::BruteForceMatcher_GPU_base::BruteForceMatcher_GPU_base(DistType distType_) : distType(distType_)
|
cv::gpu::BruteForceMatcher_GPU_base::BruteForceMatcher_GPU_base(DistType distType_) : distType(distType_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////
|
|
||||||
// Train collection
|
|
||||||
|
|
||||||
void cv::gpu::BruteForceMatcher_GPU_base::add(const vector<GpuMat>& descCollection)
|
void cv::gpu::BruteForceMatcher_GPU_base::add(const vector<GpuMat>& descCollection)
|
||||||
{
|
{
|
||||||
trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end());
|
trainDescCollection.insert(trainDescCollection.end(), descCollection.begin(), descCollection.end());
|
||||||
@ -195,7 +199,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchSingle(const GpuMat& query, const
|
|||||||
if (query.empty() || train.empty())
|
if (query.empty() || train.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
using namespace cv::gpu::bf_match;
|
using namespace OPENCV_DEVICE_NAMESPACE_ bf_match;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, const DevMem2Db& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
const DevMem2Di& trainIdx, const DevMem2Df& distance,
|
||||||
@ -242,8 +246,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx,
|
|||||||
if (trainIdx.empty() || distance.empty())
|
if (trainIdx.empty() || distance.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Mat trainIdxCPU = trainIdx;
|
Mat trainIdxCPU(trainIdx);
|
||||||
Mat distanceCPU = distance;
|
Mat distanceCPU(distance);
|
||||||
|
|
||||||
matchConvert(trainIdxCPU, distanceCPU, matches);
|
matchConvert(trainIdxCPU, distanceCPU, matches);
|
||||||
}
|
}
|
||||||
@ -337,7 +341,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchCollection(const GpuMat& query, c
|
|||||||
if (query.empty() || trainCollection.empty())
|
if (query.empty() || trainCollection.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
using namespace cv::gpu::bf_match;
|
using namespace OPENCV_DEVICE_NAMESPACE_ bf_match;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance,
|
||||||
@ -384,9 +388,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::matchDownload(const GpuMat& trainIdx,
|
|||||||
if (trainIdx.empty() || imgIdx.empty() || distance.empty())
|
if (trainIdx.empty() || imgIdx.empty() || distance.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Mat trainIdxCPU = trainIdx;
|
Mat trainIdxCPU(trainIdx);
|
||||||
Mat imgIdxCPU = imgIdx;
|
Mat imgIdxCPU(imgIdx);
|
||||||
Mat distanceCPU = distance;
|
Mat distanceCPU(distance);
|
||||||
|
|
||||||
matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches);
|
matchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, matches);
|
||||||
}
|
}
|
||||||
@ -448,7 +452,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchSingle(const GpuMat& query, co
|
|||||||
if (query.empty() || train.empty())
|
if (query.empty() || train.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
using namespace cv::gpu::bf_knnmatch;
|
using namespace OPENCV_DEVICE_NAMESPACE_ bf_knnmatch;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, int k, const DevMem2Db& mask,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
const DevMem2Db& trainIdx, const DevMem2Db& distance, const DevMem2Df& allDist,
|
||||||
@ -511,8 +515,8 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatchDownload(const GpuMat& trainId
|
|||||||
if (trainIdx.empty() || distance.empty())
|
if (trainIdx.empty() || distance.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Mat trainIdxCPU = trainIdx;
|
Mat trainIdxCPU(trainIdx);
|
||||||
Mat distanceCPU = distance;
|
Mat distanceCPU(distance);
|
||||||
|
|
||||||
knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult);
|
knnMatchConvert(trainIdxCPU, distanceCPU, matches, compactResult);
|
||||||
}
|
}
|
||||||
@ -577,7 +581,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Collection(const GpuMat& quer
|
|||||||
if (query.empty() || trainCollection.empty())
|
if (query.empty() || trainCollection.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
using namespace cv::gpu::bf_knnmatch;
|
using namespace OPENCV_DEVICE_NAMESPACE_ bf_knnmatch;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks,
|
||||||
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance,
|
||||||
@ -630,9 +634,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::knnMatch2Download(const GpuMat& trainI
|
|||||||
if (trainIdx.empty() || imgIdx.empty() || distance.empty())
|
if (trainIdx.empty() || imgIdx.empty() || distance.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Mat trainIdxCPU = trainIdx;
|
Mat trainIdxCPU(trainIdx);
|
||||||
Mat imgIdxCPU = imgIdx;
|
Mat imgIdxCPU(imgIdx);
|
||||||
Mat distanceCPU = distance;
|
Mat distanceCPU(distance);
|
||||||
|
|
||||||
knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult);
|
knnMatch2Convert(trainIdxCPU, imgIdxCPU, distanceCPU, matches, compactResult);
|
||||||
}
|
}
|
||||||
@ -758,7 +762,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchSingle(const GpuMat& query,
|
|||||||
if (query.empty() || train.empty())
|
if (query.empty() || train.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
using namespace cv::gpu::bf_radius_match;
|
using namespace OPENCV_DEVICE_NAMESPACE_ bf_radius_match;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db& train, float maxDistance, const DevMem2Db& mask,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const DevMem2Di& trainIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||||
@ -819,9 +823,9 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trai
|
|||||||
if (trainIdx.empty() || distance.empty() || nMatches.empty())
|
if (trainIdx.empty() || distance.empty() || nMatches.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Mat trainIdxCPU = trainIdx;
|
Mat trainIdxCPU(trainIdx);
|
||||||
Mat distanceCPU = distance;
|
Mat distanceCPU(distance);
|
||||||
Mat nMatchesCPU = nMatches;
|
Mat nMatchesCPU(nMatches);
|
||||||
|
|
||||||
radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
|
radiusMatchConvert(trainIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
|
||||||
}
|
}
|
||||||
@ -889,7 +893,7 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchCollection(const GpuMat& qu
|
|||||||
if (query.empty() || empty())
|
if (query.empty() || empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
using namespace cv::gpu::bf_radius_match;
|
using namespace OPENCV_DEVICE_NAMESPACE_ bf_radius_match;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
typedef void (*caller_t)(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks,
|
||||||
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches,
|
||||||
@ -953,10 +957,10 @@ void cv::gpu::BruteForceMatcher_GPU_base::radiusMatchDownload(const GpuMat& trai
|
|||||||
if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
|
if (trainIdx.empty() || imgIdx.empty() || distance.empty() || nMatches.empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
Mat trainIdxCPU = trainIdx;
|
Mat trainIdxCPU(trainIdx);
|
||||||
Mat imgIdxCPU = imgIdx;
|
Mat imgIdxCPU(imgIdx);
|
||||||
Mat distanceCPU = distance;
|
Mat distanceCPU(distance);
|
||||||
Mat nMatchesCPU = nMatches;
|
Mat nMatchesCPU(nMatches);
|
||||||
|
|
||||||
radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
|
radiusMatchConvert(trainIdxCPU, imgIdxCPU, distanceCPU, nMatchesCPU, matches, compactResult);
|
||||||
}
|
}
|
||||||
|
@ -42,6 +42,10 @@
|
|||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::gpu;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
#if !defined(HAVE_CUDA)
|
#if !defined(HAVE_CUDA)
|
||||||
|
|
||||||
void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
|
void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||||
@ -52,13 +56,31 @@ void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
using namespace cv;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu;
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace transform_points
|
namespace transform_points
|
||||||
{
|
{
|
||||||
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream);
|
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, DevMem2D_<float3> dst, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
namespace project_points
|
||||||
|
{
|
||||||
|
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace solve_pnp_ransac
|
||||||
|
{
|
||||||
|
int maxNumIters();
|
||||||
|
|
||||||
|
void computeHypothesisScores(
|
||||||
|
const int num_hypotheses, const int num_points, const float* rot_matrices,
|
||||||
|
const float3* transl_vectors, const float3* object, const float2* image,
|
||||||
|
const float dist_threshold, int* hypothesis_scores);
|
||||||
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
@ -79,15 +101,9 @@ namespace
|
|||||||
|
|
||||||
void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream)
|
void cv::gpu::transformPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
::transformPointsCaller(src, rvec, tvec, dst, StreamAccessor::getStream(stream));
|
transformPointsCaller(src, rvec, tvec, dst, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace project_points
|
|
||||||
{
|
|
||||||
void call(const DevMem2D_<float3> src, const float* rot, const float* transl, const float* proj, DevMem2D_<float2> dst, cudaStream_t stream);
|
|
||||||
}}}
|
|
||||||
|
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, cudaStream_t stream)
|
void projectPointsCaller(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, cudaStream_t stream)
|
||||||
@ -109,20 +125,9 @@ namespace
|
|||||||
|
|
||||||
void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream)
|
void cv::gpu::projectPoints(const GpuMat& src, const Mat& rvec, const Mat& tvec, const Mat& camera_mat, const Mat& dist_coef, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
::projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream));
|
projectPointsCaller(src, rvec, tvec, camera_mat, dist_coef, dst, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace solve_pnp_ransac
|
|
||||||
{
|
|
||||||
int maxNumIters();
|
|
||||||
|
|
||||||
void computeHypothesisScores(
|
|
||||||
const int num_hypotheses, const int num_points, const float* rot_matrices,
|
|
||||||
const float3* transl_vectors, const float3* object, const float2* image,
|
|
||||||
const float dist_threshold, int* hypothesis_scores);
|
|
||||||
}}}
|
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
// Selects subset_size random different points from [0, num_points - 1] range
|
// Selects subset_size random different points from [0, num_points - 1] range
|
||||||
|
@ -46,7 +46,6 @@ using namespace cv;
|
|||||||
using namespace cv::gpu;
|
using namespace cv::gpu;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
|
||||||
#if !defined (HAVE_CUDA)
|
#if !defined (HAVE_CUDA)
|
||||||
|
|
||||||
cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() { throw_nogpu(); }
|
cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() { throw_nogpu(); }
|
||||||
|
@ -51,8 +51,8 @@ void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int, Stream&) { throw_nogpu(
|
|||||||
|
|
||||||
#else /* !defined (HAVE_CUDA) */
|
#else /* !defined (HAVE_CUDA) */
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
#define OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name) \
|
#define OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name) \
|
||||||
void name(const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);
|
void name(const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
|
|
||||||
@ -199,7 +199,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
#undef OPENCV_GPU_DECLARE_CVTCOLOR_ONE
|
#undef OPENCV_GPU_DECLARE_CVTCOLOR_ONE
|
||||||
#undef OPENCV_GPU_DECLARE_CVTCOLOR_ALL
|
#undef OPENCV_GPU_DECLARE_CVTCOLOR_ALL
|
||||||
#undef OPENCV_GPU_DECLARE_CVTCOLOR_8U32F
|
#undef OPENCV_GPU_DECLARE_CVTCOLOR_8U32F
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
|
@ -45,11 +45,10 @@
|
|||||||
#include "opencv2/gpu/device/vec_distance.hpp"
|
#include "opencv2/gpu/device/vec_distance.hpp"
|
||||||
#include "opencv2/gpu/device/datamov_utils.hpp"
|
#include "opencv2/gpu/device/datamov_utils.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
namespace bf_knnmatch {
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bf_knnmatch
|
|
||||||
{
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// Reduction
|
// Reduction
|
||||||
|
|
||||||
@ -216,7 +215,7 @@ namespace cv { namespace gpu { namespace bf_knnmatch
|
|||||||
for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
|
for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
|
||||||
{
|
{
|
||||||
const int loadX = threadIdx.x + i * BLOCK_SIZE;
|
const int loadX = threadIdx.x + i * BLOCK_SIZE;
|
||||||
s_query[threadIdx.y * MAX_DESC_LEN + loadX] = loadX < query.cols ? query.ptr(min(queryIdx, query.rows - 1))[loadX] : 0;
|
s_query[threadIdx.y * MAX_DESC_LEN + loadX] = loadX < query.cols ? query.ptr(::min(queryIdx, query.rows - 1))[loadX] : 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -242,7 +241,7 @@ namespace cv { namespace gpu { namespace bf_knnmatch
|
|||||||
{
|
{
|
||||||
T val;
|
T val;
|
||||||
|
|
||||||
ForceGlob<T>::Load(train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
ForceGlob<T>::Load(train.ptr(::min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
||||||
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -418,10 +417,10 @@ namespace cv { namespace gpu { namespace bf_knnmatch
|
|||||||
{
|
{
|
||||||
T val;
|
T val;
|
||||||
|
|
||||||
ForceGlob<T>::Load(query.ptr(min(queryIdx, query.rows - 1)), loadX, val);
|
ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val);
|
||||||
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
|
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
|
||||||
|
|
||||||
ForceGlob<T>::Load(train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
ForceGlob<T>::Load(train.ptr(::min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
||||||
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -592,10 +591,10 @@ namespace cv { namespace gpu { namespace bf_knnmatch
|
|||||||
{
|
{
|
||||||
T val;
|
T val;
|
||||||
|
|
||||||
ForceGlob<T>::Load(query.ptr(min(queryIdx, query.rows - 1)), loadX, val);
|
ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val);
|
||||||
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
|
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
|
||||||
|
|
||||||
ForceGlob<T>::Load(train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
ForceGlob<T>::Load(train.ptr(::min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
||||||
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -829,8 +828,8 @@ namespace cv { namespace gpu { namespace bf_knnmatch
|
|||||||
|
|
||||||
if (loadX < query.cols)
|
if (loadX < query.cols)
|
||||||
{
|
{
|
||||||
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = query.ptr(min(queryIdx, query.rows - 1))[loadX];
|
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = query.ptr(::min(queryIdx, query.rows - 1))[loadX];
|
||||||
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX];
|
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -892,8 +891,8 @@ namespace cv { namespace gpu { namespace bf_knnmatch
|
|||||||
|
|
||||||
if (loadX < query.cols)
|
if (loadX < query.cols)
|
||||||
{
|
{
|
||||||
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = query.ptr(min(queryIdx, query.rows - 1))[loadX];
|
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = query.ptr(::min(queryIdx, query.rows - 1))[loadX];
|
||||||
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX];
|
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1))[loadX];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1156,4 +1155,7 @@ namespace cv { namespace gpu { namespace bf_knnmatch
|
|||||||
template void match2Hamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
template void match2Hamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
||||||
//template void match2Hamming_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
//template void match2Hamming_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
||||||
template void match2Hamming_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
template void match2Hamming_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Db& trainIdx, const DevMem2Db& imgIdx, const DevMem2Db& distance, int cc, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
} // namespace bf_knnmatch
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -45,11 +45,10 @@
|
|||||||
#include "opencv2/gpu/device/vec_distance.hpp"
|
#include "opencv2/gpu/device/vec_distance.hpp"
|
||||||
#include "opencv2/gpu/device/datamov_utils.hpp"
|
#include "opencv2/gpu/device/datamov_utils.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
namespace bf_match {
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bf_match
|
|
||||||
{
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// Reduction
|
// Reduction
|
||||||
|
|
||||||
@ -93,7 +92,7 @@ namespace cv { namespace gpu { namespace bf_match
|
|||||||
for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
|
for (int i = 0; i < MAX_DESC_LEN / BLOCK_SIZE; ++i)
|
||||||
{
|
{
|
||||||
const int loadX = threadIdx.x + i * BLOCK_SIZE;
|
const int loadX = threadIdx.x + i * BLOCK_SIZE;
|
||||||
s_query[threadIdx.y * MAX_DESC_LEN + loadX] = loadX < query.cols ? query.ptr(min(queryIdx, query.rows - 1))[loadX] : 0;
|
s_query[threadIdx.y * MAX_DESC_LEN + loadX] = loadX < query.cols ? query.ptr(::min(queryIdx, query.rows - 1))[loadX] : 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -117,7 +116,7 @@ namespace cv { namespace gpu { namespace bf_match
|
|||||||
{
|
{
|
||||||
T val;
|
T val;
|
||||||
|
|
||||||
ForceGlob<T>::Load(train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
ForceGlob<T>::Load(train.ptr(::min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
||||||
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -274,10 +273,10 @@ namespace cv { namespace gpu { namespace bf_match
|
|||||||
{
|
{
|
||||||
T val;
|
T val;
|
||||||
|
|
||||||
ForceGlob<T>::Load(query.ptr(min(queryIdx, query.rows - 1)), loadX, val);
|
ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val);
|
||||||
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
|
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
|
||||||
|
|
||||||
ForceGlob<T>::Load(train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
ForceGlob<T>::Load(train.ptr(::min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
||||||
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -429,10 +428,10 @@ namespace cv { namespace gpu { namespace bf_match
|
|||||||
{
|
{
|
||||||
T val;
|
T val;
|
||||||
|
|
||||||
ForceGlob<T>::Load(query.ptr(min(queryIdx, query.rows - 1)), loadX, val);
|
ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val);
|
||||||
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
|
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
|
||||||
|
|
||||||
ForceGlob<T>::Load(train.ptr(min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
ForceGlob<T>::Load(train.ptr(::min(t * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
||||||
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -775,4 +774,7 @@ namespace cv { namespace gpu { namespace bf_match
|
|||||||
template void matchHamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<short >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<int >(const DevMem2Db& query, const DevMem2Db& trains, const DevMem2D_<PtrStepb>& masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, int cc, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
} // namespace bf_match
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -45,11 +45,10 @@
|
|||||||
#include "opencv2/gpu/device/vec_distance.hpp"
|
#include "opencv2/gpu/device/vec_distance.hpp"
|
||||||
#include "opencv2/gpu/device/datamov_utils.hpp"
|
#include "opencv2/gpu/device/datamov_utils.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
namespace bf_radius_match {
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bf_radius_match
|
|
||||||
{
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// Match Unrolled
|
// Match Unrolled
|
||||||
|
|
||||||
@ -81,10 +80,10 @@ namespace cv { namespace gpu { namespace bf_radius_match
|
|||||||
{
|
{
|
||||||
T val;
|
T val;
|
||||||
|
|
||||||
ForceGlob<T>::Load(query.ptr(min(queryIdx, query.rows - 1)), loadX, val);
|
ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val);
|
||||||
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
|
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
|
||||||
|
|
||||||
ForceGlob<T>::Load(train.ptr(min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
ForceGlob<T>::Load(train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
||||||
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -192,10 +191,10 @@ namespace cv { namespace gpu { namespace bf_radius_match
|
|||||||
{
|
{
|
||||||
T val;
|
T val;
|
||||||
|
|
||||||
ForceGlob<T>::Load(query.ptr(min(queryIdx, query.rows - 1)), loadX, val);
|
ForceGlob<T>::Load(query.ptr(::min(queryIdx, query.rows - 1)), loadX, val);
|
||||||
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
|
s_query[threadIdx.y * BLOCK_SIZE + threadIdx.x] = val;
|
||||||
|
|
||||||
ForceGlob<T>::Load(train.ptr(min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
ForceGlob<T>::Load(train.ptr(::min(blockIdx.x * BLOCK_SIZE + threadIdx.y, train.rows - 1)), loadX, val);
|
||||||
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
s_train[threadIdx.x * BLOCK_SIZE + threadIdx.y] = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -462,4 +461,7 @@ namespace cv { namespace gpu { namespace bf_radius_match
|
|||||||
template void matchHamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<ushort>(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
//template void matchHamming_gpu<short >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
//template void matchHamming_gpu<short >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
template void matchHamming_gpu<int >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
template void matchHamming_gpu<int >(const DevMem2Db& query, const DevMem2Db* trains, int n, float maxDistance, const DevMem2Db* masks, const DevMem2Di& trainIdx, const DevMem2Di& imgIdx, const DevMem2Df& distance, const DevMem2D_<unsigned int>& nMatches, int cc, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
} // namespace bf_radius_match
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -43,11 +43,10 @@
|
|||||||
#include "internal_shared.hpp"
|
#include "internal_shared.hpp"
|
||||||
#include "opencv2/gpu/device/limits.hpp"
|
#include "opencv2/gpu/device/limits.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
namespace bilateral_filter {
|
||||||
|
|
||||||
namespace bf_krnls
|
|
||||||
{
|
|
||||||
__constant__ float* ctable_color;
|
__constant__ float* ctable_color;
|
||||||
__constant__ float* ctable_space;
|
__constant__ float* ctable_space;
|
||||||
__constant__ size_t ctable_space_step;
|
__constant__ size_t ctable_space_step;
|
||||||
@ -57,36 +56,30 @@ namespace bf_krnls
|
|||||||
|
|
||||||
__constant__ short cedge_disc;
|
__constant__ short cedge_disc;
|
||||||
__constant__ short cmax_disc;
|
__constant__ short cmax_disc;
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bf
|
void load_constants(float* table_color, DevMem2Df table_space, int ndisp, int radius, short edge_disc, short max_disc)
|
||||||
{
|
{
|
||||||
void load_constants(float* table_color, const DevMem2Df& table_space, int ndisp, int radius, short edge_disc, short max_disc)
|
cudaSafeCall( cudaMemcpyToSymbol(ctable_color, &table_color, sizeof(table_color)) );
|
||||||
{
|
cudaSafeCall( cudaMemcpyToSymbol(ctable_space, &table_space.data, sizeof(table_space.data)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_color, &table_color, sizeof(table_color)) );
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space, &table_space.data, sizeof(table_space.data)) );
|
|
||||||
size_t table_space_step = table_space.step / sizeof(float);
|
size_t table_space_step = table_space.step / sizeof(float);
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::ctable_space_step, &table_space_step, sizeof(size_t)) );
|
cudaSafeCall( cudaMemcpyToSymbol(ctable_space_step, &table_space_step, sizeof(size_t)) );
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cndisp, &ndisp, sizeof(int)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cndisp, &ndisp, sizeof(int)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cradius, &radius, sizeof(int)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cradius, &radius, sizeof(int)) );
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cedge_disc, &edge_disc, sizeof(short)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cedge_disc, &edge_disc, sizeof(short)) );
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(bf_krnls::cmax_disc, &max_disc, sizeof(short)) );
|
cudaSafeCall( cudaMemcpyToSymbol(cmax_disc, &max_disc, sizeof(short)) );
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
namespace bf_krnls
|
|
||||||
{
|
|
||||||
template <int channels>
|
template <int channels>
|
||||||
struct DistRgbMax
|
struct DistRgbMax
|
||||||
{
|
{
|
||||||
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
|
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
|
||||||
{
|
{
|
||||||
uchar x = abs(a[0] - b[0]);
|
uchar x = ::abs(a[0] - b[0]);
|
||||||
uchar y = abs(a[1] - b[1]);
|
uchar y = ::abs(a[1] - b[1]);
|
||||||
uchar z = abs(a[2] - b[2]);
|
uchar z = ::abs(a[2] - b[2]);
|
||||||
return (max(max(x, y), z));
|
return (::max(::max(x, y), z));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -95,7 +88,7 @@ namespace bf_krnls
|
|||||||
{
|
{
|
||||||
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
|
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
|
||||||
{
|
{
|
||||||
return abs(a[0] - b[0]);
|
return ::abs(a[0] - b[0]);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -115,12 +108,12 @@ namespace bf_krnls
|
|||||||
dp[3] = *(disp + (y+1) * disp_step + x + 0);
|
dp[3] = *(disp + (y+1) * disp_step + x + 0);
|
||||||
dp[4] = *(disp + (y ) * disp_step + x + 1);
|
dp[4] = *(disp + (y ) * disp_step + x + 1);
|
||||||
|
|
||||||
if(abs(dp[1] - dp[0]) >= cedge_disc || abs(dp[2] - dp[0]) >= cedge_disc || abs(dp[3] - dp[0]) >= cedge_disc || abs(dp[4] - dp[0]) >= cedge_disc)
|
if(::abs(dp[1] - dp[0]) >= cedge_disc || ::abs(dp[2] - dp[0]) >= cedge_disc || ::abs(dp[3] - dp[0]) >= cedge_disc || ::abs(dp[4] - dp[0]) >= cedge_disc)
|
||||||
{
|
{
|
||||||
const int ymin = max(0, y - cradius);
|
const int ymin = ::max(0, y - cradius);
|
||||||
const int xmin = max(0, x - cradius);
|
const int xmin = ::max(0, x - cradius);
|
||||||
const int ymax = min(h - 1, y + cradius);
|
const int ymax = ::min(h - 1, y + cradius);
|
||||||
const int xmax = min(w - 1, x + cradius);
|
const int xmax = ::min(w - 1, x + cradius);
|
||||||
|
|
||||||
float cost[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
|
float cost[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
|
||||||
|
|
||||||
@ -136,15 +129,15 @@ namespace bf_krnls
|
|||||||
|
|
||||||
uchar dist_rgb = DistRgbMax<channels>::calc(in, ic);
|
uchar dist_rgb = DistRgbMax<channels>::calc(in, ic);
|
||||||
|
|
||||||
const float weight = ctable_color[dist_rgb] * (ctable_space + abs(y-yi)* ctable_space_step)[abs(x-xi)];
|
const float weight = ctable_color[dist_rgb] * (ctable_space + ::abs(y-yi)* ctable_space_step)[::abs(x-xi)];
|
||||||
|
|
||||||
const T disp_reg = disp_y[xi];
|
const T disp_reg = disp_y[xi];
|
||||||
|
|
||||||
cost[0] += min(cmax_disc, abs(disp_reg - dp[0])) * weight;
|
cost[0] += ::min(cmax_disc, ::abs(disp_reg - dp[0])) * weight;
|
||||||
cost[1] += min(cmax_disc, abs(disp_reg - dp[1])) * weight;
|
cost[1] += ::min(cmax_disc, ::abs(disp_reg - dp[1])) * weight;
|
||||||
cost[2] += min(cmax_disc, abs(disp_reg - dp[2])) * weight;
|
cost[2] += ::min(cmax_disc, ::abs(disp_reg - dp[2])) * weight;
|
||||||
cost[3] += min(cmax_disc, abs(disp_reg - dp[3])) * weight;
|
cost[3] += ::min(cmax_disc, ::abs(disp_reg - dp[3])) * weight;
|
||||||
cost[4] += min(cmax_disc, abs(disp_reg - dp[4])) * weight;
|
cost[4] += ::min(cmax_disc, ::abs(disp_reg - dp[4])) * weight;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -181,12 +174,9 @@ namespace bf_krnls
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bf
|
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void bilateral_filter_caller(const DevMem2D_<T>& disp, const DevMem2Db& img, int channels, int iters, cudaStream_t stream)
|
void bilateral_filter_caller(DevMem2D_<T> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -198,18 +188,20 @@ namespace cv { namespace gpu { namespace bf
|
|||||||
case 1:
|
case 1:
|
||||||
for (int i = 0; i < iters; ++i)
|
for (int i = 0; i < iters; ++i)
|
||||||
{
|
{
|
||||||
bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
bilateral_filter<1><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
bf_krnls::bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
|
||||||
|
bilateral_filter<1><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
for (int i = 0; i < iters; ++i)
|
for (int i = 0; i < iters; ++i)
|
||||||
{
|
{
|
||||||
bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
bilateral_filter<3><<<grid, threads, 0, stream>>>(0, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
bf_krnls::bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
|
||||||
|
bilateral_filter<3><<<grid, threads, 0, stream>>>(1, disp.data, disp.step/sizeof(T), img.data, img.step, disp.rows, disp.cols);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -221,13 +213,16 @@ namespace cv { namespace gpu { namespace bf
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void bilateral_filter_gpu(const DevMem2Db& disp, const DevMem2Db& img, int channels, int iters, cudaStream_t stream)
|
void bilateral_filter_gpu(DevMem2Db disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
bilateral_filter_caller(disp, img, channels, iters, stream);
|
bilateral_filter_caller(disp, img, channels, iters, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void bilateral_filter_gpu(const DevMem2D_<short>& disp, const DevMem2Db& img, int channels, int iters, cudaStream_t stream)
|
void bilateral_filter_gpu(DevMem2D_<short> disp, DevMem2Db img, int channels, int iters, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
bilateral_filter_caller(disp, img, channels, iters, stream);
|
bilateral_filter_caller(disp, img, channels, iters, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
} // namespace bilateral_filter
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -42,10 +42,9 @@
|
|||||||
|
|
||||||
#include "internal_shared.hpp"
|
#include "internal_shared.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace cv { namespace gpu
|
namespace blend {
|
||||||
{
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> img1, const PtrStep<T> img2,
|
__global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> img1, const PtrStep<T> img2,
|
||||||
@ -66,8 +65,7 @@ namespace cv { namespace gpu
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void blendLinearCaller(int rows, int cols, int cn, const PtrStep<T>& img1, const PtrStep<T>& img2,
|
void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream)
|
||||||
const PtrStepf& weights1, const PtrStepf& weights2, PtrStep<T> result, cudaStream_t stream)
|
|
||||||
{
|
{
|
||||||
dim3 threads(16, 16);
|
dim3 threads(16, 16);
|
||||||
dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));
|
dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));
|
||||||
@ -79,10 +77,8 @@ namespace cv { namespace gpu
|
|||||||
cudaSafeCall(cudaDeviceSynchronize());
|
cudaSafeCall(cudaDeviceSynchronize());
|
||||||
}
|
}
|
||||||
|
|
||||||
template void blendLinearCaller<uchar>(int, int, int, const PtrStep<uchar>&, const PtrStep<uchar>&,
|
template void blendLinearCaller<uchar>(int, int, int, PtrStep<uchar>, PtrStep<uchar>, PtrStepf, PtrStepf, PtrStep<uchar>, cudaStream_t stream);
|
||||||
const PtrStepf&, const PtrStepf&, PtrStep<uchar>, cudaStream_t stream);
|
template void blendLinearCaller<float>(int, int, int, PtrStep<float>, PtrStep<float>, PtrStepf, PtrStepf, PtrStep<float>, cudaStream_t stream);
|
||||||
template void blendLinearCaller<float>(int, int, int, const PtrStep<float>&, const PtrStep<float>&,
|
|
||||||
const PtrStepf&, const PtrStepf&, PtrStep<float>, cudaStream_t stream);
|
|
||||||
|
|
||||||
|
|
||||||
__global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStepb img1, const PtrStepb img2,
|
__global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStepb img1, const PtrStepb img2,
|
||||||
@ -105,9 +101,7 @@ namespace cv { namespace gpu
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream)
|
||||||
void blendLinearCaller8UC4(int rows, int cols, const PtrStepb& img1, const PtrStepb& img2,
|
|
||||||
const PtrStepf& weights1, const PtrStepf& weights2, PtrStepb result, cudaStream_t stream)
|
|
||||||
{
|
{
|
||||||
dim3 threads(16, 16);
|
dim3 threads(16, 16);
|
||||||
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
|
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
|
||||||
@ -119,4 +113,6 @@ namespace cv { namespace gpu
|
|||||||
cudaSafeCall(cudaDeviceSynchronize());
|
cudaSafeCall(cudaDeviceSynchronize());
|
||||||
}
|
}
|
||||||
|
|
||||||
}}
|
} // namespace blend
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -44,12 +44,10 @@
|
|||||||
#include "opencv2/gpu/device/transform.hpp"
|
#include "opencv2/gpu/device/transform.hpp"
|
||||||
#include "opencv2/gpu/device/functional.hpp"
|
#include "opencv2/gpu/device/functional.hpp"
|
||||||
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200
|
#define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200
|
||||||
|
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
|
||||||
namespace cv { namespace gpu
|
|
||||||
{
|
|
||||||
namespace transform_points
|
namespace transform_points
|
||||||
{
|
{
|
||||||
__constant__ float3 crot0;
|
__constant__ float3 crot0;
|
||||||
@ -76,11 +74,10 @@ namespace cv { namespace gpu
|
|||||||
cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
|
cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
|
||||||
cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
|
cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
|
||||||
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
|
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
|
||||||
transform(src, dst, TransformOp(), stream);
|
OPENCV_DEVICE_NAMESPACE_ transform(src, dst, TransformOp(), stream);
|
||||||
}
|
}
|
||||||
} // namespace transform_points
|
} // namespace transform_points
|
||||||
|
|
||||||
|
|
||||||
namespace project_points
|
namespace project_points
|
||||||
{
|
{
|
||||||
__constant__ float3 crot0;
|
__constant__ float3 crot0;
|
||||||
@ -116,11 +113,10 @@ namespace cv { namespace gpu
|
|||||||
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
|
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
|
||||||
cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
|
cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
|
||||||
cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
|
cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
|
||||||
transform(src, dst, ProjectOp(), stream);
|
OPENCV_DEVICE_NAMESPACE_ transform(src, dst, ProjectOp(), stream);
|
||||||
}
|
}
|
||||||
} // namespace project_points
|
} // namespace project_points
|
||||||
|
|
||||||
|
|
||||||
namespace solve_pnp_ransac
|
namespace solve_pnp_ransac
|
||||||
{
|
{
|
||||||
__constant__ float3 crot_matrices[SOLVE_PNP_RANSAC_MAX_NUM_ITERS * 3];
|
__constant__ float3 crot_matrices[SOLVE_PNP_RANSAC_MAX_NUM_ITERS * 3];
|
||||||
@ -193,4 +189,4 @@ namespace cv { namespace gpu
|
|||||||
}
|
}
|
||||||
} // namespace solvepnp_ransac
|
} // namespace solvepnp_ransac
|
||||||
|
|
||||||
}} // namespace cv { namespace gpu
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -44,10 +44,10 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "internal_shared.hpp"
|
#include "internal_shared.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace canny {
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace canny
|
|
||||||
{
|
|
||||||
__global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
|
__global__ void calcSobelRowPass(const PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
|
||||||
{
|
{
|
||||||
__shared__ int smem[16][18];
|
__shared__ int smem[16][18];
|
||||||
@ -60,8 +60,8 @@ namespace cv { namespace gpu { namespace canny
|
|||||||
smem[threadIdx.y][threadIdx.x + 1] = src.ptr(i)[j];
|
smem[threadIdx.y][threadIdx.x + 1] = src.ptr(i)[j];
|
||||||
if (threadIdx.x == 0)
|
if (threadIdx.x == 0)
|
||||||
{
|
{
|
||||||
smem[threadIdx.y][0] = src.ptr(i)[max(j - 1, 0)];
|
smem[threadIdx.y][0] = src.ptr(i)[::max(j - 1, 0)];
|
||||||
smem[threadIdx.y][17] = src.ptr(i)[min(j + 16, cols - 1)];
|
smem[threadIdx.y][17] = src.ptr(i)[::min(j + 16, cols - 1)];
|
||||||
}
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
@ -88,14 +88,14 @@ namespace cv { namespace gpu { namespace canny
|
|||||||
{
|
{
|
||||||
static __device__ __forceinline__ float calc(int x, int y)
|
static __device__ __forceinline__ float calc(int x, int y)
|
||||||
{
|
{
|
||||||
return abs(x) + abs(y);
|
return ::abs(x) + ::abs(y);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
struct L2
|
struct L2
|
||||||
{
|
{
|
||||||
static __device__ __forceinline__ float calc(int x, int y)
|
static __device__ __forceinline__ float calc(int x, int y)
|
||||||
{
|
{
|
||||||
return sqrtf(x * x + y * y);
|
return ::sqrtf(x * x + y * y);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -114,11 +114,11 @@ namespace cv { namespace gpu { namespace canny
|
|||||||
sdy[threadIdx.y + 1][threadIdx.x] = dy_buf.ptr(i)[j];
|
sdy[threadIdx.y + 1][threadIdx.x] = dy_buf.ptr(i)[j];
|
||||||
if (threadIdx.y == 0)
|
if (threadIdx.y == 0)
|
||||||
{
|
{
|
||||||
sdx[0][threadIdx.x] = dx_buf.ptr(max(i - 1, 0))[j];
|
sdx[0][threadIdx.x] = dx_buf.ptr(::max(i - 1, 0))[j];
|
||||||
sdx[17][threadIdx.x] = dx_buf.ptr(min(i + 16, rows - 1))[j];
|
sdx[17][threadIdx.x] = dx_buf.ptr(::min(i + 16, rows - 1))[j];
|
||||||
|
|
||||||
sdy[0][threadIdx.x] = dy_buf.ptr(max(i - 1, 0))[j];
|
sdy[0][threadIdx.x] = dy_buf.ptr(::max(i - 1, 0))[j];
|
||||||
sdy[17][threadIdx.x] = dy_buf.ptr(min(i + 16, rows - 1))[j];
|
sdy[17][threadIdx.x] = dy_buf.ptr(::min(i + 16, rows - 1))[j];
|
||||||
}
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
@ -205,8 +205,8 @@ namespace cv { namespace gpu { namespace canny
|
|||||||
const int s = (x ^ y) < 0 ? -1 : 1;
|
const int s = (x ^ y) < 0 ? -1 : 1;
|
||||||
const float m = smem[threadIdx.y + 1][threadIdx.x + 1];
|
const float m = smem[threadIdx.y + 1][threadIdx.x + 1];
|
||||||
|
|
||||||
x = abs(x);
|
x = ::abs(x);
|
||||||
y = abs(y);
|
y = ::abs(y);
|
||||||
|
|
||||||
// 0 - the pixel can not belong to an edge
|
// 0 - the pixel can not belong to an edge
|
||||||
// 1 - the pixel might belong to an edge
|
// 1 - the pixel might belong to an edge
|
||||||
@ -392,7 +392,7 @@ namespace cv { namespace gpu { namespace canny
|
|||||||
while (s_counter > 0 && s_counter <= stack_size - blockDim.x)
|
while (s_counter > 0 && s_counter <= stack_size - blockDim.x)
|
||||||
{
|
{
|
||||||
const int subTaskIdx = threadIdx.x >> 3;
|
const int subTaskIdx = threadIdx.x >> 3;
|
||||||
const int portion = min(s_counter, blockDim.x >> 3);
|
const int portion = ::min(s_counter, blockDim.x >> 3);
|
||||||
|
|
||||||
pos.x = pos.y = 0;
|
pos.x = pos.y = 0;
|
||||||
|
|
||||||
@ -446,7 +446,7 @@ namespace cv { namespace gpu { namespace canny
|
|||||||
void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols)
|
void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols)
|
||||||
{
|
{
|
||||||
void* counter_ptr;
|
void* counter_ptr;
|
||||||
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, "cv::gpu::canny::counter") );
|
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
|
||||||
|
|
||||||
unsigned int count;
|
unsigned int count;
|
||||||
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
|
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(unsigned int), cudaMemcpyDeviceToHost) );
|
||||||
@ -487,4 +487,7 @@ namespace cv { namespace gpu { namespace canny
|
|||||||
|
|
||||||
cudaSafeCall(cudaThreadSynchronize());
|
cudaSafeCall(cudaThreadSynchronize());
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
} // namespace canny
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -44,177 +44,177 @@
|
|||||||
#include "opencv2/gpu/device/transform.hpp"
|
#include "opencv2/gpu/device/transform.hpp"
|
||||||
#include "opencv2/gpu/device/color.hpp"
|
#include "opencv2/gpu/device/color.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
template <> struct TransformFunctorTraits<bgra_to_rgba_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<bgra_to_rgba_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_x = 8 };
|
enum { smart_block_dim_x = 8 };
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<bgra_to_bgr555_traits::functor_type> : DefaultTransformFunctorTraits<bgra_to_bgr555_traits::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr555_traits::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<rgba_to_bgr555_traits::functor_type> : DefaultTransformFunctorTraits<rgba_to_bgr555_traits::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr555_traits::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<bgra_to_bgr565_traits::functor_type> : DefaultTransformFunctorTraits<bgra_to_bgr565_traits::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr565_traits::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<rgba_to_bgr565_traits::functor_type> : DefaultTransformFunctorTraits<rgba_to_bgr565_traits::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr565_traits::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<bgr555_to_bgra_traits::functor_type> : DefaultTransformFunctorTraits<bgr555_to_bgra_traits::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_bgra_traits::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<bgr555_to_rgba_traits::functor_type> : DefaultTransformFunctorTraits<bgr555_to_rgba_traits::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_rgba_traits::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<bgr565_to_bgra_traits::functor_type> : DefaultTransformFunctorTraits<bgr565_to_bgra_traits::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_bgra_traits::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<bgr565_to_rgba_traits::functor_type> : DefaultTransformFunctorTraits<bgr565_to_rgba_traits::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_rgba_traits::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<gray_to_bgra_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<gray_to_bgra_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgra_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<gray_to_bgr555_traits::functor_type> : DefaultTransformFunctorTraits<gray_to_bgr555_traits::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr555_traits::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<gray_to_bgr565_traits::functor_type> : DefaultTransformFunctorTraits<gray_to_bgr565_traits::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr565_traits::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<bgra_to_yuv4_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<bgra_to_yuv4_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_yuv4_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<rgba_to_yuv4_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<rgba_to_yuv4_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_yuv4_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<yuv4_to_bgra_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<yuv4_to_bgra_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_bgra_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<yuv4_to_rgba_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<yuv4_to_rgba_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_rgba_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<bgra_to_YCrCb4_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<bgra_to_YCrCb4_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_YCrCb4_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<rgba_to_YCrCb4_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<rgba_to_YCrCb4_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_YCrCb4_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<YCrCb4_to_bgra_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<YCrCb4_to_bgra_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_bgra_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<YCrCb4_to_rgba_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<YCrCb4_to_rgba_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_rgba_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<bgra_to_xyz4_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<bgra_to_xyz4_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<rgba_to_xyz4_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<rgba_to_xyz4_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_xyz4_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<xyz4_to_bgra_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<xyz4_to_bgra_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_bgra_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<xyz4_to_rgba_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<xyz4_to_rgba_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_rgba_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<bgra_to_hsv4_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<bgra_to_hsv4_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hsv4_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<rgba_to_hsv4_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<rgba_to_hsv4_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hsv4_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<hsv4_to_bgra_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<hsv4_to_bgra_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_bgra_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<hsv4_to_rgba_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<hsv4_to_rgba_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_rgba_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<bgra_to_hls4_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<bgra_to_hls4_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hls4_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<rgba_to_hls4_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<rgba_to_hls4_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hls4_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<hls4_to_bgra_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<hls4_to_bgra_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(hls4_to_bgra_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
};
|
};
|
||||||
template <> struct TransformFunctorTraits<hls4_to_rgba_traits<uchar>::functor_type> : DefaultTransformFunctorTraits<hls4_to_rgba_traits<uchar>::functor_type>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(hls4_to_rgba_traits<uchar>::functor_type)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
enum { smart_shift = 4 };
|
enum { smart_shift = 4 };
|
||||||
@ -226,7 +226,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
traits::functor_type functor = traits::create_functor(); \
|
traits::functor_type functor = traits::create_functor(); \
|
||||||
typedef typename traits::functor_type::argument_type src_t; \
|
typedef typename traits::functor_type::argument_type src_t; \
|
||||||
typedef typename traits::functor_type::result_type dst_t; \
|
typedef typename traits::functor_type::result_type dst_t; \
|
||||||
transform((DevMem2D_<src_t>)src, (DevMem2D_<dst_t>)dst, functor, stream); \
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<src_t>)src, (DevMem2D_<dst_t>)dst, functor, stream); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
|
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
|
||||||
@ -376,4 +376,5 @@ namespace cv { namespace gpu { namespace device
|
|||||||
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE
|
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE
|
||||||
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL
|
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL
|
||||||
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F
|
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -47,8 +47,7 @@
|
|||||||
#include "opencv2/gpu/device/limits.hpp"
|
#include "opencv2/gpu/device/limits.hpp"
|
||||||
#include "opencv2/gpu/device/border_interpolate.hpp"
|
#include "opencv2/gpu/device/border_interpolate.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
|
||||||
#define MAX_KERNEL_SIZE 16
|
#define MAX_KERNEL_SIZE 16
|
||||||
#define BLOCK_DIM_X 16
|
#define BLOCK_DIM_X 16
|
||||||
@ -56,8 +55,8 @@ using namespace cv::gpu::device;
|
|||||||
#define RESULT_STEPS 8
|
#define RESULT_STEPS 8
|
||||||
#define HALO_STEPS 1
|
#define HALO_STEPS 1
|
||||||
|
|
||||||
namespace filter_column
|
namespace column_filter {
|
||||||
{
|
|
||||||
__constant__ float c_kernel[MAX_KERNEL_SIZE];
|
__constant__ float c_kernel[MAX_KERNEL_SIZE];
|
||||||
|
|
||||||
void loadKernel(const float kernel[], int ksize)
|
void loadKernel(const float kernel[], int ksize)
|
||||||
@ -113,10 +112,7 @@ namespace filter_column
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace filters
|
|
||||||
{
|
|
||||||
template <int ksize, typename T, typename D, template<typename> class B>
|
template <int ksize, typename T, typename D, template<typename> class B>
|
||||||
void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
|
void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
@ -125,7 +121,7 @@ namespace cv { namespace gpu { namespace filters
|
|||||||
|
|
||||||
B<T> b(src.rows);
|
B<T> b(src.rows);
|
||||||
|
|
||||||
filter_column::linearColumnFilter<ksize, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, b);
|
linearColumnFilter<ksize, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, b);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
@ -235,7 +231,7 @@ namespace cv { namespace gpu { namespace filters
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
filter_column::loadKernel(kernel, ksize);
|
loadKernel(kernel, ksize);
|
||||||
|
|
||||||
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
|
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
|
||||||
}
|
}
|
||||||
@ -247,4 +243,7 @@ namespace cv { namespace gpu { namespace filters
|
|||||||
template void linearColumnFilter_gpu<float3, short3>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearColumnFilter_gpu<float3, short3>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
template void linearColumnFilter_gpu<float , int >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearColumnFilter_gpu<float , int >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
template void linearColumnFilter_gpu<float , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearColumnFilter_gpu<float , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
} // namespace column_filter
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -43,11 +43,10 @@
|
|||||||
#include "internal_shared.hpp"
|
#include "internal_shared.hpp"
|
||||||
#include "opencv2/gpu/device/border_interpolate.hpp"
|
#include "opencv2/gpu/device/border_interpolate.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
namespace copy_make_border {
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
|
||||||
{
|
|
||||||
template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, DevMem2D_<T> dst, int top, int left)
|
template <typename Ptr2D, typename T> __global__ void copyMakeBorder(const Ptr2D src, DevMem2D_<T> dst, int top, int left)
|
||||||
{
|
{
|
||||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
@ -124,4 +123,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
//template void copyMakeBorder_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
|
//template void copyMakeBorder_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
template void copyMakeBorder_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
|
template void copyMakeBorder_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
template void copyMakeBorder_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
|
template void copyMakeBorder_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
} // namespace copy_make_border
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -40,15 +40,15 @@
|
|||||||
//
|
//
|
||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
|
#include "internal_shared.hpp"
|
||||||
#include "opencv2/gpu/device/functional.hpp"
|
#include "opencv2/gpu/device/functional.hpp"
|
||||||
#include "opencv2/gpu/device/vec_math.hpp"
|
#include "opencv2/gpu/device/vec_math.hpp"
|
||||||
#include "opencv2/gpu/device/transform.hpp"
|
#include "opencv2/gpu/device/transform.hpp"
|
||||||
#include "opencv2/gpu/device/limits.hpp"
|
#include "opencv2/gpu/device/limits.hpp"
|
||||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||||
#include "internal_shared.hpp"
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// add
|
// add
|
||||||
|
|
||||||
@ -84,9 +84,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template <typename T, typename D> void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream)
|
template <typename T, typename D> void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, mask, Add<T, D>(), stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, mask, Add<T, D>(), stream);
|
||||||
else
|
else
|
||||||
transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Add<T, D>(), stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Add<T, D>(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void add_gpu<uchar, uchar>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
template void add_gpu<uchar, uchar>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
@ -181,9 +181,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
||||||
AddScalar<T, D> op(val);
|
AddScalar<T, D> op(val);
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, mask, op, stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, mask, op, stream);
|
||||||
else
|
else
|
||||||
transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void add_gpu<uchar, uchar>(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
template void add_gpu<uchar, uchar>(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
@ -277,9 +277,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template <typename T, typename D> void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream)
|
template <typename T, typename D> void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, mask, Subtract<T, D>(), stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, mask, Subtract<T, D>(), stream);
|
||||||
else
|
else
|
||||||
transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Subtract<T, D>(), stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Subtract<T, D>(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void subtract_gpu<uchar, uchar>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
template void subtract_gpu<uchar, uchar>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
@ -374,9 +374,9 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
||||||
SubtractScalar<T, D> op(val);
|
SubtractScalar<T, D> op(val);
|
||||||
if (mask.data)
|
if (mask.data)
|
||||||
transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, mask, op, stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, mask, op, stream);
|
||||||
else
|
else
|
||||||
transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void subtract_gpu<uchar, uchar>(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
template void subtract_gpu<uchar, uchar>(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
@ -453,7 +453,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<multiply_8uc4_32f> : DefaultTransformFunctorTraits<multiply_8uc4_32f>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(multiply_8uc4_32f)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_x = 8 };
|
enum { smart_block_dim_x = 8 };
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
@ -474,7 +474,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<multiply_16sc4_32f> : DefaultTransformFunctorTraits<multiply_16sc4_32f>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(multiply_16sc4_32f)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_x = 8 };
|
enum { smart_block_dim_x = 8 };
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
@ -522,7 +522,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
cudaSafeCall( cudaSetDoubleForDevice(&scale) );
|
cudaSafeCall( cudaSetDoubleForDevice(&scale) );
|
||||||
Multiply<T, D> op(scale);
|
Multiply<T, D> op(scale);
|
||||||
transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void multiply_gpu<uchar, uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
template void multiply_gpu<uchar, uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||||
@ -618,7 +618,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
||||||
cudaSafeCall( cudaSetDoubleForDevice(&scale) );
|
cudaSafeCall( cudaSetDoubleForDevice(&scale) );
|
||||||
MultiplyScalar<T, D> op(val, scale);
|
MultiplyScalar<T, D> op(val, scale);
|
||||||
transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void multiply_gpu<uchar, uchar >(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
template void multiply_gpu<uchar, uchar >(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||||
@ -690,7 +690,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<divide_8uc4_32f> : DefaultTransformFunctorTraits<divide_8uc4_32f>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(divide_8uc4_32f)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_x = 8 };
|
enum { smart_block_dim_x = 8 };
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
@ -713,7 +713,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <> struct TransformFunctorTraits<divide_16sc4_32f> : DefaultTransformFunctorTraits<divide_16sc4_32f>
|
DEFINE_TRANSFORM_FUNCTOR_TRAITS(divide_16sc4_32f)
|
||||||
{
|
{
|
||||||
enum { smart_block_dim_x = 8 };
|
enum { smart_block_dim_x = 8 };
|
||||||
enum { smart_block_dim_y = 8 };
|
enum { smart_block_dim_y = 8 };
|
||||||
@ -760,7 +760,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
cudaSafeCall( cudaSetDoubleForDevice(&scale) );
|
cudaSafeCall( cudaSetDoubleForDevice(&scale) );
|
||||||
Divide<T, D> op(scale);
|
Divide<T, D> op(scale);
|
||||||
transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void divide_gpu<uchar, uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
template void divide_gpu<uchar, uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||||
@ -856,7 +856,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
||||||
cudaSafeCall( cudaSetDoubleForDevice(&scale) );
|
cudaSafeCall( cudaSetDoubleForDevice(&scale) );
|
||||||
DivideScalar<T, D> op(val, scale);
|
DivideScalar<T, D> op(val, scale);
|
||||||
transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void divide_gpu<uchar, uchar >(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
template void divide_gpu<uchar, uchar >(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||||
@ -950,7 +950,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
cudaSafeCall( cudaSetDoubleForDevice(&scalar) );
|
cudaSafeCall( cudaSetDoubleForDevice(&scalar) );
|
||||||
Reciprocal<T, D> op(scalar);
|
Reciprocal<T, D> op(scalar);
|
||||||
transform((DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void divide_gpu<uchar, uchar >(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
template void divide_gpu<uchar, uchar >(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
@ -1029,7 +1029,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
__device__ __forceinline__ T operator ()(T a, T b) const
|
__device__ __forceinline__ T operator ()(T a, T b) const
|
||||||
{
|
{
|
||||||
return saturate_cast<T>(abs(a - b));
|
return saturate_cast<T>(::abs(a - b));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -1056,7 +1056,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <typename T> void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
|
template <typename T> void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, Absdiff<T>(), stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, Absdiff<T>(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
//template void absdiff_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
//template void absdiff_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
@ -1102,7 +1102,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
||||||
AbsdiffScalar<T> op(val);
|
AbsdiffScalar<T> op(val);
|
||||||
transform((DevMem2D_<T>)src1, (DevMem2D_<T>)dst, op, stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src1, (DevMem2D_<T>)dst, op, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void absdiff_gpu<uchar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
template void absdiff_gpu<uchar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
@ -1189,7 +1189,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template <template <typename> class Op, typename T> void compare(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
|
template <template <typename> class Op, typename T> void compare(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
Op<T> op;
|
Op<T> op;
|
||||||
transform(static_cast< DevMem2D_<T> >(src1), static_cast< DevMem2D_<T> >(src2), dst, op, stream);
|
OPENCV_DEVICE_NAMESPACE_ transform(static_cast< DevMem2D_<T> >(src1), static_cast< DevMem2D_<T> >(src2), dst, op, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> void compare_eq(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
|
template <typename T> void compare_eq(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
|
||||||
@ -1548,7 +1548,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
transform(src1, src2, dst, minimum<T>(), stream);
|
OPENCV_DEVICE_NAMESPACE_ transform(src1, src2, dst, minimum<T>(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void min_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
template void min_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
@ -1562,7 +1562,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
transform(src1, src2, dst, maximum<T>(), stream);
|
OPENCV_DEVICE_NAMESPACE_ transform(src1, src2, dst, maximum<T>(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void max_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
template void max_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
@ -1576,7 +1576,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
transform(src1, dst, device::bind2nd(minimum<T>(), src2), stream);
|
OPENCV_DEVICE_NAMESPACE_ transform(src1, dst, device::bind2nd(minimum<T>(), src2), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void min_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
|
template void min_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
@ -1590,7 +1590,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
transform(src1, dst, device::bind2nd(maximum<T>(), src2), stream);
|
OPENCV_DEVICE_NAMESPACE_ transform(src1, dst, device::bind2nd(maximum<T>(), src2), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void max_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
|
template void max_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
@ -1642,7 +1642,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
cudaStream_t stream)
|
cudaStream_t stream)
|
||||||
{
|
{
|
||||||
Op<T> op(thresh, maxVal);
|
Op<T> op(thresh, maxVal);
|
||||||
transform(src, dst, op, stream);
|
OPENCV_DEVICE_NAMESPACE_ transform(src, dst, op, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -1743,7 +1743,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template<typename T>
|
template<typename T>
|
||||||
void pow_caller(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream)
|
void pow_caller(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, PowOp<T>(power), stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, PowOp<T>(power), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void pow_caller<uchar>(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
|
template void pow_caller<uchar>(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
|
||||||
@ -1838,7 +1838,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
AddWeighted<T1, T2, D> op(alpha, beta, gamma);
|
AddWeighted<T1, T2, D> op(alpha, beta, gamma);
|
||||||
|
|
||||||
transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), static_cast< DevMem2D_<D> >(dst), op, stream);
|
OPENCV_DEVICE_NAMESPACE_ transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), static_cast< DevMem2D_<D> >(dst), op, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template void addWeighted_gpu<uchar, uchar, uchar>(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
template void addWeighted_gpu<uchar, uchar, uchar>(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
@ -2076,4 +2076,5 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template void addWeighted_gpu<double, double, int>(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
template void addWeighted_gpu<double, double, int>(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
template void addWeighted_gpu<double, double, float>(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
template void addWeighted_gpu<double, double, float>(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
template void addWeighted_gpu<double, double, double>(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
template void addWeighted_gpu<double, double, double>(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -45,9 +45,7 @@
|
|||||||
#include "opencv2/gpu/device/utility.hpp"
|
#include "opencv2/gpu/device/utility.hpp"
|
||||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
|
||||||
#define UINT_BITS 32U
|
#define UINT_BITS 32U
|
||||||
|
|
||||||
@ -67,8 +65,8 @@ using namespace cv::gpu::device;
|
|||||||
|
|
||||||
#define USE_SMEM_ATOMICS (__CUDA_ARCH__ >= 120)
|
#define USE_SMEM_ATOMICS (__CUDA_ARCH__ >= 120)
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace histograms
|
namespace hist {
|
||||||
{
|
|
||||||
#if (!USE_SMEM_ATOMICS)
|
#if (!USE_SMEM_ATOMICS)
|
||||||
|
|
||||||
#define TAG_MASK ( (1U << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE)) - 1U )
|
#define TAG_MASK ( (1U << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE)) - 1U )
|
||||||
@ -209,7 +207,7 @@ namespace cv { namespace gpu { namespace histograms
|
|||||||
dim3 block(16, 16);
|
dim3 block(16, 16);
|
||||||
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(cv::gpu::histograms::c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );
|
cudaSafeCall( cudaMemcpyToSymbol(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );
|
||||||
|
|
||||||
equalizeHist<<<grid, block, 0, stream>>>(src, dst);
|
equalizeHist<<<grid, block, 0, stream>>>(src, dst);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
@ -217,4 +215,7 @@ namespace cv { namespace gpu { namespace histograms
|
|||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
} // namespace hist
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -42,13 +42,15 @@
|
|||||||
|
|
||||||
#include "internal_shared.hpp"
|
#include "internal_shared.hpp"
|
||||||
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
// Other values are not supported
|
// Other values are not supported
|
||||||
#define CELL_WIDTH 8
|
#define CELL_WIDTH 8
|
||||||
#define CELL_HEIGHT 8
|
#define CELL_HEIGHT 8
|
||||||
#define CELLS_PER_BLOCK_X 2
|
#define CELLS_PER_BLOCK_X 2
|
||||||
#define CELLS_PER_BLOCK_Y 2
|
#define CELLS_PER_BLOCK_Y 2
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace hog {
|
namespace hog {
|
||||||
|
|
||||||
__constant__ int cnbins;
|
__constant__ int cnbins;
|
||||||
__constant__ int cblock_stride_x;
|
__constant__ int cblock_stride_x;
|
||||||
@ -83,23 +85,23 @@ int power_2up(unsigned int n)
|
|||||||
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
|
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
|
||||||
int nblocks_win_x, int nblocks_win_y)
|
int nblocks_win_x, int nblocks_win_y)
|
||||||
{
|
{
|
||||||
uploadConstant("cv::gpu::hog::cnbins", nbins);
|
cudaSafeCall( cudaMemcpyToSymbol(cnbins, &nbins, sizeof(nbins)) );
|
||||||
uploadConstant("cv::gpu::hog::cblock_stride_x", block_stride_x);
|
cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_x, &block_stride_x, sizeof(block_stride_x)) );
|
||||||
uploadConstant("cv::gpu::hog::cblock_stride_y", block_stride_y);
|
cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_y, &block_stride_y, sizeof(block_stride_y)) );
|
||||||
uploadConstant("cv::gpu::hog::cnblocks_win_x", nblocks_win_x);
|
cudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_x, &nblocks_win_x, sizeof(nblocks_win_x)) );
|
||||||
uploadConstant("cv::gpu::hog::cnblocks_win_y", nblocks_win_y);
|
cudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_y, &nblocks_win_y, sizeof(nblocks_win_y)) );
|
||||||
|
|
||||||
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
|
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
|
||||||
uploadConstant("cv::gpu::hog::cblock_hist_size", block_hist_size);
|
cudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size, &block_hist_size, sizeof(block_hist_size)) );
|
||||||
|
|
||||||
int block_hist_size_2up = power_2up(block_hist_size);
|
int block_hist_size_2up = power_2up(block_hist_size);
|
||||||
uploadConstant("cv::gpu::hog::cblock_hist_size_2up", block_hist_size_2up);
|
cudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size_2up, &block_hist_size_2up, sizeof(block_hist_size_2up)) );
|
||||||
|
|
||||||
int descr_width = nblocks_win_x * block_hist_size;
|
int descr_width = nblocks_win_x * block_hist_size;
|
||||||
uploadConstant("cv::gpu::hog::cdescr_width", descr_width);
|
cudaSafeCall( cudaMemcpyToSymbol(cdescr_width, &descr_width, sizeof(descr_width)) );
|
||||||
|
|
||||||
int descr_size = descr_width * nblocks_win_y;
|
int descr_size = descr_width * nblocks_win_y;
|
||||||
uploadConstant("cv::gpu::hog::cdescr_size", descr_size);
|
cudaSafeCall( cudaMemcpyToSymbol(cdescr_size, &descr_size, sizeof(descr_size)) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -153,10 +155,10 @@ __global__ void compute_hists_kernel_many_blocks(const int img_block_width, cons
|
|||||||
int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
|
int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
|
||||||
int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);
|
int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);
|
||||||
|
|
||||||
float gaussian = expf(-(dist_center_y * dist_center_y +
|
float gaussian = ::expf(-(dist_center_y * dist_center_y +
|
||||||
dist_center_x * dist_center_x) * scale);
|
dist_center_x * dist_center_x) * scale);
|
||||||
float interp_weight = (8.f - fabs(dist_y + 0.5f)) *
|
float interp_weight = (8.f - ::fabs(dist_y + 0.5f)) *
|
||||||
(8.f - fabs(dist_x + 0.5f)) / 64.f;
|
(8.f - ::fabs(dist_x + 0.5f)) / 64.f;
|
||||||
|
|
||||||
hist[bin.x * 48 * nblocks] += gaussian * interp_weight * vote.x;
|
hist[bin.x * 48 * nblocks] += gaussian * interp_weight * vote.x;
|
||||||
hist[bin.y * 48 * nblocks] += gaussian * interp_weight * vote.y;
|
hist[bin.y * 48 * nblocks] += gaussian * interp_weight * vote.y;
|
||||||
@ -273,15 +275,15 @@ __global__ void normalize_hists_kernel_many_blocks(const int block_hist_size,
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
float sum = reduce_smem<nthreads>(squares);
|
float sum = reduce_smem<nthreads>(squares);
|
||||||
|
|
||||||
float scale = 1.0f / (sqrtf(sum) + 0.1f * block_hist_size);
|
float scale = 1.0f / (::sqrtf(sum) + 0.1f * block_hist_size);
|
||||||
elem = min(elem * scale, threshold);
|
elem = ::min(elem * scale, threshold);
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
squares[threadIdx.x] = elem * elem;
|
squares[threadIdx.x] = elem * elem;
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
sum = reduce_smem<nthreads>(squares);
|
sum = reduce_smem<nthreads>(squares);
|
||||||
scale = 1.0f / (sqrtf(sum) + 1e-3f);
|
scale = 1.0f / (::sqrtf(sum) + 1e-3f);
|
||||||
|
|
||||||
if (threadIdx.x < block_hist_size)
|
if (threadIdx.x < block_hist_size)
|
||||||
hist[0] = elem * scale;
|
hist[0] = elem * scale;
|
||||||
@ -533,7 +535,7 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl
|
|||||||
|
|
||||||
if (threadIdx.x == 0)
|
if (threadIdx.x == 0)
|
||||||
{
|
{
|
||||||
val = row[max(x - 1, 1)];
|
val = row[::max(x - 1, 1)];
|
||||||
sh_row[0] = val.x;
|
sh_row[0] = val.x;
|
||||||
sh_row[(nthreads + 2)] = val.y;
|
sh_row[(nthreads + 2)] = val.y;
|
||||||
sh_row[2 * (nthreads + 2)] = val.z;
|
sh_row[2 * (nthreads + 2)] = val.z;
|
||||||
@ -541,7 +543,7 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl
|
|||||||
|
|
||||||
if (threadIdx.x == blockDim.x - 1)
|
if (threadIdx.x == blockDim.x - 1)
|
||||||
{
|
{
|
||||||
val = row[min(x + 1, width - 2)];
|
val = row[::min(x + 1, width - 2)];
|
||||||
sh_row[blockDim.x + 1] = val.x;
|
sh_row[blockDim.x + 1] = val.x;
|
||||||
sh_row[blockDim.x + 1 + (nthreads + 2)] = val.y;
|
sh_row[blockDim.x + 1 + (nthreads + 2)] = val.y;
|
||||||
sh_row[blockDim.x + 1 + 2 * (nthreads + 2)] = val.z;
|
sh_row[blockDim.x + 1 + 2 * (nthreads + 2)] = val.z;
|
||||||
@ -561,7 +563,7 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl
|
|||||||
|
|
||||||
float3 dx;
|
float3 dx;
|
||||||
if (correct_gamma)
|
if (correct_gamma)
|
||||||
dx = make_float3(sqrtf(b.x) - sqrtf(a.x), sqrtf(b.y) - sqrtf(a.y), sqrtf(b.z) - sqrtf(a.z));
|
dx = make_float3(::sqrtf(b.x) - ::sqrtf(a.x), ::sqrtf(b.y) - ::sqrtf(a.y), ::sqrtf(b.z) - ::sqrtf(a.z));
|
||||||
else
|
else
|
||||||
dx = make_float3(b.x - a.x, b.y - a.y, b.z - a.z);
|
dx = make_float3(b.x - a.x, b.y - a.y, b.z - a.z);
|
||||||
|
|
||||||
@ -576,7 +578,7 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl
|
|||||||
b = make_float3(val.x, val.y, val.z);
|
b = make_float3(val.x, val.y, val.z);
|
||||||
|
|
||||||
if (correct_gamma)
|
if (correct_gamma)
|
||||||
dy = make_float3(sqrtf(b.x) - sqrtf(a.x), sqrtf(b.y) - sqrtf(a.y), sqrtf(b.z) - sqrtf(a.z));
|
dy = make_float3(::sqrtf(b.x) - ::sqrtf(a.x), ::sqrtf(b.y) - ::sqrtf(a.y), ::sqrtf(b.z) - ::sqrtf(a.z));
|
||||||
else
|
else
|
||||||
dy = make_float3(b.x - a.x, b.y - a.y, b.z - a.z);
|
dy = make_float3(b.x - a.x, b.y - a.y, b.z - a.z);
|
||||||
}
|
}
|
||||||
@ -601,10 +603,10 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl
|
|||||||
mag0 = mag1;
|
mag0 = mag1;
|
||||||
}
|
}
|
||||||
|
|
||||||
mag0 = sqrtf(mag0);
|
mag0 = ::sqrtf(mag0);
|
||||||
|
|
||||||
float ang = (atan2f(best_dy, best_dx) + CV_PI_F) * angle_scale - 0.5f;
|
float ang = (::atan2f(best_dy, best_dx) + CV_PI_F) * angle_scale - 0.5f;
|
||||||
int hidx = (int)floorf(ang);
|
int hidx = (int)::floorf(ang);
|
||||||
ang -= hidx;
|
ang -= hidx;
|
||||||
hidx = (hidx + cnbins) % cnbins;
|
hidx = (hidx + cnbins) % cnbins;
|
||||||
|
|
||||||
@ -648,10 +650,10 @@ __global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrEl
|
|||||||
sh_row[threadIdx.x + 1] = row[width - 2];
|
sh_row[threadIdx.x + 1] = row[width - 2];
|
||||||
|
|
||||||
if (threadIdx.x == 0)
|
if (threadIdx.x == 0)
|
||||||
sh_row[0] = row[max(x - 1, 1)];
|
sh_row[0] = row[::max(x - 1, 1)];
|
||||||
|
|
||||||
if (threadIdx.x == blockDim.x - 1)
|
if (threadIdx.x == blockDim.x - 1)
|
||||||
sh_row[blockDim.x + 1] = row[min(x + 1, width - 2)];
|
sh_row[blockDim.x + 1] = row[::min(x + 1, width - 2)];
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
if (x < width)
|
if (x < width)
|
||||||
@ -659,7 +661,7 @@ __global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrEl
|
|||||||
float dx;
|
float dx;
|
||||||
|
|
||||||
if (correct_gamma)
|
if (correct_gamma)
|
||||||
dx = sqrtf(sh_row[threadIdx.x + 2]) - sqrtf(sh_row[threadIdx.x]);
|
dx = ::sqrtf(sh_row[threadIdx.x + 2]) - ::sqrtf(sh_row[threadIdx.x]);
|
||||||
else
|
else
|
||||||
dx = sh_row[threadIdx.x + 2] - sh_row[threadIdx.x];
|
dx = sh_row[threadIdx.x + 2] - sh_row[threadIdx.x];
|
||||||
|
|
||||||
@ -669,14 +671,14 @@ __global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrEl
|
|||||||
float a = ((const unsigned char*)img.ptr(blockIdx.y + 1))[x];
|
float a = ((const unsigned char*)img.ptr(blockIdx.y + 1))[x];
|
||||||
float b = ((const unsigned char*)img.ptr(blockIdx.y - 1))[x];
|
float b = ((const unsigned char*)img.ptr(blockIdx.y - 1))[x];
|
||||||
if (correct_gamma)
|
if (correct_gamma)
|
||||||
dy = sqrtf(a) - sqrtf(b);
|
dy = ::sqrtf(a) - ::sqrtf(b);
|
||||||
else
|
else
|
||||||
dy = a - b;
|
dy = a - b;
|
||||||
}
|
}
|
||||||
float mag = sqrtf(dx * dx + dy * dy);
|
float mag = ::sqrtf(dx * dx + dy * dy);
|
||||||
|
|
||||||
float ang = (atan2f(dy, dx) + CV_PI_F) * angle_scale - 0.5f;
|
float ang = (::atan2f(dy, dx) + CV_PI_F) * angle_scale - 0.5f;
|
||||||
int hidx = (int)floorf(ang);
|
int hidx = (int)::floorf(ang);
|
||||||
ang -= hidx;
|
ang -= hidx;
|
||||||
hidx = (hidx + cnbins) % cnbins;
|
hidx = (hidx + cnbins) % cnbins;
|
||||||
|
|
||||||
@ -768,4 +770,6 @@ static void resize_for_hog(const DevMem2Db& src, DevMem2Db dst, TEX& tex)
|
|||||||
void resize_8UC1(const DevMem2Db& src, DevMem2Db dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
|
void resize_8UC1(const DevMem2Db& src, DevMem2Db dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
|
||||||
void resize_8UC4(const DevMem2Db& src, DevMem2Db dst) { resize_for_hog<uchar4>(src, dst, resize8UC4_tex); }
|
void resize_8UC4(const DevMem2Db& src, DevMem2Db dst) { resize_for_hog<uchar4>(src, dst, resize8UC4_tex); }
|
||||||
|
|
||||||
}}}
|
} // namespace hog
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -46,11 +46,10 @@
|
|||||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||||
#include "opencv2/gpu/device/border_interpolate.hpp"
|
#include "opencv2/gpu/device/border_interpolate.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
namespace imgproc {
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
|
||||||
{
|
|
||||||
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
|
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
|
||||||
|
|
||||||
texture<uchar4, 2> tex_meanshift;
|
texture<uchar4, 2> tex_meanshift;
|
||||||
@ -105,7 +104,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
|
|
||||||
int norm2 = (s0 - c.x) * (s0 - c.x) + (s1 - c.y) * (s1 - c.y) + (s2 - c.z) * (s2 - c.z);
|
int norm2 = (s0 - c.x) * (s0 - c.x) + (s1 - c.y) * (s1 - c.y) + (s2 - c.z) * (s2 - c.z);
|
||||||
|
|
||||||
bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1-x0) + abs(y1-y0) + norm2 <= eps);
|
bool stopFlag = (x0 == x1 && y0 == y1) || (::abs(x1-x0) + ::abs(y1-y0) + norm2 <= eps);
|
||||||
|
|
||||||
x0 = x1; y0 = y1;
|
x0 = x1; y0 = y1;
|
||||||
c.x = s0; c.y = s1; c.z = s2;
|
c.x = s0; c.y = s1; c.z = s2;
|
||||||
@ -238,9 +237,9 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
res.y = p;
|
res.y = p;
|
||||||
res.z = V;
|
res.z = V;
|
||||||
}
|
}
|
||||||
const unsigned int b = (unsigned int)(max(0.f, min (res.x, 1.f)) * 255.f);
|
const unsigned int b = (unsigned int)(::max(0.f, ::min(res.x, 1.f)) * 255.f);
|
||||||
const unsigned int g = (unsigned int)(max(0.f, min (res.y, 1.f)) * 255.f);
|
const unsigned int g = (unsigned int)(::max(0.f, ::min(res.y, 1.f)) * 255.f);
|
||||||
const unsigned int r = (unsigned int)(max(0.f, min (res.z, 1.f)) * 255.f);
|
const unsigned int r = (unsigned int)(::max(0.f, ::min(res.z, 1.f)) * 255.f);
|
||||||
const unsigned int a = 255U;
|
const unsigned int a = 255U;
|
||||||
|
|
||||||
return (a << 24) + (r << 16) + (g << 8) + b;
|
return (a << 24) + (r << 16) + (g << 8) + b;
|
||||||
@ -830,9 +829,9 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
using namespace build_warp_maps;
|
using namespace build_warp_maps;
|
||||||
|
|
||||||
u /= cscale;
|
u /= cscale;
|
||||||
float x_ = sinf(u);
|
float x_ = ::sinf(u);
|
||||||
float y_ = v / cscale;
|
float y_ = v / cscale;
|
||||||
float z_ = cosf(u);
|
float z_ = ::cosf(u);
|
||||||
|
|
||||||
float z;
|
float z;
|
||||||
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
|
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
|
||||||
@ -855,10 +854,10 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
v /= cscale;
|
v /= cscale;
|
||||||
u /= cscale;
|
u /= cscale;
|
||||||
|
|
||||||
float sinv = sinf(v);
|
float sinv = ::sinf(v);
|
||||||
float x_ = sinv * sinf(u);
|
float x_ = sinv * ::sinf(u);
|
||||||
float y_ = -cosf(v);
|
float y_ = -::cosf(v);
|
||||||
float z_ = sinv * cosf(u);
|
float z_ = sinv * ::cosf(u);
|
||||||
|
|
||||||
float z;
|
float z;
|
||||||
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
|
x = ck_rinv[0] * x_ + ck_rinv[1] * y_ + ck_rinv[2] * z_;
|
||||||
@ -973,7 +972,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
// 0 | 0 0 | 0
|
// 0 | 0 0 | 0
|
||||||
// -----------
|
// -----------
|
||||||
// 0 | 0 0 | 0
|
// 0 | 0 0 | 0
|
||||||
smem[threadIdx.y][threadIdx.x] = src.ptr(min(max(y - 8, 0), src.rows - 1))[min(max(x - 8, 0), src.cols - 1)];
|
smem[threadIdx.y][threadIdx.x] = src.ptr(::min(::max(y - 8, 0), src.rows - 1))[::min(::max(x - 8, 0), src.cols - 1)];
|
||||||
|
|
||||||
// 0 | 0 x | x
|
// 0 | 0 x | x
|
||||||
// -----------
|
// -----------
|
||||||
@ -981,7 +980,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
// 0 | 0 0 | 0
|
// 0 | 0 0 | 0
|
||||||
// -----------
|
// -----------
|
||||||
// 0 | 0 0 | 0
|
// 0 | 0 0 | 0
|
||||||
smem[threadIdx.y][threadIdx.x + 16] = src.ptr(min(max(y - 8, 0), src.rows - 1))[min(x + 8, src.cols - 1)];
|
smem[threadIdx.y][threadIdx.x + 16] = src.ptr(::min(::max(y - 8, 0), src.rows - 1))[::min(x + 8, src.cols - 1)];
|
||||||
|
|
||||||
// 0 | 0 0 | 0
|
// 0 | 0 0 | 0
|
||||||
// -----------
|
// -----------
|
||||||
@ -989,7 +988,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
// x | x 0 | 0
|
// x | x 0 | 0
|
||||||
// -----------
|
// -----------
|
||||||
// x | x 0 | 0
|
// x | x 0 | 0
|
||||||
smem[threadIdx.y + 16][threadIdx.x] = src.ptr(min(y + 8, src.rows - 1))[min(max(x - 8, 0), src.cols - 1)];
|
smem[threadIdx.y + 16][threadIdx.x] = src.ptr(::min(y + 8, src.rows - 1))[::min(::max(x - 8, 0), src.cols - 1)];
|
||||||
|
|
||||||
// 0 | 0 0 | 0
|
// 0 | 0 0 | 0
|
||||||
// -----------
|
// -----------
|
||||||
@ -997,7 +996,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
// 0 | 0 x | x
|
// 0 | 0 x | x
|
||||||
// -----------
|
// -----------
|
||||||
// 0 | 0 x | x
|
// 0 | 0 x | x
|
||||||
smem[threadIdx.y + 16][threadIdx.x + 16] = src.ptr(min(y + 8, src.rows - 1))[min(x + 8, src.cols - 1)];
|
smem[threadIdx.y + 16][threadIdx.x + 16] = src.ptr(::min(y + 8, src.rows - 1))[::min(x + 8, src.cols - 1)];
|
||||||
|
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
@ -1031,7 +1030,6 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
cudaSafeCall(cudaDeviceSynchronize());
|
cudaSafeCall(cudaDeviceSynchronize());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace imgproc
|
||||||
|
|
||||||
}}}
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
|
||||||
|
@ -43,11 +43,15 @@
|
|||||||
#ifndef __OPENCV_internal_shared_HPP__
|
#ifndef __OPENCV_internal_shared_HPP__
|
||||||
#define __OPENCV_internal_shared_HPP__
|
#define __OPENCV_internal_shared_HPP__
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include <npp.h>
|
||||||
|
#include "NPP_staging.hpp"
|
||||||
#include "opencv2/gpu/devmem2d.hpp"
|
#include "opencv2/gpu/devmem2d.hpp"
|
||||||
#include "safe_call.hpp"
|
#include "safe_call.hpp"
|
||||||
#include "cuda_runtime.h"
|
|
||||||
#include "npp.h"
|
#ifndef CV_PI
|
||||||
#include "NPP_staging.hpp"
|
#define CV_PI 3.1415926535897932384626433832795f
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef CV_PI_F
|
#ifndef CV_PI_F
|
||||||
#ifndef CV_PI
|
#ifndef CV_PI
|
||||||
@ -57,15 +61,28 @@
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace cv
|
#define BEGIN_OPENCV_DEVICE_NAMESPACE namespace cv { namespace gpu { namespace device {
|
||||||
{
|
#define END_OPENCV_DEVICE_NAMESPACE }}}
|
||||||
namespace gpu
|
#define OPENCV_DEVICE_NAMESPACE ::cv::gpu::device
|
||||||
{
|
#define OPENCV_DEVICE_NAMESPACE_ ::cv::gpu::device::
|
||||||
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
typedef unsigned char uchar;
|
typedef unsigned char uchar;
|
||||||
typedef signed char schar;
|
|
||||||
typedef unsigned short ushort;
|
typedef unsigned short ushort;
|
||||||
|
typedef signed char schar;
|
||||||
typedef unsigned int uint;
|
typedef unsigned int uint;
|
||||||
|
|
||||||
|
template<class T> static inline void bindTexture(const textureReference* tex, const DevMem2D_<T>& img)
|
||||||
|
{
|
||||||
|
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
|
||||||
|
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
|
||||||
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace cv { namespace gpu
|
||||||
|
{
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
BORDER_REFLECT101_GPU = 0,
|
BORDER_REFLECT101_GPU = 0,
|
||||||
@ -81,7 +98,7 @@ namespace cv
|
|||||||
|
|
||||||
static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }
|
static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }
|
||||||
|
|
||||||
template<class T> static inline void uploadConstant(const char* name, const T& value)
|
/*template<class T> static inline void uploadConstant(const char* name, const T& value)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(name, &value, sizeof(T)) );
|
cudaSafeCall( cudaMemcpyToSymbol(name, &value, sizeof(T)) );
|
||||||
}
|
}
|
||||||
@ -89,77 +106,78 @@ namespace cv
|
|||||||
template<class T> static inline void uploadConstant(const char* name, const T& value, cudaStream_t stream)
|
template<class T> static inline void uploadConstant(const char* name, const T& value, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemcpyToSymbolAsync(name, &value, sizeof(T), 0, cudaMemcpyHostToDevice, stream) );
|
cudaSafeCall( cudaMemcpyToSymbolAsync(name, &value, sizeof(T), 0, cudaMemcpyHostToDevice, stream) );
|
||||||
}
|
} */
|
||||||
|
|
||||||
template<class T> static inline void bindTexture(const char* name, const DevMem2D_<T>& img/*, bool normalized = false,
|
//template<class T> static inline void bindTexture(const char* name, const DevMem2D_<T>& img)
|
||||||
enum cudaTextureFilterMode filterMode = cudaFilterModePoint, enum cudaTextureAddressMode addrMode = cudaAddressModeClamp*/)
|
//{
|
||||||
{
|
// //!!!! const_cast is disabled!
|
||||||
//!!!! const_cast is disabled!
|
// //!!!! Please use constructor of 'class texture' instead.
|
||||||
//!!!! Please use constructor of 'class texture' instead.
|
//
|
||||||
|
// //textureReference* tex;
|
||||||
|
// //cudaSafeCall( cudaGetTextureReference((const textureReference**)&tex, name) );
|
||||||
|
// //tex->normalized = normalized;
|
||||||
|
// //tex->filterMode = filterMode;
|
||||||
|
// //tex->addressMode[0] = addrMode;
|
||||||
|
// //tex->addressMode[1] = addrMode;
|
||||||
|
//
|
||||||
|
// const textureReference* tex;
|
||||||
|
// cudaSafeCall( cudaGetTextureReference(&tex, name) );
|
||||||
|
//
|
||||||
|
// cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
|
||||||
|
// cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
|
||||||
|
//}
|
||||||
|
|
||||||
//textureReference* tex;
|
//static inline void unbindTexture(const char *name)
|
||||||
//cudaSafeCall( cudaGetTextureReference((const textureReference**)&tex, name) );
|
//{
|
||||||
//tex->normalized = normalized;
|
// const textureReference* tex;
|
||||||
//tex->filterMode = filterMode;
|
// cudaSafeCall( cudaGetTextureReference(&tex, name) );
|
||||||
//tex->addressMode[0] = addrMode;
|
// cudaSafeCall( cudaUnbindTexture(tex) );
|
||||||
//tex->addressMode[1] = addrMode;
|
//}
|
||||||
|
|
||||||
const textureReference* tex;
|
|
||||||
cudaSafeCall( cudaGetTextureReference(&tex, name) );
|
|
||||||
|
|
||||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
|
|
||||||
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void unbindTexture(const char *name)
|
//class TextureBinder
|
||||||
{
|
//{
|
||||||
const textureReference* tex;
|
//public:
|
||||||
cudaSafeCall( cudaGetTextureReference(&tex, name) );
|
// TextureBinder() : tex_(0) {}
|
||||||
cudaSafeCall( cudaUnbindTexture(tex) );
|
// template <typename T> TextureBinder(const textureReference* tex, const DevMem2D_<T>& img) : tex_(0)
|
||||||
}
|
// {
|
||||||
|
// bind(tex, img);
|
||||||
class TextureBinder
|
// }
|
||||||
{
|
// template <typename T> TextureBinder(const char* tex_name, const DevMem2D_<T>& img) : tex_(0)
|
||||||
public:
|
// {
|
||||||
TextureBinder() : tex_(0) {}
|
// bind(tex_name, img);
|
||||||
template <typename T> TextureBinder(const textureReference* tex, const DevMem2D_<T>& img) : tex_(0)
|
// }
|
||||||
{
|
// ~TextureBinder() { unbind(); }
|
||||||
bind(tex, img);
|
//
|
||||||
}
|
// template <typename T> void bind(const textureReference* tex, const DevMem2D_<T>& img)
|
||||||
template <typename T> TextureBinder(const char* tex_name, const DevMem2D_<T>& img) : tex_(0)
|
// {
|
||||||
{
|
// unbind();
|
||||||
bind(tex_name, img);
|
//
|
||||||
}
|
// cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
|
||||||
~TextureBinder() { unbind(); }
|
// cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
|
||||||
|
//
|
||||||
template <typename T> void bind(const textureReference* tex, const DevMem2D_<T>& img)
|
// tex_ = tex;
|
||||||
{
|
// }
|
||||||
unbind();
|
// template <typename T> void bind(const char* tex_name, const DevMem2D_<T>& img)
|
||||||
|
// {
|
||||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
|
// const textureReference* tex;
|
||||||
cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
|
// cudaSafeCall( cudaGetTextureReference(&tex, tex_name) );
|
||||||
|
// bind(tex, img);
|
||||||
tex_ = tex;
|
// }
|
||||||
}
|
//
|
||||||
template <typename T> void bind(const char* tex_name, const DevMem2D_<T>& img)
|
// void unbind()
|
||||||
{
|
// {
|
||||||
const textureReference* tex;
|
// if (tex_)
|
||||||
cudaSafeCall( cudaGetTextureReference(&tex, tex_name) );
|
// {
|
||||||
bind(tex, img);
|
// cudaUnbindTexture(tex_);
|
||||||
}
|
// tex_ = 0;
|
||||||
|
// }
|
||||||
void unbind()
|
// }
|
||||||
{
|
//
|
||||||
if (tex_)
|
//private:
|
||||||
{
|
// const textureReference* tex_;
|
||||||
cudaUnbindTexture(tex_);
|
//};
|
||||||
tex_ = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
const textureReference* tex_;
|
|
||||||
};
|
|
||||||
|
|
||||||
class NppStreamHandler
|
class NppStreamHandler
|
||||||
{
|
{
|
||||||
@ -195,8 +213,6 @@ namespace cv
|
|||||||
private:
|
private:
|
||||||
cudaStream_t oldStream;
|
cudaStream_t oldStream;
|
||||||
};
|
};
|
||||||
}
|
}}
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* __OPENCV_internal_shared_HPP__ */
|
#endif /* __OPENCV_internal_shared_HPP__ */
|
||||||
|
@ -43,10 +43,9 @@
|
|||||||
#include "internal_shared.hpp"
|
#include "internal_shared.hpp"
|
||||||
#include "opencv2/gpu/device/vec_math.hpp"
|
#include "opencv2/gpu/device/vec_math.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc {
|
namespace match_template {
|
||||||
|
|
||||||
__device__ __forceinline__ float sum(float v) { return v; }
|
__device__ __forceinline__ float sum(float v) { return v; }
|
||||||
__device__ __forceinline__ float sum(float2 v) { return v.x + v.y; }
|
__device__ __forceinline__ float sum(float2 v) { return v.x + v.y; }
|
||||||
@ -266,9 +265,9 @@ void matchTemplatePrepared_SQDIFF_8U(int w, int h, const DevMem2D_<unsigned long
|
|||||||
|
|
||||||
__device__ float normAcc(float num, float denum)
|
__device__ float normAcc(float num, float denum)
|
||||||
{
|
{
|
||||||
if (fabs(num) < denum)
|
if (::fabs(num) < denum)
|
||||||
return num / denum;
|
return num / denum;
|
||||||
if (fabs(num) < denum * 1.125f)
|
if (::fabs(num) < denum * 1.125f)
|
||||||
return num > 0 ? 1 : -1;
|
return num > 0 ? 1 : -1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -276,9 +275,9 @@ __device__ float normAcc(float num, float denum)
|
|||||||
|
|
||||||
__device__ float normAcc_SQDIFF(float num, float denum)
|
__device__ float normAcc_SQDIFF(float num, float denum)
|
||||||
{
|
{
|
||||||
if (fabs(num) < denum)
|
if (::fabs(num) < denum)
|
||||||
return num / denum;
|
return num / denum;
|
||||||
if (fabs(num) < denum * 1.125f)
|
if (::fabs(num) < denum * 1.125f)
|
||||||
return num > 0 ? 1 : -1;
|
return num > 0 ? 1 : -1;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -906,4 +905,7 @@ void extractFirstChannel_32F(const DevMem2Db image, DevMem2Df result, int cn, cu
|
|||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
} //namespace match_template
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -42,17 +42,13 @@
|
|||||||
|
|
||||||
#include "internal_shared.hpp"
|
#include "internal_shared.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#ifndef CV_PI
|
namespace mathfunc {
|
||||||
#define CV_PI 3.1415926535897932384626433832795f
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Cart <-> Polar
|
// Cart <-> Polar
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace mathfunc
|
|
||||||
{
|
|
||||||
struct Nothing
|
struct Nothing
|
||||||
{
|
{
|
||||||
static __device__ __forceinline__ void calc(int, int, float, float, float*, size_t, float)
|
static __device__ __forceinline__ void calc(int, int, float, float, float*, size_t, float)
|
||||||
@ -63,7 +59,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
{
|
{
|
||||||
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
|
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
|
||||||
{
|
{
|
||||||
dst[y * dst_step + x] = sqrtf(x_data * x_data + y_data * y_data);
|
dst[y * dst_step + x] = ::sqrtf(x_data * x_data + y_data * y_data);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
struct MagnitudeSqr
|
struct MagnitudeSqr
|
||||||
@ -77,7 +73,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
{
|
{
|
||||||
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
|
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
|
||||||
{
|
{
|
||||||
float angle = atan2f(y_data, x_data);
|
float angle = ::atan2f(y_data, x_data);
|
||||||
angle += (angle < 0) * 2.0 * CV_PI;
|
angle += (angle < 0) * 2.0 * CV_PI;
|
||||||
dst[y * dst_step + x] = scale * angle;
|
dst[y * dst_step + x] = scale * angle;
|
||||||
}
|
}
|
||||||
@ -126,7 +122,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
float angle_data = angle[y * angle_step + x];
|
float angle_data = angle[y * angle_step + x];
|
||||||
float sin_a, cos_a;
|
float sin_a, cos_a;
|
||||||
|
|
||||||
sincosf(scale * angle_data, &sin_a, &cos_a);
|
::sincosf(scale * angle_data, &sin_a, &cos_a);
|
||||||
|
|
||||||
xptr[y * x_step + x] = mag_data * cos_a;
|
xptr[y * x_step + x] = mag_data * cos_a;
|
||||||
yptr[y * y_step + x] = mag_data * sin_a;
|
yptr[y * y_step + x] = mag_data * sin_a;
|
||||||
@ -134,7 +130,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename Mag, typename Angle>
|
template <typename Mag, typename Angle>
|
||||||
void cartToPolar_caller(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream)
|
void cartToPolar_caller(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -153,9 +149,9 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void cartToPolar_gpu(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, bool magSqr, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream)
|
void cartToPolar_gpu(DevMem2Df x, DevMem2Df y, DevMem2Df mag, bool magSqr, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2Df& x, const DevMem2Df& y, const DevMem2Df& mag, const DevMem2Df& angle, bool angleInDegrees, cudaStream_t stream);
|
typedef void (*caller_t)(DevMem2Df x, DevMem2Df y, DevMem2Df mag, DevMem2Df angle, bool angleInDegrees, cudaStream_t stream);
|
||||||
static const caller_t callers[2][2][2] =
|
static const caller_t callers[2][2][2] =
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
@ -184,7 +180,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename Mag>
|
template <typename Mag>
|
||||||
void polarToCart_caller(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream)
|
void polarToCart_caller(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dim3 threads(32, 8, 1);
|
dim3 threads(32, 8, 1);
|
||||||
dim3 grid(1, 1, 1);
|
dim3 grid(1, 1, 1);
|
||||||
@ -202,9 +198,9 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void polarToCart_gpu(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream)
|
void polarToCart_gpu(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef void (*caller_t)(const DevMem2Df& mag, const DevMem2Df& angle, const DevMem2Df& x, const DevMem2Df& y, bool angleInDegrees, cudaStream_t stream);
|
typedef void (*caller_t)(DevMem2Df mag, DevMem2Df angle, DevMem2Df x, DevMem2Df y, bool angleInDegrees, cudaStream_t stream);
|
||||||
static const caller_t callers[2] =
|
static const caller_t callers[2] =
|
||||||
{
|
{
|
||||||
polarToCart_caller<NonEmptyMag>,
|
polarToCart_caller<NonEmptyMag>,
|
||||||
@ -213,7 +209,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream);
|
callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace mathfunc
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -45,7 +45,7 @@
|
|||||||
#include "opencv2/gpu/device/transform.hpp"
|
#include "opencv2/gpu/device/transform.hpp"
|
||||||
#include "opencv2/gpu/device/functional.hpp"
|
#include "opencv2/gpu/device/functional.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device {
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
template <typename T> struct shift_and_sizeof;
|
template <typename T> struct shift_and_sizeof;
|
||||||
template <> struct shift_and_sizeof<signed char> { enum { shift = 0 }; };
|
template <> struct shift_and_sizeof<signed char> { enum { shift = 0 }; };
|
||||||
@ -73,7 +73,6 @@ namespace cv { namespace gpu { namespace device {
|
|||||||
mat_dst[idx] = mat_src[idx];
|
mat_dst[idx] = mat_src[idx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
typedef void (*CopyToFunc)(const DevMem2Db& mat_src, const DevMem2Db& mat_dst, const DevMem2Db& mask, int channels, const cudaStream_t & stream);
|
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void copy_to_with_mask_run(const DevMem2Db& mat_src, const DevMem2Db& mat_dst, const DevMem2Db& mask, int channels, const cudaStream_t & stream)
|
void copy_to_with_mask_run(const DevMem2Db& mat_src, const DevMem2Db& mat_dst, const DevMem2Db& mask, int channels, const cudaStream_t & stream)
|
||||||
@ -91,6 +90,8 @@ namespace cv { namespace gpu { namespace device {
|
|||||||
|
|
||||||
void copy_to_with_mask(const DevMem2Db& mat_src, DevMem2Db mat_dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t & stream)
|
void copy_to_with_mask(const DevMem2Db& mat_src, DevMem2Db mat_dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t & stream)
|
||||||
{
|
{
|
||||||
|
typedef void (*CopyToFunc)(const DevMem2Db& mat_src, const DevMem2Db& mat_dst, const DevMem2Db& mask, int channels, const cudaStream_t & stream);
|
||||||
|
|
||||||
static CopyToFunc tab[8] =
|
static CopyToFunc tab[8] =
|
||||||
{
|
{
|
||||||
copy_to_with_mask_run<unsigned char>,
|
copy_to_with_mask_run<unsigned char>,
|
||||||
@ -302,7 +303,7 @@ namespace cv { namespace gpu { namespace device {
|
|||||||
cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
|
cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
|
||||||
cudaSafeCall( cudaSetDoubleForDevice(&beta) );
|
cudaSafeCall( cudaSetDoubleForDevice(&beta) );
|
||||||
Convertor<T, D> op(alpha, beta);
|
Convertor<T, D> op(alpha, beta);
|
||||||
transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);
|
OPENCV_DEVICE_NAMESPACE_ transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta,
|
void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta,
|
||||||
@ -343,4 +344,5 @@ namespace cv { namespace gpu { namespace device {
|
|||||||
|
|
||||||
func(src, dst, alpha, beta, stream);
|
func(src, dst, alpha, beta, stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -40,17 +40,15 @@
|
|||||||
//
|
//
|
||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
|
#include "internal_shared.hpp"
|
||||||
#include "opencv2/gpu/device/limits.hpp"
|
#include "opencv2/gpu/device/limits.hpp"
|
||||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||||
#include "opencv2/gpu/device/vec_math.hpp"
|
#include "opencv2/gpu/device/vec_math.hpp"
|
||||||
#include "opencv2/gpu/device/transform.hpp"
|
#include "opencv2/gpu/device/transform.hpp"
|
||||||
#include "internal_shared.hpp"
|
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace mathfunc
|
namespace matrix_reductions {
|
||||||
{
|
|
||||||
|
|
||||||
// Performs reduction in shared memory
|
// Performs reduction in shared memory
|
||||||
template <int size, typename T>
|
template <int size, typename T>
|
||||||
@ -73,7 +71,6 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
struct Mask8U
|
struct Mask8U
|
||||||
{
|
{
|
||||||
explicit Mask8U(PtrStepb mask): mask(mask) {}
|
explicit Mask8U(PtrStepb mask): mask(mask) {}
|
||||||
@ -86,7 +83,6 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
PtrStepb mask;
|
PtrStepb mask;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct MaskTrue
|
struct MaskTrue
|
||||||
{
|
{
|
||||||
__device__ __forceinline__ bool operator()(int y, int x) const
|
__device__ __forceinline__ bool operator()(int y, int x) const
|
||||||
@ -109,10 +105,8 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template <> struct MinMaxTypeTraits<float> { typedef float best_type; };
|
template <> struct MinMaxTypeTraits<float> { typedef float best_type; };
|
||||||
template <> struct MinMaxTypeTraits<double> { typedef double best_type; };
|
template <> struct MinMaxTypeTraits<double> { typedef double best_type; };
|
||||||
|
|
||||||
|
|
||||||
namespace minmax
|
namespace minmax
|
||||||
{
|
{
|
||||||
|
|
||||||
__constant__ int ctwidth;
|
__constant__ int ctwidth;
|
||||||
__constant__ int ctheight;
|
__constant__ int ctheight;
|
||||||
|
|
||||||
@ -126,8 +120,8 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
{
|
{
|
||||||
threads = dim3(32, 8);
|
threads = dim3(32, 8);
|
||||||
grid = dim3(divUp(cols, threads.x * 8), divUp(rows, threads.y * 32));
|
grid = dim3(divUp(cols, threads.x * 8), divUp(rows, threads.y * 32));
|
||||||
grid.x = min(grid.x, threads.x);
|
grid.x = std::min(grid.x, threads.x);
|
||||||
grid.y = min(grid.y, threads.y);
|
grid.y = std::min(grid.y, threads.y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -155,8 +149,8 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ __forceinline__ void merge(uint tid, uint offset, volatile T* minval, volatile T* maxval)
|
__device__ __forceinline__ void merge(uint tid, uint offset, volatile T* minval, volatile T* maxval)
|
||||||
{
|
{
|
||||||
minval[tid] = min(minval[tid], minval[tid + offset]);
|
minval[tid] = ::min(minval[tid], minval[tid + offset]);
|
||||||
maxval[tid] = max(maxval[tid], maxval[tid + offset]);
|
maxval[tid] = ::max(maxval[tid], maxval[tid + offset]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -192,8 +186,8 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
T mymin = numeric_limits<T>::max();
|
T mymin = numeric_limits<T>::max();
|
||||||
T mymax = numeric_limits<T>::is_signed ? -numeric_limits<T>::max() : numeric_limits<T>::min();
|
T mymax = numeric_limits<T>::is_signed ? -numeric_limits<T>::max() : numeric_limits<T>::min();
|
||||||
uint y_end = min(y0 + (ctheight - 1) * blockDim.y + 1, src.rows);
|
uint y_end = ::min(y0 + (ctheight - 1) * blockDim.y + 1, src.rows);
|
||||||
uint x_end = min(x0 + (ctwidth - 1) * blockDim.x + 1, src.cols);
|
uint x_end = ::min(x0 + (ctwidth - 1) * blockDim.x + 1, src.cols);
|
||||||
for (uint y = y0; y < y_end; y += blockDim.y)
|
for (uint y = y0; y < y_end; y += blockDim.y)
|
||||||
{
|
{
|
||||||
const T* src_row = (const T*)src.ptr(y);
|
const T* src_row = (const T*)src.ptr(y);
|
||||||
@ -202,8 +196,8 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
T val = src_row[x];
|
T val = src_row[x];
|
||||||
if (mask(y, x))
|
if (mask(y, x))
|
||||||
{
|
{
|
||||||
mymin = min(mymin, val);
|
mymin = ::min(mymin, val);
|
||||||
mymax = max(mymax, val);
|
mymax = ::max(mymax, val);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -220,7 +214,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
maxval[blockIdx.y * gridDim.x + blockIdx.x] = (T)smaxval[0];
|
maxval[blockIdx.y * gridDim.x + blockIdx.x] = (T)smaxval[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
|
#if __CUDA_ARCH__ >= 110
|
||||||
__shared__ bool is_last;
|
__shared__ bool is_last;
|
||||||
|
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
@ -237,7 +231,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
if (is_last)
|
if (is_last)
|
||||||
{
|
{
|
||||||
uint idx = min(tid, gridDim.x * gridDim.y - 1);
|
uint idx = ::min(tid, gridDim.x * gridDim.y - 1);
|
||||||
|
|
||||||
sminval[tid] = minval[idx];
|
sminval[tid] = minval[idx];
|
||||||
smaxval[tid] = maxval[idx];
|
smaxval[tid] = maxval[idx];
|
||||||
@ -332,7 +326,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
__shared__ best_type smaxval[nthreads];
|
__shared__ best_type smaxval[nthreads];
|
||||||
|
|
||||||
uint tid = threadIdx.y * blockDim.x + threadIdx.x;
|
uint tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||||
uint idx = min(tid, size - 1);
|
uint idx = ::min(tid, size - 1);
|
||||||
|
|
||||||
sminval[tid] = minval[idx];
|
sminval[tid] = minval[idx];
|
||||||
smaxval[tid] = maxval[idx];
|
smaxval[tid] = maxval[idx];
|
||||||
@ -410,14 +404,13 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template void minMaxMultipassCaller<short>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxMultipassCaller<short>(const DevMem2Db, double*, double*, PtrStepb);
|
||||||
template void minMaxMultipassCaller<int>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxMultipassCaller<int>(const DevMem2Db, double*, double*, PtrStepb);
|
||||||
template void minMaxMultipassCaller<float>(const DevMem2Db, double*, double*, PtrStepb);
|
template void minMaxMultipassCaller<float>(const DevMem2Db, double*, double*, PtrStepb);
|
||||||
|
|
||||||
} // namespace minmax
|
} // namespace minmax
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// minMaxLoc
|
// minMaxLoc
|
||||||
|
|
||||||
namespace minmaxloc {
|
namespace minmaxloc
|
||||||
|
{
|
||||||
__constant__ int ctwidth;
|
__constant__ int ctwidth;
|
||||||
__constant__ int ctheight;
|
__constant__ int ctheight;
|
||||||
|
|
||||||
@ -431,8 +424,8 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
{
|
{
|
||||||
threads = dim3(32, 8);
|
threads = dim3(32, 8);
|
||||||
grid = dim3(divUp(cols, threads.x * 8), divUp(rows, threads.y * 32));
|
grid = dim3(divUp(cols, threads.x * 8), divUp(rows, threads.y * 32));
|
||||||
grid.x = min(grid.x, threads.x);
|
grid.x = std::min(grid.x, threads.x);
|
||||||
grid.y = min(grid.y, threads.y);
|
grid.y = std::min(grid.y, threads.y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -513,12 +506,11 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
uint tid = threadIdx.y * blockDim.x + threadIdx.x;
|
uint tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||||
|
|
||||||
T mymin = numeric_limits<T>::max();
|
T mymin = numeric_limits<T>::max();
|
||||||
T mymax = numeric_limits<T>::is_signed ? -numeric_limits<T>::max() :
|
T mymax = numeric_limits<T>::is_signed ? -numeric_limits<T>::max() : numeric_limits<T>::min();
|
||||||
numeric_limits<T>::min();
|
|
||||||
uint myminloc = 0;
|
uint myminloc = 0;
|
||||||
uint mymaxloc = 0;
|
uint mymaxloc = 0;
|
||||||
uint y_end = min(y0 + (ctheight - 1) * blockDim.y + 1, src.rows);
|
uint y_end = ::min(y0 + (ctheight - 1) * blockDim.y + 1, src.rows);
|
||||||
uint x_end = min(x0 + (ctwidth - 1) * blockDim.x + 1, src.cols);
|
uint x_end = ::min(x0 + (ctwidth - 1) * blockDim.x + 1, src.cols);
|
||||||
|
|
||||||
for (uint y = y0; y < y_end; y += blockDim.y)
|
for (uint y = y0; y < y_end; y += blockDim.y)
|
||||||
{
|
{
|
||||||
@ -542,7 +534,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);
|
findMinMaxLocInSmem<nthreads, best_type>(sminval, smaxval, sminloc, smaxloc, tid);
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
|
#if __CUDA_ARCH__ >= 110
|
||||||
__shared__ bool is_last;
|
__shared__ bool is_last;
|
||||||
|
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
@ -561,7 +553,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
if (is_last)
|
if (is_last)
|
||||||
{
|
{
|
||||||
uint idx = min(tid, gridDim.x * gridDim.y - 1);
|
uint idx = ::min(tid, gridDim.x * gridDim.y - 1);
|
||||||
|
|
||||||
sminval[tid] = minval[idx];
|
sminval[tid] = minval[idx];
|
||||||
smaxval[tid] = maxval[idx];
|
smaxval[tid] = maxval[idx];
|
||||||
@ -685,7 +677,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
__shared__ uint smaxloc[nthreads];
|
__shared__ uint smaxloc[nthreads];
|
||||||
|
|
||||||
uint tid = threadIdx.y * blockDim.x + threadIdx.x;
|
uint tid = threadIdx.y * blockDim.x + threadIdx.x;
|
||||||
uint idx = min(tid, size - 1);
|
uint idx = ::min(tid, size - 1);
|
||||||
|
|
||||||
sminval[tid] = minval[idx];
|
sminval[tid] = minval[idx];
|
||||||
smaxval[tid] = maxval[idx];
|
smaxval[tid] = maxval[idx];
|
||||||
@ -787,7 +779,6 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template void minMaxLocMultipassCaller<short>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMultipassCaller<short>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMultipassCaller<int>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMultipassCaller<int>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
template void minMaxLocMultipassCaller<float>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
template void minMaxLocMultipassCaller<float>(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
|
|
||||||
} // namespace minmaxloc
|
} // namespace minmaxloc
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -795,7 +786,6 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
namespace countnonzero
|
namespace countnonzero
|
||||||
{
|
{
|
||||||
|
|
||||||
__constant__ int ctwidth;
|
__constant__ int ctwidth;
|
||||||
__constant__ int ctheight;
|
__constant__ int ctheight;
|
||||||
|
|
||||||
@ -805,8 +795,8 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
{
|
{
|
||||||
threads = dim3(32, 8);
|
threads = dim3(32, 8);
|
||||||
grid = dim3(divUp(cols, threads.x * 8), divUp(rows, threads.y * 32));
|
grid = dim3(divUp(cols, threads.x * 8), divUp(rows, threads.y * 32));
|
||||||
grid.x = min(grid.x, threads.x);
|
grid.x = std::min(grid.x, threads.x);
|
||||||
grid.y = min(grid.y, threads.y);
|
grid.y = std::min(grid.y, threads.y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -850,7 +840,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
sumInSmem<nthreads, uint>(scount, tid);
|
sumInSmem<nthreads, uint>(scount, tid);
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
|
#if __CUDA_ARCH__ >= 110
|
||||||
__shared__ bool is_last;
|
__shared__ bool is_last;
|
||||||
|
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
@ -963,9 +953,8 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
// Sum
|
// Sum
|
||||||
|
|
||||||
namespace sums
|
namespace sum
|
||||||
{
|
{
|
||||||
|
|
||||||
template <typename T> struct SumType {};
|
template <typename T> struct SumType {};
|
||||||
template <> struct SumType<uchar> { typedef uint R; };
|
template <> struct SumType<uchar> { typedef uint R; };
|
||||||
template <> struct SumType<char> { typedef int R; };
|
template <> struct SumType<char> { typedef int R; };
|
||||||
@ -979,7 +968,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
struct IdentityOp { static __device__ __forceinline__ R call(R x) { return x; } };
|
struct IdentityOp { static __device__ __forceinline__ R call(R x) { return x; } };
|
||||||
|
|
||||||
template <typename R>
|
template <typename R>
|
||||||
struct AbsOp { static __device__ __forceinline__ R call(R x) { return abs(x); } };
|
struct AbsOp { static __device__ __forceinline__ R call(R x) { return ::abs(x); } };
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct AbsOp<uint> { static __device__ __forceinline__ uint call(uint x) { return x; } };
|
struct AbsOp<uint> { static __device__ __forceinline__ uint call(uint x) { return x; } };
|
||||||
@ -999,8 +988,8 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
threads = dim3(threads_x, threads_y);
|
threads = dim3(threads_x, threads_y);
|
||||||
grid = dim3(divUp(cols, threads.x * threads.y),
|
grid = dim3(divUp(cols, threads.x * threads.y),
|
||||||
divUp(rows, threads.y * threads.x));
|
divUp(rows, threads.y * threads.x));
|
||||||
grid.x = min(grid.x, threads.x);
|
grid.x = std::min(grid.x, threads.x);
|
||||||
grid.y = min(grid.y, threads.y);
|
grid.y = std::min(grid.y, threads.y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1044,7 +1033,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
|
|
||||||
sumInSmem<nthreads, R>(smem, tid);
|
sumInSmem<nthreads, R>(smem, tid);
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
|
#if __CUDA_ARCH__ >= 110
|
||||||
__shared__ bool is_last;
|
__shared__ bool is_last;
|
||||||
|
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
@ -1125,7 +1114,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
sumInSmem<nthreads, R>(smem, tid);
|
sumInSmem<nthreads, R>(smem, tid);
|
||||||
sumInSmem<nthreads, R>(smem + nthreads, tid);
|
sumInSmem<nthreads, R>(smem + nthreads, tid);
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
|
#if __CUDA_ARCH__ >= 110
|
||||||
__shared__ bool is_last;
|
__shared__ bool is_last;
|
||||||
|
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
@ -1232,7 +1221,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
sumInSmem<nthreads, R>(smem + nthreads, tid);
|
sumInSmem<nthreads, R>(smem + nthreads, tid);
|
||||||
sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);
|
sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
|
#if __CUDA_ARCH__ >= 110
|
||||||
__shared__ bool is_last;
|
__shared__ bool is_last;
|
||||||
|
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
@ -1349,7 +1338,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);
|
sumInSmem<nthreads, R>(smem + 2 * nthreads, tid);
|
||||||
sumInSmem<nthreads, R>(smem + 3 * nthreads, tid);
|
sumInSmem<nthreads, R>(smem + 3 * nthreads, tid);
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
|
#if __CUDA_ARCH__ >= 110
|
||||||
__shared__ bool is_last;
|
__shared__ bool is_last;
|
||||||
|
|
||||||
if (tid == 0)
|
if (tid == 0)
|
||||||
@ -1437,13 +1426,9 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace sum
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void sumMultipassCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
void sumMultipassCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
||||||
{
|
{
|
||||||
using namespace sums;
|
|
||||||
typedef typename SumType<T>::R R;
|
typedef typename SumType<T>::R R;
|
||||||
|
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
@ -1515,7 +1500,6 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void sumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
void sumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
||||||
{
|
{
|
||||||
using namespace sums;
|
|
||||||
typedef typename SumType<T>::R R;
|
typedef typename SumType<T>::R R;
|
||||||
|
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
@ -1565,7 +1549,6 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void absSumMultipassCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
void absSumMultipassCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
||||||
{
|
{
|
||||||
using namespace sums;
|
|
||||||
typedef typename SumType<T>::R R;
|
typedef typename SumType<T>::R R;
|
||||||
|
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
@ -1637,7 +1620,6 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void absSumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
void absSumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
||||||
{
|
{
|
||||||
using namespace sums;
|
|
||||||
typedef typename SumType<T>::R R;
|
typedef typename SumType<T>::R R;
|
||||||
|
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
@ -1687,7 +1669,6 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void sqrSumMultipassCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
void sqrSumMultipassCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
||||||
{
|
{
|
||||||
using namespace sums;
|
|
||||||
typedef typename SumType<T>::R R;
|
typedef typename SumType<T>::R R;
|
||||||
|
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
@ -1759,7 +1740,6 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void sqrSumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
void sqrSumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn)
|
||||||
{
|
{
|
||||||
using namespace sums;
|
|
||||||
typedef typename SumType<T>::R R;
|
typedef typename SumType<T>::R R;
|
||||||
|
|
||||||
dim3 threads, grid;
|
dim3 threads, grid;
|
||||||
@ -1804,6 +1784,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template void sqrSumCaller<short>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumCaller<short>(const DevMem2Db, PtrStepb, double*, int);
|
||||||
template void sqrSumCaller<int>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumCaller<int>(const DevMem2Db, PtrStepb, double*, int);
|
||||||
template void sqrSumCaller<float>(const DevMem2Db, PtrStepb, double*, int);
|
template void sqrSumCaller<float>(const DevMem2Db, PtrStepb, double*, int);
|
||||||
|
} // namespace sum
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// reduce
|
// reduce
|
||||||
@ -2101,4 +2082,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template void reduceCols_gpu<int, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceCols_gpu<int, int, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
||||||
|
|
||||||
template void reduceCols_gpu<float, float, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template void reduceCols_gpu<float, float, float>(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
} // namespace mattrix_reductions
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -46,11 +46,10 @@
|
|||||||
#include "opencv2/gpu/device/vec_math.hpp"
|
#include "opencv2/gpu/device/vec_math.hpp"
|
||||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
namespace pyr_down {
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
|
||||||
{
|
|
||||||
template <typename T, typename B> __global__ void pyrDown(const PtrStep<T> src, PtrStep<T> dst, const B b, int dst_cols)
|
template <typename T, typename B> __global__ void pyrDown(const PtrStep<T> src, PtrStep<T> dst, const B b, int dst_cols)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
|
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
|
||||||
@ -182,4 +181,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
template void pyrDown_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
template void pyrDown_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
||||||
template void pyrDown_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
template void pyrDown_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
||||||
template void pyrDown_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
template void pyrDown_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
} // namespace pyr_down
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -46,11 +46,10 @@
|
|||||||
#include "opencv2/gpu/device/vec_math.hpp"
|
#include "opencv2/gpu/device/vec_math.hpp"
|
||||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
namespace pyr_up {
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
|
||||||
{
|
|
||||||
template <typename T, typename B> __global__ void pyrUp(const PtrStep<T> src, DevMem2D_<T> dst, const B b)
|
template <typename T, typename B> __global__ void pyrUp(const PtrStep<T> src, DevMem2D_<T> dst, const B b)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
|
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
|
||||||
@ -177,4 +176,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
template void pyrUp_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
template void pyrUp_gpu<float, 2>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
||||||
template void pyrUp_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
template void pyrUp_gpu<float, 3>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
||||||
template void pyrUp_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
template void pyrUp_gpu<float, 4>(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
} // namespace pyr_up
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -47,11 +47,9 @@
|
|||||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||||
#include "opencv2/gpu/device/filters.hpp"
|
#include "opencv2/gpu/device/filters.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
namespace remap {
|
||||||
{
|
|
||||||
|
|
||||||
template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, DevMem2D_<T> dst)
|
template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, DevMem2D_<T> dst)
|
||||||
{
|
{
|
||||||
@ -124,7 +122,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
||||||
dim3 block(32, cc >= 20 ? 8 : 4); \
|
dim3 block(32, cc >= 20 ? 8 : 4); \
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||||
TextureBinder texHandler(&tex_remap_ ## type , src); \
|
bindTexture(&tex_remap_ ## type , src); \
|
||||||
tex_remap_ ## type ##_reader texSrc; \
|
tex_remap_ ## type ##_reader texSrc; \
|
||||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
|
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
|
||||||
BorderReader< tex_remap_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
|
BorderReader< tex_remap_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
|
||||||
@ -140,7 +138,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
{ \
|
{ \
|
||||||
dim3 block(32, 8); \
|
dim3 block(32, 8); \
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||||
TextureBinder texHandler(&tex_remap_ ## type , src); \
|
bindTexture(&tex_remap_ ## type , src); \
|
||||||
tex_remap_ ## type ##_reader texSrc; \
|
tex_remap_ ## type ##_reader texSrc; \
|
||||||
Filter< tex_remap_ ## type ##_reader > filter_src(texSrc); \
|
Filter< tex_remap_ ## type ##_reader > filter_src(texSrc); \
|
||||||
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
|
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
|
||||||
@ -250,4 +248,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
//template void remap_gpu<float2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
//template void remap_gpu<float2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void remap_gpu<float3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<float3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
template void remap_gpu<float4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
template void remap_gpu<float4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
}}}
|
|
||||||
|
} // namespace remap
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -47,11 +47,9 @@
|
|||||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||||
#include "opencv2/gpu/device/filters.hpp"
|
#include "opencv2/gpu/device/filters.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
namespace resize {
|
||||||
{
|
|
||||||
|
|
||||||
template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)
|
template <typename Ptr2D, typename T> __global__ void resize(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)
|
||||||
{
|
{
|
||||||
@ -161,7 +159,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
{ \
|
{ \
|
||||||
dim3 block(32, 8); \
|
dim3 block(32, 8); \
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||||
TextureBinder texHandler(&tex_resize_ ## type , src); \
|
bindTexture(&tex_resize_ ## type , src); \
|
||||||
tex_resize_ ## type ##_reader texSrc; \
|
tex_resize_ ## type ##_reader texSrc; \
|
||||||
Filter< tex_resize_ ## type ##_reader > filter_src(texSrc); \
|
Filter< tex_resize_ ## type ##_reader > filter_src(texSrc); \
|
||||||
resize<<<grid, block>>>(filter_src, fx, fy, dst); \
|
resize<<<grid, block>>>(filter_src, fx, fy, dst); \
|
||||||
@ -175,7 +173,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
{ \
|
{ \
|
||||||
dim3 block(32, 8); \
|
dim3 block(32, 8); \
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||||
TextureBinder texHandler(&tex_resize_ ## type , src); \
|
bindTexture(&tex_resize_ ## type , src); \
|
||||||
tex_resize_ ## type ##_reader texSrc; \
|
tex_resize_ ## type ##_reader texSrc; \
|
||||||
resizeNN<<<grid, block>>>(texSrc, fx, fy, dst); \
|
resizeNN<<<grid, block>>>(texSrc, fx, fy, dst); \
|
||||||
cudaSafeCall( cudaGetLastError() ); \
|
cudaSafeCall( cudaGetLastError() ); \
|
||||||
@ -261,4 +259,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
//template void resize_gpu<float2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
//template void resize_gpu<float2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||||
template void resize_gpu<float3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
template void resize_gpu<float3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||||
template void resize_gpu<float4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
template void resize_gpu<float4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
} // namespace resize
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -47,8 +47,7 @@
|
|||||||
#include "opencv2/gpu/device/limits.hpp"
|
#include "opencv2/gpu/device/limits.hpp"
|
||||||
#include "opencv2/gpu/device/border_interpolate.hpp"
|
#include "opencv2/gpu/device/border_interpolate.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
|
||||||
#define MAX_KERNEL_SIZE 16
|
#define MAX_KERNEL_SIZE 16
|
||||||
#define BLOCK_DIM_X 16
|
#define BLOCK_DIM_X 16
|
||||||
@ -56,8 +55,8 @@ using namespace cv::gpu::device;
|
|||||||
#define RESULT_STEPS 8
|
#define RESULT_STEPS 8
|
||||||
#define HALO_STEPS 1
|
#define HALO_STEPS 1
|
||||||
|
|
||||||
namespace filter_row
|
namespace row_filter {
|
||||||
{
|
|
||||||
__constant__ float c_kernel[MAX_KERNEL_SIZE];
|
__constant__ float c_kernel[MAX_KERNEL_SIZE];
|
||||||
|
|
||||||
void loadKernel(const float kernel[], int ksize)
|
void loadKernel(const float kernel[], int ksize)
|
||||||
@ -134,21 +133,18 @@ namespace filter_row
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace filters
|
|
||||||
{
|
|
||||||
template <int ksize, typename T, typename D, template<typename> class B>
|
template <int ksize, typename T, typename D, template<typename> class B>
|
||||||
void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
|
void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef typename filter_row::SmemType<T>::smem_t smem_t;
|
typedef typename SmemType<T>::smem_t smem_t;
|
||||||
|
|
||||||
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
|
const dim3 block(BLOCK_DIM_X, BLOCK_DIM_Y);
|
||||||
const dim3 grid(divUp(src.cols, RESULT_STEPS * BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
|
const dim3 grid(divUp(src.cols, RESULT_STEPS * BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
|
||||||
|
|
||||||
B<smem_t> b(src.cols);
|
B<smem_t> b(src.cols);
|
||||||
|
|
||||||
filter_row::linearRowFilter<ksize, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, b);
|
linearRowFilter<ksize, T, D><<<grid, block, 0, stream>>>(src, dst, anchor, b);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
@ -258,7 +254,7 @@ namespace cv { namespace gpu { namespace filters
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
filter_row::loadKernel(kernel, ksize);
|
loadKernel(kernel, ksize);
|
||||||
|
|
||||||
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
|
callers[brd_type][ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor, stream);
|
||||||
}
|
}
|
||||||
@ -270,4 +266,7 @@ namespace cv { namespace gpu { namespace filters
|
|||||||
template void linearRowFilter_gpu<short3, float3>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearRowFilter_gpu<short3, float3>(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
template void linearRowFilter_gpu<int , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearRowFilter_gpu<int , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
template void linearRowFilter_gpu<float , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
template void linearRowFilter_gpu<float , float >(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
} // namespace row_filter
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -43,9 +43,9 @@
|
|||||||
#ifndef __OPENCV_CUDA_SAFE_CALL_HPP__
|
#ifndef __OPENCV_CUDA_SAFE_CALL_HPP__
|
||||||
#define __OPENCV_CUDA_SAFE_CALL_HPP__
|
#define __OPENCV_CUDA_SAFE_CALL_HPP__
|
||||||
|
|
||||||
#include "cuda_runtime_api.h"
|
#include <cuda_runtime_api.h>
|
||||||
#include "cufft.h"
|
#include <cufft.h>
|
||||||
#include "cublas.h"
|
#include <cublas.h>
|
||||||
#include "NCV.hpp"
|
#include "NCV.hpp"
|
||||||
|
|
||||||
#if defined(__GNUC__)
|
#if defined(__GNUC__)
|
||||||
@ -62,10 +62,8 @@
|
|||||||
#define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__)
|
#define cublasSafeCall(expr) ___cublasSafeCall(expr, __FILE__, __LINE__)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace cv
|
namespace cv { namespace gpu {
|
||||||
{
|
|
||||||
namespace gpu
|
|
||||||
{
|
|
||||||
void error(const char *error_string, const char *file, const int line, const char *func = "");
|
void error(const char *error_string, const char *file, const int line, const char *func = "");
|
||||||
void nppError(int err, const char *file, const int line, const char *func = "");
|
void nppError(int err, const char *file, const int line, const char *func = "");
|
||||||
void ncvError(int err, const char *file, const int line, const char *func = "");
|
void ncvError(int err, const char *file, const int line, const char *func = "");
|
||||||
@ -101,7 +99,7 @@ namespace cv
|
|||||||
if (CUBLAS_STATUS_SUCCESS != err)
|
if (CUBLAS_STATUS_SUCCESS != err)
|
||||||
cv::gpu::cublasError(err, file, line, func);
|
cv::gpu::cublasError(err, file, line, func);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}}
|
||||||
|
|
||||||
#endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */
|
#endif /* __OPENCV_CUDA_SAFE_CALL_HPP__ */
|
@ -42,7 +42,9 @@
|
|||||||
|
|
||||||
#include "internal_shared.hpp"
|
#include "internal_shared.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace split_merge {
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace split_merge {
|
||||||
|
|
||||||
template <typename T, size_t elem_size = sizeof(T)>
|
template <typename T, size_t elem_size = sizeof(T)>
|
||||||
struct TypeTraits
|
struct TypeTraits
|
||||||
@ -274,7 +276,7 @@ namespace cv { namespace gpu { namespace split_merge {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
extern "C" void merge_caller(const DevMem2Db* src, DevMem2Db& dst,
|
void merge_caller(const DevMem2Db* src, DevMem2Db& dst,
|
||||||
int total_channels, size_t elem_size,
|
int total_channels, size_t elem_size,
|
||||||
const cudaStream_t& stream)
|
const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
@ -483,9 +485,7 @@ namespace cv { namespace gpu { namespace split_merge {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
extern "C" void split_caller(const DevMem2Db& src, DevMem2Db* dst,
|
void split_caller(const DevMem2Db& src, DevMem2Db* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream)
|
||||||
int num_channels, size_t elem_size1,
|
|
||||||
const cudaStream_t& stream)
|
|
||||||
{
|
{
|
||||||
static SplitFunction split_func_tbl[] =
|
static SplitFunction split_func_tbl[] =
|
||||||
{
|
{
|
||||||
@ -503,4 +503,6 @@ namespace cv { namespace gpu { namespace split_merge {
|
|||||||
split_func(src, dst, stream);
|
split_func(src, dst, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
}}} // namespace cv::gpu::split_merge
|
} // namespace split_merge
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -40,23 +40,18 @@
|
|||||||
//
|
//
|
||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
//#include "internal_shared.hpp"
|
#include "internal_shared.hpp"
|
||||||
#include "opencv2/gpu/devmem2d.hpp"
|
|
||||||
#include "safe_call.hpp"
|
|
||||||
static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }
|
|
||||||
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
using namespace cv::gpu;
|
namespace stereobm {
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
/////////////////////////////////////// Streeo BM ////////////////////////////////////////////////
|
/////////////////////////////////////// Stereo BM ////////////////////////////////////////////////
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
#define ROWSperTHREAD 21 // the number of rows a thread will process
|
#define ROWSperTHREAD 21 // the number of rows a thread will process
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bm
|
|
||||||
{
|
|
||||||
|
|
||||||
#define BLOCK_W 128 // the thread block width (464)
|
#define BLOCK_W 128 // the thread block width (464)
|
||||||
#define N_DISPARITIES 8
|
#define N_DISPARITIES 8
|
||||||
|
|
||||||
@ -117,7 +112,7 @@ __device__ uint2 MinSSD(volatile unsigned int *col_ssd_cache, volatile unsigned
|
|||||||
__syncthreads();
|
__syncthreads();
|
||||||
ssd[7] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 7 * (BLOCK_W + 2 * RADIUS));
|
ssd[7] = CalcSSD<RADIUS>(col_ssd_cache, col_ssd + 7 * (BLOCK_W + 2 * RADIUS));
|
||||||
|
|
||||||
int mssd = min(min(min(ssd[0], ssd[1]), min(ssd[4], ssd[5])), min(min(ssd[2], ssd[3]), min(ssd[6], ssd[7])));
|
int mssd = ::min(::min(::min(ssd[0], ssd[1]), ::min(ssd[4], ssd[5])), ::min(::min(ssd[2], ssd[3]), ::min(ssd[6], ssd[7])));
|
||||||
|
|
||||||
int bestIdx = 0;
|
int bestIdx = 0;
|
||||||
for (int i = 0; i < N_DISPARITIES; i++)
|
for (int i = 0; i < N_DISPARITIES; i++)
|
||||||
@ -252,7 +247,7 @@ __global__ void stereoKernel(unsigned char *left, unsigned char *right, size_t i
|
|||||||
for(uint *ptr = minSSDImage; ptr != minSSDImage_end; ptr += minssd_step )
|
for(uint *ptr = minSSDImage; ptr != minSSDImage_end; ptr += minssd_step )
|
||||||
*ptr = 0xFFFFFFFF;
|
*ptr = 0xFFFFFFFF;
|
||||||
}*/
|
}*/
|
||||||
int end_row = min(ROWSperTHREAD, cheight - Y - RADIUS);
|
int end_row = ::min(ROWSperTHREAD, cheight - Y - RADIUS);
|
||||||
int y_tex;
|
int y_tex;
|
||||||
int x_tex = X - RADIUS;
|
int x_tex = X - RADIUS;
|
||||||
|
|
||||||
@ -346,7 +341,7 @@ const static kernel_caller_t callers[] =
|
|||||||
};
|
};
|
||||||
const int calles_num = sizeof(callers)/sizeof(callers[0]);
|
const int calles_num = sizeof(callers)/sizeof(callers[0]);
|
||||||
|
|
||||||
extern "C" void stereoBM_GPU(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf, cudaStream_t& stream)
|
void stereoBM_GPU(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int maxdisp, int winsz, const DevMem2D_<unsigned int>& minSSD_buf, cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
int winsz2 = winsz >> 1;
|
int winsz2 = winsz >> 1;
|
||||||
|
|
||||||
@ -375,7 +370,7 @@ extern "C" void stereoBM_GPU(const DevMem2Db& left, const DevMem2Db& right, cons
|
|||||||
|
|
||||||
texture<unsigned char, 2, cudaReadModeElementType> texForSobel;
|
texture<unsigned char, 2, cudaReadModeElementType> texForSobel;
|
||||||
|
|
||||||
extern "C" __global__ void prefilter_kernel(DevMem2Db output, int prefilterCap)
|
__global__ void prefilter_kernel(DevMem2Db output, int prefilterCap)
|
||||||
{
|
{
|
||||||
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||||
@ -387,12 +382,12 @@ extern "C" __global__ void prefilter_kernel(DevMem2Db output, int prefilterCap)
|
|||||||
(int)tex2D(texForSobel, x - 1, y + 1) * (-1) + (int)tex2D(texForSobel, x + 1, y + 1) * (1);
|
(int)tex2D(texForSobel, x - 1, y + 1) * (-1) + (int)tex2D(texForSobel, x + 1, y + 1) * (1);
|
||||||
|
|
||||||
|
|
||||||
conv = min(min(max(-prefilterCap, conv), prefilterCap) + prefilterCap, 255);
|
conv = ::min(::min(::max(-prefilterCap, conv), prefilterCap) + prefilterCap, 255);
|
||||||
output.ptr(y)[x] = conv & 0xFF;
|
output.ptr(y)[x] = conv & 0xFF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void prefilter_xsobel(const DevMem2Db& input, const DevMem2Db& output, int prefilterCap, cudaStream_t & stream)
|
void prefilter_xsobel(const DevMem2Db& input, const DevMem2Db& output, int prefilterCap, cudaStream_t & stream)
|
||||||
{
|
{
|
||||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
||||||
cudaSafeCall( cudaBindTexture2D( 0, texForSobel, input.data, desc, input.cols, input.rows, input.step ) );
|
cudaSafeCall( cudaBindTexture2D( 0, texForSobel, input.data, desc, input.cols, input.rows, input.step ) );
|
||||||
@ -451,7 +446,7 @@ __device__ float CalcSums(float *cols, float *cols_cache, int winsz)
|
|||||||
|
|
||||||
#define RpT (2 * ROWSperTHREAD) // got experimentally
|
#define RpT (2 * ROWSperTHREAD) // got experimentally
|
||||||
|
|
||||||
extern "C" __global__ void textureness_kernel(DevMem2Db disp, int winsz, float threshold)
|
__global__ void textureness_kernel(DevMem2Db disp, int winsz, float threshold)
|
||||||
{
|
{
|
||||||
int winsz2 = winsz/2;
|
int winsz2 = winsz/2;
|
||||||
int n_dirty_pixels = (winsz2) * 2;
|
int n_dirty_pixels = (winsz2) * 2;
|
||||||
@ -462,7 +457,7 @@ extern "C" __global__ void textureness_kernel(DevMem2Db disp, int winsz, float t
|
|||||||
|
|
||||||
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
int beg_row = blockIdx.y * RpT;
|
int beg_row = blockIdx.y * RpT;
|
||||||
int end_row = min(beg_row + RpT, disp.rows);
|
int end_row = ::min(beg_row + RpT, disp.rows);
|
||||||
|
|
||||||
if (x < disp.cols)
|
if (x < disp.cols)
|
||||||
{
|
{
|
||||||
@ -510,7 +505,7 @@ extern "C" __global__ void textureness_kernel(DevMem2Db disp, int winsz, float t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" void postfilter_textureness(const DevMem2Db& input, int winsz, float avgTexturenessThreshold, const DevMem2Db& disp, cudaStream_t & stream)
|
void postfilter_textureness(const DevMem2Db& input, int winsz, float avgTexturenessThreshold, const DevMem2Db& disp, cudaStream_t & stream)
|
||||||
{
|
{
|
||||||
avgTexturenessThreshold *= winsz * winsz;
|
avgTexturenessThreshold *= winsz * winsz;
|
||||||
|
|
||||||
@ -537,4 +532,6 @@ extern "C" void postfilter_textureness(const DevMem2Db& input, int winsz, float
|
|||||||
cudaSafeCall( cudaUnbindTexture (texForTF) );
|
cudaSafeCall( cudaUnbindTexture (texForTF) );
|
||||||
}
|
}
|
||||||
|
|
||||||
}}}
|
} // namespace stereobm
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -44,11 +44,10 @@
|
|||||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||||
#include "opencv2/gpu/device/limits.hpp"
|
#include "opencv2/gpu/device/limits.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
namespace stereobp {
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bp
|
|
||||||
{
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
/////////////////////// load constants ////////////////////////
|
/////////////////////// load constants ////////////////////////
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
@ -81,7 +80,7 @@ namespace cv { namespace gpu { namespace bp
|
|||||||
}
|
}
|
||||||
__device__ __forceinline__ float operator()(const uchar* rs) const
|
__device__ __forceinline__ float operator()(const uchar* rs) const
|
||||||
{
|
{
|
||||||
return abs((int)l - *rs);
|
return ::abs((int)l - *rs);
|
||||||
}
|
}
|
||||||
uchar l;
|
uchar l;
|
||||||
};
|
};
|
||||||
@ -97,9 +96,9 @@ namespace cv { namespace gpu { namespace bp
|
|||||||
const float tg = 0.587f;
|
const float tg = 0.587f;
|
||||||
const float tb = 0.114f;
|
const float tb = 0.114f;
|
||||||
|
|
||||||
float val = tb * abs((int)l.x - rs[0]);
|
float val = tb * ::abs((int)l.x - rs[0]);
|
||||||
val += tg * abs((int)l.y - rs[1]);
|
val += tg * ::abs((int)l.y - rs[1]);
|
||||||
val += tr * abs((int)l.z - rs[2]);
|
val += tr * ::abs((int)l.z - rs[2]);
|
||||||
|
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
@ -119,9 +118,9 @@ namespace cv { namespace gpu { namespace bp
|
|||||||
|
|
||||||
uchar4 r = *((uchar4*)rs);
|
uchar4 r = *((uchar4*)rs);
|
||||||
|
|
||||||
float val = tb * abs((int)l.x - r.x);
|
float val = tb * ::abs((int)l.x - r.x);
|
||||||
val += tg * abs((int)l.y - r.y);
|
val += tg * ::abs((int)l.y - r.y);
|
||||||
val += tr * abs((int)l.z - r.z);
|
val += tr * ::abs((int)l.z - r.z);
|
||||||
|
|
||||||
return val;
|
return val;
|
||||||
}
|
}
|
||||||
@ -328,10 +327,13 @@ namespace cv { namespace gpu { namespace bp
|
|||||||
|
|
||||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (DevMem2D_<T>)mus[src_idx], (DevMem2D_<T>)mus[dst_idx]);
|
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (DevMem2D_<T>)mus[src_idx], (DevMem2D_<T>)mus[dst_idx]);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (DevMem2D_<T>)mds[src_idx], (DevMem2D_<T>)mds[dst_idx]);
|
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (DevMem2D_<T>)mds[src_idx], (DevMem2D_<T>)mds[dst_idx]);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (DevMem2D_<T>)mls[src_idx], (DevMem2D_<T>)mls[dst_idx]);
|
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (DevMem2D_<T>)mls[src_idx], (DevMem2D_<T>)mls[dst_idx]);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (DevMem2D_<T>)mrs[src_idx], (DevMem2D_<T>)mrs[dst_idx]);
|
level_up_message<T><<<grid, threads, 0, stream>>>(dst_cols, dst_rows, src_rows, (DevMem2D_<T>)mrs[src_idx], (DevMem2D_<T>)mrs[dst_idx]);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
@ -380,7 +382,7 @@ namespace cv { namespace gpu { namespace bp
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ void message(const T* msg1, const T* msg2, const T* msg3, const T* data, T* dst, size_t msg_disp_step, size_t data_disp_step)
|
__device__ void message(const T* msg1, const T* msg2, const T* msg3, const T* data, T* dst, size_t msg_disp_step, size_t data_disp_step)
|
||||||
{
|
{
|
||||||
float minimum = numeric_limits<float>::max();
|
float minimum = device::numeric_limits<float>::max();
|
||||||
|
|
||||||
for(int i = 0; i < cndisp; ++i)
|
for(int i = 0; i < cndisp; ++i)
|
||||||
{
|
{
|
||||||
@ -524,4 +526,7 @@ namespace cv { namespace gpu { namespace bp
|
|||||||
|
|
||||||
template void output_gpu<short>(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, const DevMem2D_<short>& disp, cudaStream_t stream);
|
template void output_gpu<short>(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, const DevMem2D_<short>& disp, cudaStream_t stream);
|
||||||
template void output_gpu<float>(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, const DevMem2D_<short>& disp, cudaStream_t stream);
|
template void output_gpu<float>(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data, const DevMem2D_<short>& disp, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
} // namespace stereobp
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -44,12 +44,9 @@
|
|||||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||||
#include "opencv2/gpu/device/limits.hpp"
|
#include "opencv2/gpu/device/limits.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
|
||||||
|
namespace stereocsbp {
|
||||||
namespace cv { namespace gpu { namespace csbp
|
|
||||||
{
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
/////////////////////// load constants ////////////////////////
|
/////////////////////// load constants ////////////////////////
|
||||||
@ -103,16 +100,16 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
{
|
{
|
||||||
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
|
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
|
||||||
{
|
{
|
||||||
return fmin(cdata_weight * abs((int)*left - *right), cdata_weight * cmax_data_term);
|
return fmin(cdata_weight * ::abs((int)*left - *right), cdata_weight * cmax_data_term);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
template <> struct DataCostPerPixel<3>
|
template <> struct DataCostPerPixel<3>
|
||||||
{
|
{
|
||||||
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
|
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
|
||||||
{
|
{
|
||||||
float tb = 0.114f * abs((int)left[0] - right[0]);
|
float tb = 0.114f * ::abs((int)left[0] - right[0]);
|
||||||
float tg = 0.587f * abs((int)left[1] - right[1]);
|
float tg = 0.587f * ::abs((int)left[1] - right[1]);
|
||||||
float tr = 0.299f * abs((int)left[2] - right[2]);
|
float tr = 0.299f * ::abs((int)left[2] - right[2]);
|
||||||
|
|
||||||
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
|
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
|
||||||
}
|
}
|
||||||
@ -124,9 +121,9 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
uchar4 l = *((const uchar4*)left);
|
uchar4 l = *((const uchar4*)left);
|
||||||
uchar4 r = *((const uchar4*)right);
|
uchar4 r = *((const uchar4*)right);
|
||||||
|
|
||||||
float tb = 0.114f * abs((int)l.x - r.x);
|
float tb = 0.114f * ::abs((int)l.x - r.x);
|
||||||
float tg = 0.587f * abs((int)l.y - r.y);
|
float tg = 0.587f * ::abs((int)l.y - r.y);
|
||||||
float tr = 0.299f * abs((int)l.z - r.z);
|
float tr = 0.299f * ::abs((int)l.z - r.z);
|
||||||
|
|
||||||
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
|
return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
|
||||||
}
|
}
|
||||||
@ -146,7 +143,7 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
|
|
||||||
for(int i = 0; i < nr_plane; i++)
|
for(int i = 0; i < nr_plane; i++)
|
||||||
{
|
{
|
||||||
T minimum = numeric_limits<T>::max();
|
T minimum = device::numeric_limits<T>::max();
|
||||||
int id = 0;
|
int id = 0;
|
||||||
for(int d = 0; d < cndisp; d++)
|
for(int d = 0; d < cndisp; d++)
|
||||||
{
|
{
|
||||||
@ -276,7 +273,7 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
int x0 = x_out << level;
|
int x0 = x_out << level;
|
||||||
int y0 = y_out << level;
|
int y0 = y_out << level;
|
||||||
|
|
||||||
int len = min(y0 + winsz, rows) - y0;
|
int len = ::min(y0 + winsz, rows) - y0;
|
||||||
|
|
||||||
float val = 0.0f;
|
float val = 0.0f;
|
||||||
if (x0 + tid < cols)
|
if (x0 + tid < cols)
|
||||||
@ -475,7 +472,7 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
int x0 = x_out << level;
|
int x0 = x_out << level;
|
||||||
int y0 = y_out << level;
|
int y0 = y_out << level;
|
||||||
|
|
||||||
int len = min(y0 + winsz, rows) - y0;
|
int len = ::min(y0 + winsz, rows) - y0;
|
||||||
|
|
||||||
float val = 0.0f;
|
float val = 0.0f;
|
||||||
if (x0 + tid < cols)
|
if (x0 + tid < cols)
|
||||||
@ -645,10 +642,10 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
|
|
||||||
if (y < h && x < w)
|
if (y < h && x < w)
|
||||||
{
|
{
|
||||||
const T* u_cur = u_cur_ + min(h2-1, y/2 + 1) * cmsg_step2 + x/2;
|
const T* u_cur = u_cur_ + ::min(h2-1, y/2 + 1) * cmsg_step2 + x/2;
|
||||||
const T* d_cur = d_cur_ + max(0, y/2 - 1) * cmsg_step2 + x/2;
|
const T* d_cur = d_cur_ + ::max(0, y/2 - 1) * cmsg_step2 + x/2;
|
||||||
const T* l_cur = l_cur_ + y/2 * cmsg_step2 + min(w2-1, x/2 + 1);
|
const T* l_cur = l_cur_ + y/2 * cmsg_step2 + ::min(w2-1, x/2 + 1);
|
||||||
const T* r_cur = r_cur_ + y/2 * cmsg_step2 + max(0, x/2 - 1);
|
const T* r_cur = r_cur_ + y/2 * cmsg_step2 + ::max(0, x/2 - 1);
|
||||||
|
|
||||||
T* data_cost_new = (T*)ctemp + y * cmsg_step1 + x;
|
T* data_cost_new = (T*)ctemp + y * cmsg_step1 + x;
|
||||||
|
|
||||||
@ -756,7 +753,7 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
T src_disp_reg = src_disp[d * cdisp_step1];
|
T src_disp_reg = src_disp[d * cdisp_step1];
|
||||||
|
|
||||||
for(int d2 = 0; d2 < nr_plane; d2++)
|
for(int d2 = 0; d2 < nr_plane; d2++)
|
||||||
cost_min = fmin(cost_min, msg_dst[d2 * cdisp_step1] + cdisc_single_jump * abs(dst_disp[d2 * cdisp_step1] - src_disp_reg));
|
cost_min = fmin(cost_min, msg_dst[d2 * cdisp_step1] + cdisc_single_jump * ::abs(dst_disp[d2 * cdisp_step1] - src_disp_reg));
|
||||||
|
|
||||||
temp[d * cdisp_step1] = saturate_cast<T>(cost_min);
|
temp[d * cdisp_step1] = saturate_cast<T>(cost_min);
|
||||||
sum += cost_min;
|
sum += cost_min;
|
||||||
@ -892,4 +889,7 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
|
|
||||||
template void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step,
|
template void compute_disp(const float* u, const float* d, const float* l, const float* r, const float* data_cost_selected, const float* disp_selected, size_t msg_step,
|
||||||
const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);
|
const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
} // namespace stereocsbp
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -52,13 +52,10 @@
|
|||||||
#include "opencv2/gpu/device/functional.hpp"
|
#include "opencv2/gpu/device/functional.hpp"
|
||||||
#include "opencv2/gpu/device/filters.hpp"
|
#include "opencv2/gpu/device/filters.hpp"
|
||||||
|
|
||||||
using namespace cv::gpu;
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
|
||||||
#define CV_PI 3.1415926535897932384626433832795f
|
namespace surf {
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace surf
|
|
||||||
{
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Global parameters
|
// Global parameters
|
||||||
|
|
||||||
@ -80,15 +77,46 @@ namespace cv { namespace gpu { namespace surf
|
|||||||
__constant__ int c_layer_rows;
|
__constant__ int c_layer_rows;
|
||||||
__constant__ int c_layer_cols;
|
__constant__ int c_layer_cols;
|
||||||
|
|
||||||
|
void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold)
|
||||||
|
{
|
||||||
|
cudaSafeCall( cudaMemcpyToSymbol(c_max_candidates, &maxCandidates, sizeof(maxCandidates)) );
|
||||||
|
cudaSafeCall( cudaMemcpyToSymbol(c_max_features, &maxFeatures, sizeof(maxFeatures)) );
|
||||||
|
cudaSafeCall( cudaMemcpyToSymbol(c_img_rows, &img_rows, sizeof(img_rows)) );
|
||||||
|
cudaSafeCall( cudaMemcpyToSymbol(c_img_cols, &img_cols, sizeof(img_cols)) );
|
||||||
|
cudaSafeCall( cudaMemcpyToSymbol(c_nOctaveLayers, &nOctaveLayers, sizeof(nOctaveLayers)) );
|
||||||
|
cudaSafeCall( cudaMemcpyToSymbol(c_hessianThreshold, &hessianThreshold, sizeof(hessianThreshold)) );
|
||||||
|
}
|
||||||
|
|
||||||
|
void loadOctaveConstants(int octave, int layer_rows, int layer_cols)
|
||||||
|
{
|
||||||
|
cudaSafeCall( cudaMemcpyToSymbol(c_octave, &octave, sizeof(octave)) );
|
||||||
|
cudaSafeCall( cudaMemcpyToSymbol(c_layer_rows, &layer_rows, sizeof(layer_rows)) );
|
||||||
|
cudaSafeCall( cudaMemcpyToSymbol(c_layer_cols, &layer_cols, sizeof(layer_cols)) );
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Integral image texture
|
// Integral image texture
|
||||||
|
|
||||||
|
texture<unsigned char, 2, cudaReadModeElementType> imgTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
||||||
texture<unsigned int, 2, cudaReadModeElementType> sumTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
texture<unsigned int, 2, cudaReadModeElementType> sumTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
||||||
texture<unsigned int, 2, cudaReadModeElementType> maskSumTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
texture<unsigned int, 2, cudaReadModeElementType> maskSumTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
||||||
|
|
||||||
|
void bindImgTex(DevMem2Db img)
|
||||||
|
{
|
||||||
|
bindTexture(&imgTex, img);
|
||||||
|
}
|
||||||
|
void bindSumTex(DevMem2D_<uint> sum)
|
||||||
|
{
|
||||||
|
bindTexture(&sumTex, sum);
|
||||||
|
}
|
||||||
|
void bindMaskSumTex(DevMem2D_<uint> maskSum)
|
||||||
|
{
|
||||||
|
bindTexture(&maskSumTex, maskSum);
|
||||||
|
}
|
||||||
|
|
||||||
template <int N> __device__ float icvCalcHaarPatternSum(const float src[][5], int oldSize, int newSize, int y, int x)
|
template <int N> __device__ float icvCalcHaarPatternSum(const float src[][5], int oldSize, int newSize, int y, int x)
|
||||||
{
|
{
|
||||||
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 200
|
#if __CUDA_ARCH__ >= 200
|
||||||
typedef double real_t;
|
typedef double real_t;
|
||||||
#else
|
#else
|
||||||
typedef float real_t;
|
typedef float real_t;
|
||||||
@ -190,14 +218,6 @@ namespace cv { namespace gpu { namespace surf
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// NONMAX
|
// NONMAX
|
||||||
|
|
||||||
struct WithOutMask
|
|
||||||
{
|
|
||||||
static __device__ __forceinline__ bool check(int, int, int)
|
|
||||||
{
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
__constant__ float c_DM[5] = {0, 0, 9, 9, 1};
|
__constant__ float c_DM[5] = {0, 0, 9, 9, 1};
|
||||||
|
|
||||||
struct WithMask
|
struct WithMask
|
||||||
@ -250,9 +270,9 @@ namespace cv { namespace gpu { namespace surf
|
|||||||
// Is this thread within the hessian buffer?
|
// Is this thread within the hessian buffer?
|
||||||
const int zoff = blockDim.x * blockDim.y;
|
const int zoff = blockDim.x * blockDim.y;
|
||||||
const int localLin = threadIdx.x + threadIdx.y * blockDim.x + zoff;
|
const int localLin = threadIdx.x + threadIdx.y * blockDim.x + zoff;
|
||||||
N9[localLin - zoff] = det.ptr(c_layer_rows * (layer - 1) + min(max(i, 0), c_img_rows - 1))[min(max(j, 0), c_img_cols - 1)];
|
N9[localLin - zoff] = det.ptr(c_layer_rows * (layer - 1) + ::min(::max(i, 0), c_img_rows - 1))[::min(::max(j, 0), c_img_cols - 1)];
|
||||||
N9[localLin ] = det.ptr(c_layer_rows * (layer ) + min(max(i, 0), c_img_rows - 1))[min(max(j, 0), c_img_cols - 1)];
|
N9[localLin ] = det.ptr(c_layer_rows * (layer ) + ::min(::max(i, 0), c_img_rows - 1))[::min(::max(j, 0), c_img_cols - 1)];
|
||||||
N9[localLin + zoff] = det.ptr(c_layer_rows * (layer + 1) + min(max(i, 0), c_img_rows - 1))[min(max(j, 0), c_img_cols - 1)];
|
N9[localLin + zoff] = det.ptr(c_layer_rows * (layer + 1) + ::min(::max(i, 0), c_img_rows - 1))[::min(::max(j, 0), c_img_cols - 1)];
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
if (i < c_layer_rows - margin && j < c_layer_cols - margin && threadIdx.x > 0 && threadIdx.x < blockDim.x - 1 && threadIdx.y > 0 && threadIdx.y < blockDim.y - 1)
|
if (i < c_layer_rows - margin && j < c_layer_cols - margin && threadIdx.x > 0 && threadIdx.x < blockDim.x - 1 && threadIdx.y > 0 && threadIdx.y < blockDim.y - 1)
|
||||||
@ -400,7 +420,7 @@ namespace cv { namespace gpu { namespace surf
|
|||||||
|
|
||||||
if (solve3x3(H, dD, x))
|
if (solve3x3(H, dD, x))
|
||||||
{
|
{
|
||||||
if (fabs(x[0]) <= 1.f && fabs(x[1]) <= 1.f && fabs(x[2]) <= 1.f)
|
if (::fabs(x[0]) <= 1.f && ::fabs(x[1]) <= 1.f && ::fabs(x[2]) <= 1.f)
|
||||||
{
|
{
|
||||||
// if the step is within the interpolation region, perform it
|
// if the step is within the interpolation region, perform it
|
||||||
|
|
||||||
@ -540,25 +560,25 @@ namespace cv { namespace gpu { namespace surf
|
|||||||
const int dir = (i * 4 + threadIdx.y) * ORI_SEARCH_INC;
|
const int dir = (i * 4 + threadIdx.y) * ORI_SEARCH_INC;
|
||||||
|
|
||||||
float sumx = 0.0f, sumy = 0.0f;
|
float sumx = 0.0f, sumy = 0.0f;
|
||||||
int d = abs(__float2int_rn(s_angle[threadIdx.x]) - dir);
|
int d = ::abs(__float2int_rn(s_angle[threadIdx.x]) - dir);
|
||||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||||
{
|
{
|
||||||
sumx = s_X[threadIdx.x];
|
sumx = s_X[threadIdx.x];
|
||||||
sumy = s_Y[threadIdx.x];
|
sumy = s_Y[threadIdx.x];
|
||||||
}
|
}
|
||||||
d = abs(__float2int_rn(s_angle[threadIdx.x + 32]) - dir);
|
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 32]) - dir);
|
||||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||||
{
|
{
|
||||||
sumx += s_X[threadIdx.x + 32];
|
sumx += s_X[threadIdx.x + 32];
|
||||||
sumy += s_Y[threadIdx.x + 32];
|
sumy += s_Y[threadIdx.x + 32];
|
||||||
}
|
}
|
||||||
d = abs(__float2int_rn(s_angle[threadIdx.x + 64]) - dir);
|
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 64]) - dir);
|
||||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||||
{
|
{
|
||||||
sumx += s_X[threadIdx.x + 64];
|
sumx += s_X[threadIdx.x + 64];
|
||||||
sumy += s_Y[threadIdx.x + 64];
|
sumy += s_Y[threadIdx.x + 64];
|
||||||
}
|
}
|
||||||
d = abs(__float2int_rn(s_angle[threadIdx.x + 96]) - dir);
|
d = ::abs(__float2int_rn(s_angle[threadIdx.x + 96]) - dir);
|
||||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||||
{
|
{
|
||||||
sumx += s_X[threadIdx.x + 96];
|
sumx += s_X[threadIdx.x + 96];
|
||||||
@ -567,8 +587,8 @@ namespace cv { namespace gpu { namespace surf
|
|||||||
|
|
||||||
float* s_sum_row = s_sum + threadIdx.y * 32;
|
float* s_sum_row = s_sum + threadIdx.y * 32;
|
||||||
|
|
||||||
reduce<32>(s_sum_row, sumx, threadIdx.x, plus<volatile float>());
|
device::reduce<32>(s_sum_row, sumx, threadIdx.x, plus<volatile float>());
|
||||||
reduce<32>(s_sum_row, sumy, threadIdx.x, plus<volatile float>());
|
device::reduce<32>(s_sum_row, sumy, threadIdx.x, plus<volatile float>());
|
||||||
|
|
||||||
const float temp_mod = sumx * sumx + sumy * sumy;
|
const float temp_mod = sumx * sumx + sumy * sumy;
|
||||||
if (temp_mod > best_mod)
|
if (temp_mod > best_mod)
|
||||||
@ -650,8 +670,6 @@ namespace cv { namespace gpu { namespace surf
|
|||||||
|
|
||||||
#define PATCH_SZ 20
|
#define PATCH_SZ 20
|
||||||
|
|
||||||
texture<unsigned char, 2, cudaReadModeElementType> imgTex(0, cudaFilterModePoint, cudaAddressModeClamp);
|
|
||||||
|
|
||||||
__constant__ float c_DW[PATCH_SZ * PATCH_SZ] =
|
__constant__ float c_DW[PATCH_SZ * PATCH_SZ] =
|
||||||
{
|
{
|
||||||
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f,
|
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f,
|
||||||
@ -805,8 +823,8 @@ namespace cv { namespace gpu { namespace surf
|
|||||||
|
|
||||||
if (tid < 25)
|
if (tid < 25)
|
||||||
{
|
{
|
||||||
sdxabs[tid] = fabs(sdx[tid]); // |dx| array
|
sdxabs[tid] = ::fabs(sdx[tid]); // |dx| array
|
||||||
sdyabs[tid] = fabs(sdy[tid]); // |dy| array
|
sdyabs[tid] = ::fabs(sdy[tid]); // |dy| array
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
|
reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
|
||||||
@ -847,7 +865,7 @@ namespace cv { namespace gpu { namespace surf
|
|||||||
if (sdy[tid] >= 0)
|
if (sdy[tid] >= 0)
|
||||||
{
|
{
|
||||||
sd1[tid] = sdx[tid];
|
sd1[tid] = sdx[tid];
|
||||||
sdabs1[tid] = fabs(sdx[tid]);
|
sdabs1[tid] = ::fabs(sdx[tid]);
|
||||||
sd2[tid] = 0;
|
sd2[tid] = 0;
|
||||||
sdabs2[tid] = 0;
|
sdabs2[tid] = 0;
|
||||||
}
|
}
|
||||||
@ -856,7 +874,7 @@ namespace cv { namespace gpu { namespace surf
|
|||||||
sd1[tid] = 0;
|
sd1[tid] = 0;
|
||||||
sdabs1[tid] = 0;
|
sdabs1[tid] = 0;
|
||||||
sd2[tid] = sdx[tid];
|
sd2[tid] = sdx[tid];
|
||||||
sdabs2[tid] = fabs(sdx[tid]);
|
sdabs2[tid] = ::fabs(sdx[tid]);
|
||||||
}
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
@ -878,7 +896,7 @@ namespace cv { namespace gpu { namespace surf
|
|||||||
if (sdx[tid] >= 0)
|
if (sdx[tid] >= 0)
|
||||||
{
|
{
|
||||||
sd1[tid] = sdy[tid];
|
sd1[tid] = sdy[tid];
|
||||||
sdabs1[tid] = fabs(sdy[tid]);
|
sdabs1[tid] = ::fabs(sdy[tid]);
|
||||||
sd2[tid] = 0;
|
sd2[tid] = 0;
|
||||||
sdabs2[tid] = 0;
|
sdabs2[tid] = 0;
|
||||||
}
|
}
|
||||||
@ -887,7 +905,7 @@ namespace cv { namespace gpu { namespace surf
|
|||||||
sd1[tid] = 0;
|
sd1[tid] = 0;
|
||||||
sdabs1[tid] = 0;
|
sdabs1[tid] = 0;
|
||||||
sd2[tid] = sdy[tid];
|
sd2[tid] = sdy[tid];
|
||||||
sdabs2[tid] = fabs(sdy[tid]);
|
sdabs2[tid] = ::fabs(sdy[tid]);
|
||||||
}
|
}
|
||||||
__syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
@ -978,4 +996,7 @@ namespace cv { namespace gpu { namespace surf
|
|||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
} // namespace surf
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
@ -71,7 +71,8 @@ cv::gpu::Stream::operator bool() const { throw_nogpu(); return false; }
|
|||||||
|
|
||||||
#include "opencv2/gpu/stream_accessor.hpp"
|
#include "opencv2/gpu/stream_accessor.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device {
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t & stream = 0);
|
void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t & stream = 0);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -80,7 +81,10 @@ namespace cv { namespace gpu { namespace device {
|
|||||||
void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);
|
void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);
|
||||||
|
|
||||||
void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);
|
void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
struct Stream::Impl
|
struct Stream::Impl
|
||||||
{
|
{
|
||||||
@ -101,14 +105,14 @@ namespace
|
|||||||
void kernelSet(GpuMat& src, const Scalar& s, cudaStream_t stream)
|
void kernelSet(GpuMat& src, const Scalar& s, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
Scalar_<T> sf = s;
|
Scalar_<T> sf = s;
|
||||||
device::set_to_gpu(src, sf.val, src.channels(), stream);
|
set_to_gpu(src, sf.val, src.channels(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void kernelSetMask(GpuMat& src, const Scalar& s, const GpuMat& mask, cudaStream_t stream)
|
void kernelSetMask(GpuMat& src, const Scalar& s, const GpuMat& mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
Scalar_<T> sf = s;
|
Scalar_<T> sf = s;
|
||||||
device::set_to_gpu(src, sf.val, mask, src.channels(), stream);
|
set_to_gpu(src, sf.val, mask, src.channels(), stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -255,7 +259,7 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype,
|
|||||||
psrc = &(temp = src);
|
psrc = &(temp = src);
|
||||||
|
|
||||||
dst.create( src.size(), rtype );
|
dst.create( src.size(), rtype );
|
||||||
device::convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta, impl->stream);
|
convert_gpu(psrc->reshape(1), sdepth, dst.reshape(1), ddepth, alpha, beta, impl->stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
cv::gpu::Stream::operator bool() const
|
cv::gpu::Stream::operator bool() const
|
||||||
|
@ -123,18 +123,19 @@ namespace
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// add
|
// add
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
|
|
||||||
@ -173,7 +174,7 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu
|
|||||||
|
|
||||||
void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
|
|
||||||
@ -235,18 +236,19 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// subtract
|
// subtract
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void subtract_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
void subtract_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
|
|
||||||
@ -285,7 +287,7 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
|
|||||||
|
|
||||||
void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||||
|
|
||||||
@ -347,8 +349,8 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// multiply
|
// multiply
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
void multiply_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
|
void multiply_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
|
||||||
void multiply_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);
|
void multiply_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);
|
||||||
|
|
||||||
@ -357,11 +359,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void multiply_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
void multiply_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
|
void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||||
|
|
||||||
@ -419,7 +422,7 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub
|
|||||||
|
|
||||||
void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s)
|
void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||||
|
|
||||||
@ -469,8 +472,8 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// divide
|
// divide
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
void divide_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
|
void divide_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream);
|
||||||
void divide_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);
|
void divide_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);
|
||||||
|
|
||||||
@ -482,11 +485,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void divide_gpu(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
void divide_gpu(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
|
void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||||
|
|
||||||
@ -544,7 +548,7 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
|
|||||||
|
|
||||||
void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s)
|
void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||||
|
|
||||||
@ -593,7 +597,7 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc
|
|||||||
|
|
||||||
void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, Stream& s)
|
void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*func_t)(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
typedef void (*func_t)(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
|
|
||||||
@ -626,18 +630,19 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// absdiff
|
// absdiff
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void absdiff_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);
|
void absdiff_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s)
|
void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
|
|
||||||
@ -709,7 +714,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
|
|||||||
|
|
||||||
void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Stream& s)
|
void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
|
|
||||||
@ -753,17 +758,18 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// Comparison of two matrixes
|
// Comparison of two matrixes
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
template <typename T> void compare_eq(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
template <typename T> void compare_eq(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
template <typename T> void compare_ne(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
template <typename T> void compare_ne(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
template <typename T> void compare_lt(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
template <typename T> void compare_lt(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
template <typename T> void compare_le(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
template <typename T> void compare_le(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop, Stream& stream)
|
void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop, Stream& stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::device;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
|
|
||||||
@ -829,13 +835,14 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// Unary bitwise logical operations
|
// Unary bitwise logical operations
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
void bitwiseNotCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src, PtrStepb dst, cudaStream_t stream);
|
void bitwiseNotCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src, PtrStepb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void bitwiseMaskNotCaller(int rows, int cols, int cn, const PtrStepb src, const PtrStepb mask, PtrStepb dst, cudaStream_t stream);
|
void bitwiseMaskNotCaller(int rows, int cols, int cn, const PtrStepb src, const PtrStepb mask, PtrStepb dst, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
@ -843,20 +850,23 @@ namespace
|
|||||||
{
|
{
|
||||||
dst.create(src.size(), src.type());
|
dst.create(src.size(), src.type());
|
||||||
|
|
||||||
cv::gpu::device::bitwiseNotCaller(src.rows, src.cols, src.elemSize1(),
|
OPENCV_DEVICE_NAMESPACE_ bitwiseNotCaller(src.rows, src.cols, src.elemSize1(), dst.channels(), src, dst, stream);
|
||||||
dst.channels(), src, dst, stream);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void bitwiseNotCaller(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
|
void bitwiseNotCaller(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
|
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
|
||||||
static Caller callers[] = {device::bitwiseMaskNotCaller<unsigned char>, device::bitwiseMaskNotCaller<unsigned char>,
|
|
||||||
device::bitwiseMaskNotCaller<unsigned short>, device::bitwiseMaskNotCaller<unsigned short>,
|
static Caller callers[] =
|
||||||
device::bitwiseMaskNotCaller<unsigned int>, device::bitwiseMaskNotCaller<unsigned int>,
|
{
|
||||||
device::bitwiseMaskNotCaller<unsigned int>};
|
bitwiseMaskNotCaller<unsigned char>, bitwiseMaskNotCaller<unsigned char>,
|
||||||
|
bitwiseMaskNotCaller<unsigned short>, bitwiseMaskNotCaller<unsigned short>,
|
||||||
|
bitwiseMaskNotCaller<unsigned int>, bitwiseMaskNotCaller<unsigned int>,
|
||||||
|
bitwiseMaskNotCaller<unsigned int>
|
||||||
|
};
|
||||||
|
|
||||||
CV_Assert(mask.type() == CV_8U && mask.size() == src.size());
|
CV_Assert(mask.type() == CV_8U && mask.size() == src.size());
|
||||||
dst.create(src.size(), src.type());
|
dst.create(src.size(), src.type());
|
||||||
@ -874,17 +884,17 @@ namespace
|
|||||||
void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, Stream& stream)
|
void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, Stream& stream)
|
||||||
{
|
{
|
||||||
if (mask.empty())
|
if (mask.empty())
|
||||||
::bitwiseNotCaller(src, dst, StreamAccessor::getStream(stream));
|
bitwiseNotCaller(src, dst, StreamAccessor::getStream(stream));
|
||||||
else
|
else
|
||||||
::bitwiseNotCaller(src, dst, mask, StreamAccessor::getStream(stream));
|
bitwiseNotCaller(src, dst, mask, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// Binary bitwise logical operations
|
// Binary bitwise logical operations
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
void bitwiseOrCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
|
void bitwiseOrCaller(int rows, int cols, size_t elem_size1, int cn, const PtrStepb src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -899,8 +909,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void bitwiseMaskXorCaller(int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2, const PtrStepb mask, PtrStepb dst, cudaStream_t stream);
|
void bitwiseMaskXorCaller(int rows, int cols, int cn, const PtrStepb src1, const PtrStepb src2, const PtrStepb mask, PtrStepb dst, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
@ -909,20 +919,22 @@ namespace
|
|||||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
|
|
||||||
cv::gpu::device::bitwiseOrCaller(dst.rows, dst.cols, dst.elemSize1(),
|
OPENCV_DEVICE_NAMESPACE_ bitwiseOrCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
|
||||||
dst.channels(), src1, src2, dst, stream);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void bitwiseOrCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
|
void bitwiseOrCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
|
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
|
||||||
static Caller callers[] = {device::bitwiseMaskOrCaller<unsigned char>, device::bitwiseMaskOrCaller<unsigned char>,
|
|
||||||
device::bitwiseMaskOrCaller<unsigned short>, device::bitwiseMaskOrCaller<unsigned short>,
|
static Caller callers[] =
|
||||||
device::bitwiseMaskOrCaller<unsigned int>, device::bitwiseMaskOrCaller<unsigned int>,
|
{
|
||||||
device::bitwiseMaskOrCaller<unsigned int>};
|
bitwiseMaskOrCaller<unsigned char>, bitwiseMaskOrCaller<unsigned char>,
|
||||||
|
bitwiseMaskOrCaller<unsigned short>, bitwiseMaskOrCaller<unsigned short>,
|
||||||
|
bitwiseMaskOrCaller<unsigned int>, bitwiseMaskOrCaller<unsigned int>,
|
||||||
|
bitwiseMaskOrCaller<unsigned int>
|
||||||
|
};
|
||||||
|
|
||||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
@ -940,20 +952,23 @@ namespace
|
|||||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
|
|
||||||
cv::gpu::device::bitwiseAndCaller(dst.rows, dst.cols, dst.elemSize1(),
|
OPENCV_DEVICE_NAMESPACE_ bitwiseAndCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
|
||||||
dst.channels(), src1, src2, dst, stream);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void bitwiseAndCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
|
void bitwiseAndCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
|
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
|
||||||
static Caller callers[] = {device::bitwiseMaskAndCaller<unsigned char>, device::bitwiseMaskAndCaller<unsigned char>,
|
|
||||||
device::bitwiseMaskAndCaller<unsigned short>, device::bitwiseMaskAndCaller<unsigned short>,
|
static Caller callers[] =
|
||||||
device::bitwiseMaskAndCaller<unsigned int>, device::bitwiseMaskAndCaller<unsigned int>,
|
{
|
||||||
device::bitwiseMaskAndCaller<unsigned int>};
|
bitwiseMaskAndCaller<unsigned char>, bitwiseMaskAndCaller<unsigned char>,
|
||||||
|
bitwiseMaskAndCaller<unsigned short>, bitwiseMaskAndCaller<unsigned short>,
|
||||||
|
bitwiseMaskAndCaller<unsigned int>, bitwiseMaskAndCaller<unsigned int>,
|
||||||
|
bitwiseMaskAndCaller<unsigned int>
|
||||||
|
};
|
||||||
|
|
||||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
@ -971,20 +986,23 @@ namespace
|
|||||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
|
|
||||||
cv::gpu::device::bitwiseXorCaller(dst.rows, dst.cols, dst.elemSize1(),
|
OPENCV_DEVICE_NAMESPACE_ bitwiseXorCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
|
||||||
dst.channels(), src1, src2, dst, stream);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void bitwiseXorCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
|
void bitwiseXorCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu;
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
|
typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
|
||||||
static Caller callers[] = {device::bitwiseMaskXorCaller<unsigned char>, device::bitwiseMaskXorCaller<unsigned char>,
|
|
||||||
device::bitwiseMaskXorCaller<unsigned short>, device::bitwiseMaskXorCaller<unsigned short>,
|
static Caller callers[] =
|
||||||
device::bitwiseMaskXorCaller<unsigned int>, device::bitwiseMaskXorCaller<unsigned int>,
|
{
|
||||||
device::bitwiseMaskXorCaller<unsigned int>};
|
bitwiseMaskXorCaller<unsigned char>, bitwiseMaskXorCaller<unsigned char>,
|
||||||
|
bitwiseMaskXorCaller<unsigned short>, bitwiseMaskXorCaller<unsigned short>,
|
||||||
|
bitwiseMaskXorCaller<unsigned int>, bitwiseMaskXorCaller<unsigned int>,
|
||||||
|
bitwiseMaskXorCaller<unsigned int>
|
||||||
|
};
|
||||||
|
|
||||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
@ -1001,35 +1019,35 @@ namespace
|
|||||||
void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
|
void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
|
||||||
{
|
{
|
||||||
if (mask.empty())
|
if (mask.empty())
|
||||||
::bitwiseOrCaller(src1, src2, dst, StreamAccessor::getStream(stream));
|
bitwiseOrCaller(src1, src2, dst, StreamAccessor::getStream(stream));
|
||||||
else
|
else
|
||||||
::bitwiseOrCaller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
|
bitwiseOrCaller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
|
void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
|
||||||
{
|
{
|
||||||
if (mask.empty())
|
if (mask.empty())
|
||||||
::bitwiseAndCaller(src1, src2, dst, StreamAccessor::getStream(stream));
|
bitwiseAndCaller(src1, src2, dst, StreamAccessor::getStream(stream));
|
||||||
else
|
else
|
||||||
::bitwiseAndCaller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
|
bitwiseAndCaller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
|
void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
|
||||||
{
|
{
|
||||||
if (mask.empty())
|
if (mask.empty())
|
||||||
::bitwiseXorCaller(src1, src2, dst, StreamAccessor::getStream(stream));
|
bitwiseXorCaller(src1, src2, dst, StreamAccessor::getStream(stream));
|
||||||
else
|
else
|
||||||
::bitwiseXorCaller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
|
bitwiseXorCaller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// Minimum and maximum operations
|
// Minimum and maximum operations
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);
|
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);
|
||||||
|
|
||||||
@ -1041,7 +1059,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream);
|
void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
@ -1050,14 +1069,14 @@ namespace
|
|||||||
{
|
{
|
||||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
device::min_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
|
OPENCV_DEVICE_NAMESPACE_ min_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void min_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
|
void min_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
device::min_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
|
OPENCV_DEVICE_NAMESPACE_ min_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -1065,14 +1084,14 @@ namespace
|
|||||||
{
|
{
|
||||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
device::max_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
|
OPENCV_DEVICE_NAMESPACE_ max_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void max_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
|
void max_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
device::max_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
|
OPENCV_DEVICE_NAMESPACE_ max_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1136,18 +1155,18 @@ void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// threshold
|
// threshold
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void threshold_gpu(const DevMem2Db& src, const DevMem2Db& dst, T thresh, T maxVal, int type,
|
void threshold_gpu(const DevMem2Db& src, const DevMem2Db& dst, T thresh, T maxVal, int type, cudaStream_t stream);
|
||||||
cudaStream_t stream);
|
|
||||||
}}}
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
template <typename T> void threshold_caller(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, cudaStream_t stream)
|
template <typename T> void threshold_caller(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
device::threshold_gpu<T>(src, dst, saturate_cast<T>(thresh), saturate_cast<T>(maxVal), type, stream);
|
OPENCV_DEVICE_NAMESPACE_ threshold_gpu<T>(src, dst, saturate_cast<T>(thresh), saturate_cast<T>(maxVal), type, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1204,14 +1223,17 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// pow
|
// pow
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
void pow_caller(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
|
void pow_caller(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
|
void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
CV_Assert(src.depth() != CV_64F);
|
CV_Assert(src.depth() != CV_64F);
|
||||||
dst.create(src.size(), src.type());
|
dst.create(src.size(), src.type());
|
||||||
|
|
||||||
@ -1219,9 +1241,9 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
|
|||||||
|
|
||||||
static const caller_t callers[] =
|
static const caller_t callers[] =
|
||||||
{
|
{
|
||||||
device::pow_caller<unsigned char>, device::pow_caller<signed char>,
|
pow_caller<unsigned char>, pow_caller<signed char>,
|
||||||
device::pow_caller<unsigned short>, device::pow_caller<short>,
|
pow_caller<unsigned short>, pow_caller<short>,
|
||||||
device::pow_caller<int>, device::pow_caller<float>
|
pow_caller<int>, pow_caller<float>
|
||||||
};
|
};
|
||||||
|
|
||||||
callers[src.depth()](src.reshape(1), (float)power, dst.reshape(1), StreamAccessor::getStream(stream));
|
callers[src.depth()](src.reshape(1), (float)power, dst.reshape(1), StreamAccessor::getStream(stream));
|
||||||
@ -1230,14 +1252,17 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// addWeighted
|
// addWeighted
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
template <typename T1, typename T2, typename D>
|
template <typename T1, typename T2, typename D>
|
||||||
void addWeighted_gpu(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
void addWeighted_gpu(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype, Stream& stream)
|
void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype, Stream& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
CV_Assert(src1.size() == src2.size());
|
CV_Assert(src1.size() == src2.size());
|
||||||
CV_Assert(src1.type() == src2.type() || (dtype >= 0 && src1.channels() == src2.channels()));
|
CV_Assert(src1.type() == src2.type() || (dtype >= 0 && src1.channels() == src2.channels()));
|
||||||
|
|
||||||
@ -1256,8 +1281,6 @@ void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2,
|
|||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
typedef void (*caller_t)(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
|
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
|
||||||
static const caller_t callers[7][7][7] =
|
static const caller_t callers[7][7][7] =
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
|
@ -735,14 +735,21 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
|
|||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Separable Linear Filter
|
// Separable Linear Filter
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace filters
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace row_filter
|
||||||
{
|
{
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void linearRowFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
void linearRowFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace column_filter
|
||||||
|
{
|
||||||
template <typename T, typename D>
|
template <typename T, typename D>
|
||||||
void linearColumnFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
void linearColumnFilter_gpu(const DevMem2Db& src, const DevMem2Db& dst, const float kernel[], int ksize, int anchor, int brd_type, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
@ -796,6 +803,8 @@ namespace
|
|||||||
|
|
||||||
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor, int borderType)
|
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor, int borderType)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ row_filter;
|
||||||
|
|
||||||
static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R};
|
static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R};
|
||||||
|
|
||||||
if ((bufType == srcType) && (srcType == CV_8UC1 || srcType == CV_8UC4))
|
if ((bufType == srcType) && (srcType == CV_8UC1 || srcType == CV_8UC4))
|
||||||
@ -837,25 +846,25 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
|
|||||||
switch (srcType)
|
switch (srcType)
|
||||||
{
|
{
|
||||||
case CV_8UC1:
|
case CV_8UC1:
|
||||||
func = filters::linearRowFilter_gpu<uchar, float>;
|
func = linearRowFilter_gpu<uchar, float>;
|
||||||
break;
|
break;
|
||||||
case CV_8UC4:
|
case CV_8UC4:
|
||||||
func = filters::linearRowFilter_gpu<uchar4, float4>;
|
func = linearRowFilter_gpu<uchar4, float4>;
|
||||||
break;
|
break;
|
||||||
/*case CV_16SC1:
|
/*case CV_16SC1:
|
||||||
func = filters::linearRowFilter_gpu<short, float>;
|
func = linearRowFilter_gpu<short, float>;
|
||||||
break;*/
|
break;*/
|
||||||
/*case CV_16SC2:
|
/*case CV_16SC2:
|
||||||
func = filters::linearRowFilter_gpu<short2, float2>;
|
func = linearRowFilter_gpu<short2, float2>;
|
||||||
break;*/
|
break;*/
|
||||||
case CV_16SC3:
|
case CV_16SC3:
|
||||||
func = filters::linearRowFilter_gpu<short3, float3>;
|
func = linearRowFilter_gpu<short3, float3>;
|
||||||
break;
|
break;
|
||||||
case CV_32SC1:
|
case CV_32SC1:
|
||||||
func = filters::linearRowFilter_gpu<int, float>;
|
func = linearRowFilter_gpu<int, float>;
|
||||||
break;
|
break;
|
||||||
case CV_32FC1:
|
case CV_32FC1:
|
||||||
func = filters::linearRowFilter_gpu<float, float>;
|
func = linearRowFilter_gpu<float, float>;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -909,6 +918,8 @@ namespace
|
|||||||
|
|
||||||
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor, int borderType)
|
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor, int borderType)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ column_filter;
|
||||||
|
|
||||||
static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R};
|
static const nppFilter1D_t nppFilter1D_callers[] = {0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R};
|
||||||
|
|
||||||
if ((bufType == dstType) && (bufType == CV_8UC1 || bufType == CV_8UC4))
|
if ((bufType == dstType) && (bufType == CV_8UC1 || bufType == CV_8UC4))
|
||||||
@ -950,25 +961,25 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
|
|||||||
switch (dstType)
|
switch (dstType)
|
||||||
{
|
{
|
||||||
case CV_8UC1:
|
case CV_8UC1:
|
||||||
func = filters::linearColumnFilter_gpu<float, uchar>;
|
func = linearColumnFilter_gpu<float, uchar>;
|
||||||
break;
|
break;
|
||||||
case CV_8UC4:
|
case CV_8UC4:
|
||||||
func = filters::linearColumnFilter_gpu<float4, uchar4>;
|
func = linearColumnFilter_gpu<float4, uchar4>;
|
||||||
break;
|
break;
|
||||||
/*case CV_16SC1:
|
/*case CV_16SC1:
|
||||||
func = filters::linearColumnFilter_gpu<float, short>;
|
func = linearColumnFilter_gpu<float, short>;
|
||||||
break;*/
|
break;*/
|
||||||
/*case CV_16SC2:
|
/*case CV_16SC2:
|
||||||
func = filters::linearColumnFilter_gpu<float2, short2>;
|
func = linearColumnFilter_gpu<float2, short2>;
|
||||||
break;*/
|
break;*/
|
||||||
case CV_16SC3:
|
case CV_16SC3:
|
||||||
func = filters::linearColumnFilter_gpu<float3, short3>;
|
func = linearColumnFilter_gpu<float3, short3>;
|
||||||
break;
|
break;
|
||||||
case CV_32SC1:
|
case CV_32SC1:
|
||||||
func = filters::linearColumnFilter_gpu<float, int>;
|
func = linearColumnFilter_gpu<float, int>;
|
||||||
break;
|
break;
|
||||||
case CV_32FC1:
|
case CV_32FC1:
|
||||||
func = filters::linearColumnFilter_gpu<float, float>;
|
func = linearColumnFilter_gpu<float, float>;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,863 +0,0 @@
|
|||||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
||||||
//
|
|
||||||
// By downloading, copying, installing or using the software you agree to this license.
|
|
||||||
// If you do not agree to this license, do not download, install,
|
|
||||||
// copy or use the software.
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// License Agreement
|
|
||||||
// For Open Source Computer Vision Library
|
|
||||||
//
|
|
||||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
|
||||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
|
||||||
// Third party copyrights are property of their respective owners.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
// are permitted provided that the following conditions are met:
|
|
||||||
//
|
|
||||||
// * Redistribution's of source code must retain the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer in the documentation
|
|
||||||
// and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// * The name of the copyright holders may not be used to endorse or promote products
|
|
||||||
// derived from this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// This software is provided by the copyright holders and contributors "as is" and
|
|
||||||
// any express or implied warranties, including, but not limited to, the implied
|
|
||||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
||||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
||||||
// indirect, incidental, special, exemplary, or consequential damages
|
|
||||||
// (including, but not limited to, procurement of substitute goods or services;
|
|
||||||
// loss of use, data, or profits; or business interruption) however caused
|
|
||||||
// and on any theory of liability, whether in contract, strict liability,
|
|
||||||
// or tort (including negligence or otherwise) arising in any way out of
|
|
||||||
// the use of this software, even if advised of the possibility of such damage.
|
|
||||||
//
|
|
||||||
//M*/
|
|
||||||
|
|
||||||
#include "precomp.hpp"
|
|
||||||
|
|
||||||
using namespace cv;
|
|
||||||
using namespace cv::gpu;
|
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
cv::gpu::GpuMat::GpuMat(const GpuMat& m) :
|
|
||||||
flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
|
|
||||||
{
|
|
||||||
if (refcount)
|
|
||||||
CV_XADD(refcount, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
cv::gpu::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) :
|
|
||||||
flags(Mat::MAGIC_VAL + (type_ & TYPE_MASK)), rows(rows_), cols(cols_),
|
|
||||||
step(step_), data((uchar*)data_), refcount(0),
|
|
||||||
datastart((uchar*)data_), dataend((uchar*)data_)
|
|
||||||
{
|
|
||||||
size_t minstep = cols * elemSize();
|
|
||||||
|
|
||||||
if (step == Mat::AUTO_STEP)
|
|
||||||
{
|
|
||||||
step = minstep;
|
|
||||||
flags |= Mat::CONTINUOUS_FLAG;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (rows == 1)
|
|
||||||
step = minstep;
|
|
||||||
|
|
||||||
CV_DbgAssert(step >= minstep);
|
|
||||||
|
|
||||||
flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
|
|
||||||
}
|
|
||||||
dataend += step * (rows - 1) + minstep;
|
|
||||||
}
|
|
||||||
|
|
||||||
cv::gpu::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
|
|
||||||
flags(Mat::MAGIC_VAL + (type_ & TYPE_MASK)), rows(size_.height), cols(size_.width),
|
|
||||||
step(step_), data((uchar*)data_), refcount(0),
|
|
||||||
datastart((uchar*)data_), dataend((uchar*)data_)
|
|
||||||
{
|
|
||||||
size_t minstep = cols * elemSize();
|
|
||||||
|
|
||||||
if (step == Mat::AUTO_STEP)
|
|
||||||
{
|
|
||||||
step = minstep;
|
|
||||||
flags |= Mat::CONTINUOUS_FLAG;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (rows == 1)
|
|
||||||
step = minstep;
|
|
||||||
|
|
||||||
CV_DbgAssert(step >= minstep);
|
|
||||||
|
|
||||||
flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
|
|
||||||
}
|
|
||||||
dataend += step * (rows - 1) + minstep;
|
|
||||||
}
|
|
||||||
|
|
||||||
cv::gpu::GpuMat::GpuMat(const GpuMat& m, const Range& rowRange, const Range& colRange)
|
|
||||||
{
|
|
||||||
flags = m.flags;
|
|
||||||
step = m.step; refcount = m.refcount;
|
|
||||||
data = m.data; datastart = m.datastart; dataend = m.dataend;
|
|
||||||
|
|
||||||
if (rowRange == Range::all())
|
|
||||||
rows = m.rows;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
CV_Assert(0 <= rowRange.start && rowRange.start <= rowRange.end && rowRange.end <= m.rows);
|
|
||||||
|
|
||||||
rows = rowRange.size();
|
|
||||||
data += step*rowRange.start;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (colRange == Range::all())
|
|
||||||
cols = m.cols;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
CV_Assert(0 <= colRange.start && colRange.start <= colRange.end && colRange.end <= m.cols);
|
|
||||||
|
|
||||||
cols = colRange.size();
|
|
||||||
data += colRange.start*elemSize();
|
|
||||||
flags &= cols < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rows == 1)
|
|
||||||
flags |= Mat::CONTINUOUS_FLAG;
|
|
||||||
|
|
||||||
if (refcount)
|
|
||||||
CV_XADD(refcount, 1);
|
|
||||||
|
|
||||||
if (rows <= 0 || cols <= 0)
|
|
||||||
rows = cols = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
cv::gpu::GpuMat::GpuMat(const GpuMat& m, const Rect& roi) :
|
|
||||||
flags(m.flags), rows(roi.height), cols(roi.width),
|
|
||||||
step(m.step), data(m.data + roi.y*step), refcount(m.refcount),
|
|
||||||
datastart(m.datastart), dataend(m.dataend)
|
|
||||||
{
|
|
||||||
flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
|
|
||||||
data += roi.x * elemSize();
|
|
||||||
|
|
||||||
CV_Assert(0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows);
|
|
||||||
|
|
||||||
if (refcount)
|
|
||||||
CV_XADD(refcount, 1);
|
|
||||||
|
|
||||||
if (rows <= 0 || cols <= 0)
|
|
||||||
rows = cols = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
cv::gpu::GpuMat::GpuMat(const Mat& m) :
|
|
||||||
flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
|
|
||||||
{
|
|
||||||
upload(m);
|
|
||||||
}
|
|
||||||
|
|
||||||
GpuMat& cv::gpu::GpuMat::operator = (const GpuMat& m)
|
|
||||||
{
|
|
||||||
if (this != &m)
|
|
||||||
{
|
|
||||||
GpuMat temp(m);
|
|
||||||
swap(temp);
|
|
||||||
}
|
|
||||||
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
void cv::gpu::GpuMat::swap(GpuMat& b)
|
|
||||||
{
|
|
||||||
std::swap(flags, b.flags);
|
|
||||||
std::swap(rows, b.rows);
|
|
||||||
std::swap(cols, b.cols);
|
|
||||||
std::swap(step, b.step);
|
|
||||||
std::swap(data, b.data);
|
|
||||||
std::swap(datastart, b.datastart);
|
|
||||||
std::swap(dataend, b.dataend);
|
|
||||||
std::swap(refcount, b.refcount);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cv::gpu::GpuMat::locateROI(Size& wholeSize, Point& ofs) const
|
|
||||||
{
|
|
||||||
size_t esz = elemSize();
|
|
||||||
ptrdiff_t delta1 = data - datastart;
|
|
||||||
ptrdiff_t delta2 = dataend - datastart;
|
|
||||||
|
|
||||||
CV_DbgAssert(step > 0);
|
|
||||||
|
|
||||||
if (delta1 == 0)
|
|
||||||
ofs.x = ofs.y = 0;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ofs.y = static_cast<int>(delta1 / step);
|
|
||||||
ofs.x = static_cast<int>((delta1 - step * ofs.y) / esz);
|
|
||||||
|
|
||||||
CV_DbgAssert(data == datastart + ofs.y * step + ofs.x * esz);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t minstep = (ofs.x + cols) * esz;
|
|
||||||
|
|
||||||
wholeSize.height = std::max(static_cast<int>((delta2 - minstep) / step + 1), ofs.y + rows);
|
|
||||||
wholeSize.width = std::max(static_cast<int>((delta2 - step * (wholeSize.height - 1)) / esz), ofs.x + cols);
|
|
||||||
}
|
|
||||||
|
|
||||||
GpuMat& cv::gpu::GpuMat::adjustROI(int dtop, int dbottom, int dleft, int dright)
|
|
||||||
{
|
|
||||||
Size wholeSize;
|
|
||||||
Point ofs;
|
|
||||||
locateROI(wholeSize, ofs);
|
|
||||||
|
|
||||||
size_t esz = elemSize();
|
|
||||||
|
|
||||||
int row1 = std::max(ofs.y - dtop, 0);
|
|
||||||
int row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);
|
|
||||||
|
|
||||||
int col1 = std::max(ofs.x - dleft, 0);
|
|
||||||
int col2 = std::min(ofs.x + cols + dright, wholeSize.width);
|
|
||||||
|
|
||||||
data += (row1 - ofs.y) * step + (col1 - ofs.x) * esz;
|
|
||||||
rows = row2 - row1;
|
|
||||||
cols = col2 - col1;
|
|
||||||
|
|
||||||
if (esz * cols == step || rows == 1)
|
|
||||||
flags |= Mat::CONTINUOUS_FLAG;
|
|
||||||
else
|
|
||||||
flags &= ~Mat::CONTINUOUS_FLAG;
|
|
||||||
|
|
||||||
return *this;
|
|
||||||
}
|
|
||||||
|
|
||||||
GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
|
|
||||||
{
|
|
||||||
GpuMat hdr = *this;
|
|
||||||
|
|
||||||
int cn = channels();
|
|
||||||
if (new_cn == 0)
|
|
||||||
new_cn = cn;
|
|
||||||
|
|
||||||
int total_width = cols * cn;
|
|
||||||
|
|
||||||
if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)
|
|
||||||
new_rows = rows * total_width / new_cn;
|
|
||||||
|
|
||||||
if (new_rows != 0 && new_rows != rows)
|
|
||||||
{
|
|
||||||
int total_size = total_width * rows;
|
|
||||||
|
|
||||||
if (!isContinuous())
|
|
||||||
CV_Error(CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed");
|
|
||||||
|
|
||||||
if ((unsigned)new_rows > (unsigned)total_size)
|
|
||||||
CV_Error(CV_StsOutOfRange, "Bad new number of rows");
|
|
||||||
|
|
||||||
total_width = total_size / new_rows;
|
|
||||||
|
|
||||||
if (total_width * new_rows != total_size)
|
|
||||||
CV_Error(CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");
|
|
||||||
|
|
||||||
hdr.rows = new_rows;
|
|
||||||
hdr.step = total_width * elemSize1();
|
|
||||||
}
|
|
||||||
|
|
||||||
int new_width = total_width / new_cn;
|
|
||||||
|
|
||||||
if (new_width * new_cn != total_width)
|
|
||||||
CV_Error(CV_BadNumChannels, "The total width is not divisible by the new number of channels");
|
|
||||||
|
|
||||||
hdr.cols = new_width;
|
|
||||||
hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);
|
|
||||||
|
|
||||||
return hdr;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class GpuFuncTable
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
virtual ~GpuFuncTable() {}
|
|
||||||
|
|
||||||
virtual void copy(const Mat& src, GpuMat& dst) const = 0;
|
|
||||||
virtual void copy(const GpuMat& src, Mat& dst) const = 0;
|
|
||||||
virtual void copy(const GpuMat& src, GpuMat& dst) const = 0;
|
|
||||||
|
|
||||||
virtual void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const = 0;
|
|
||||||
|
|
||||||
virtual void convert(const GpuMat& src, GpuMat& dst) const = 0;
|
|
||||||
virtual void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const = 0;
|
|
||||||
|
|
||||||
virtual void setTo(GpuMat& m, const Scalar& s, const GpuMat& mask) const = 0;
|
|
||||||
|
|
||||||
virtual void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const = 0;
|
|
||||||
virtual void free(void* devPtr) const = 0;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
#if !defined (HAVE_CUDA)
|
|
||||||
|
|
||||||
class EmptyFuncTable : public GpuFuncTable
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
void copy(const Mat&, GpuMat&) const { throw_nogpu(); }
|
|
||||||
void copy(const GpuMat&, Mat&) const { throw_nogpu(); }
|
|
||||||
void copy(const GpuMat&, GpuMat&) const { throw_nogpu(); }
|
|
||||||
|
|
||||||
void copyWithMask(const GpuMat&, GpuMat&, const GpuMat&) const { throw_nogpu(); }
|
|
||||||
|
|
||||||
void convert(const GpuMat&, GpuMat&) const { throw_nogpu(); }
|
|
||||||
void convert(const GpuMat&, GpuMat&, double, double) const { throw_nogpu(); }
|
|
||||||
|
|
||||||
void setTo(GpuMat&, const Scalar&, const GpuMat&) const { throw_nogpu(); }
|
|
||||||
|
|
||||||
void mallocPitch(void**, size_t*, size_t, size_t) const { throw_nogpu(); }
|
|
||||||
void free(void*) const {}
|
|
||||||
};
|
|
||||||
|
|
||||||
const GpuFuncTable* gpuFuncTable()
|
|
||||||
{
|
|
||||||
static EmptyFuncTable empty;
|
|
||||||
return &empty;
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
|
||||||
{
|
|
||||||
void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t& stream = 0);
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream);
|
|
||||||
template <typename T>
|
|
||||||
void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);
|
|
||||||
|
|
||||||
void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);
|
|
||||||
}}}
|
|
||||||
|
|
||||||
namespace
|
|
||||||
{
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
// Convert
|
|
||||||
|
|
||||||
template<int n> struct NPPTypeTraits;
|
|
||||||
template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
|
|
||||||
template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
|
|
||||||
template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
|
|
||||||
template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
|
|
||||||
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
|
|
||||||
|
|
||||||
template<int SDEPTH, int DDEPTH> struct NppConvertFunc
|
|
||||||
{
|
|
||||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
|
||||||
typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
|
|
||||||
|
|
||||||
typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI);
|
|
||||||
};
|
|
||||||
template<int DDEPTH> struct NppConvertFunc<CV_32F, DDEPTH>
|
|
||||||
{
|
|
||||||
typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
|
|
||||||
|
|
||||||
typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode);
|
|
||||||
};
|
|
||||||
|
|
||||||
template<int SDEPTH, int DDEPTH, typename NppConvertFunc<SDEPTH, DDEPTH>::func_ptr func> struct NppCvt
|
|
||||||
{
|
|
||||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
|
||||||
typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
|
|
||||||
|
|
||||||
static void cvt(const GpuMat& src, GpuMat& dst)
|
|
||||||
{
|
|
||||||
NppiSize sz;
|
|
||||||
sz.width = src.cols;
|
|
||||||
sz.height = src.rows;
|
|
||||||
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );
|
|
||||||
|
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
|
||||||
}
|
|
||||||
};
|
|
||||||
template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
|
|
||||||
{
|
|
||||||
typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
|
|
||||||
|
|
||||||
static void cvt(const GpuMat& src, GpuMat& dst)
|
|
||||||
{
|
|
||||||
NppiSize sz;
|
|
||||||
sz.width = src.cols;
|
|
||||||
sz.height = src.rows;
|
|
||||||
nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );
|
|
||||||
|
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
void convertToKernelCaller(const GpuMat& src, GpuMat& dst)
|
|
||||||
{
|
|
||||||
device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////
|
|
||||||
// Set
|
|
||||||
|
|
||||||
template<int SDEPTH, int SCN> struct NppSetFunc
|
|
||||||
{
|
|
||||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
|
||||||
|
|
||||||
typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
|
|
||||||
};
|
|
||||||
template<int SDEPTH> struct NppSetFunc<SDEPTH, 1>
|
|
||||||
{
|
|
||||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
|
||||||
|
|
||||||
typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
|
|
||||||
};
|
|
||||||
|
|
||||||
template<int SDEPTH, int SCN, typename NppSetFunc<SDEPTH, SCN>::func_ptr func> struct NppSet
|
|
||||||
{
|
|
||||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
|
||||||
|
|
||||||
static void set(GpuMat& src, const Scalar& s)
|
|
||||||
{
|
|
||||||
NppiSize sz;
|
|
||||||
sz.width = src.cols;
|
|
||||||
sz.height = src.rows;
|
|
||||||
|
|
||||||
Scalar_<src_t> nppS = s;
|
|
||||||
|
|
||||||
nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );
|
|
||||||
|
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
|
||||||
}
|
|
||||||
};
|
|
||||||
template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
|
|
||||||
{
|
|
||||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
|
||||||
|
|
||||||
static void set(GpuMat& src, const Scalar& s)
|
|
||||||
{
|
|
||||||
NppiSize sz;
|
|
||||||
sz.width = src.cols;
|
|
||||||
sz.height = src.rows;
|
|
||||||
|
|
||||||
Scalar_<src_t> nppS = s;
|
|
||||||
|
|
||||||
nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );
|
|
||||||
|
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void kernelSet(GpuMat& src, const Scalar& s)
|
|
||||||
{
|
|
||||||
Scalar_<T> sf = s;
|
|
||||||
device::set_to_gpu(src, sf.val, src.channels(), 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
template<int SDEPTH, int SCN> struct NppSetMaskFunc
|
|
||||||
{
|
|
||||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
|
||||||
|
|
||||||
typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
|
|
||||||
};
|
|
||||||
template<int SDEPTH> struct NppSetMaskFunc<SDEPTH, 1>
|
|
||||||
{
|
|
||||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
|
||||||
|
|
||||||
typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
|
|
||||||
};
|
|
||||||
|
|
||||||
template<int SDEPTH, int SCN, typename NppSetMaskFunc<SDEPTH, SCN>::func_ptr func> struct NppSetMask
|
|
||||||
{
|
|
||||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
|
||||||
|
|
||||||
static void set(GpuMat& src, const Scalar& s, const GpuMat& mask)
|
|
||||||
{
|
|
||||||
NppiSize sz;
|
|
||||||
sz.width = src.cols;
|
|
||||||
sz.height = src.rows;
|
|
||||||
|
|
||||||
Scalar_<src_t> nppS = s;
|
|
||||||
|
|
||||||
nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
|
|
||||||
|
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
|
||||||
}
|
|
||||||
};
|
|
||||||
template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
|
|
||||||
{
|
|
||||||
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
|
||||||
|
|
||||||
static void set(GpuMat& src, const Scalar& s, const GpuMat& mask)
|
|
||||||
{
|
|
||||||
NppiSize sz;
|
|
||||||
sz.width = src.cols;
|
|
||||||
sz.height = src.rows;
|
|
||||||
|
|
||||||
Scalar_<src_t> nppS = s;
|
|
||||||
|
|
||||||
nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
|
|
||||||
|
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void kernelSetMask(GpuMat& src, const Scalar& s, const GpuMat& mask)
|
|
||||||
{
|
|
||||||
Scalar_<T> sf = s;
|
|
||||||
device::set_to_gpu(src, sf.val, mask, src.channels(), 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class CudaFuncTable : public GpuFuncTable
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
virtual void copy(const Mat& src, GpuMat& dst) const
|
|
||||||
{
|
|
||||||
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
|
|
||||||
}
|
|
||||||
virtual void copy(const GpuMat& src, Mat& dst) const
|
|
||||||
{
|
|
||||||
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
|
|
||||||
}
|
|
||||||
virtual void copy(const GpuMat& src, GpuMat& dst) const
|
|
||||||
{
|
|
||||||
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const
|
|
||||||
{
|
|
||||||
device::copy_to_with_mask(src, dst, src.depth(), mask, src.channels());
|
|
||||||
}
|
|
||||||
|
|
||||||
void convert(const GpuMat& src, GpuMat& dst) const
|
|
||||||
{
|
|
||||||
typedef void (*caller_t)(const GpuMat& src, GpuMat& dst);
|
|
||||||
static const caller_t callers[7][7][7] =
|
|
||||||
{
|
|
||||||
{
|
|
||||||
/* 8U -> 8U */ {0, 0, 0, 0},
|
|
||||||
/* 8U -> 8S */ {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
|
|
||||||
/* 8U -> 16U */ {NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::cvt},
|
|
||||||
/* 8U -> 16S */ {NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::cvt},
|
|
||||||
/* 8U -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 8U -> 32F */ {NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 8U -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
/* 8S -> 8U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 8S -> 8S */ {0,0,0,0},
|
|
||||||
/* 8S -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 8S -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 8S -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 8S -> 32F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 8S -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
/* 16U -> 8U */ {NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C4R>::cvt},
|
|
||||||
/* 16U -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 16U -> 16U */ {0,0,0,0},
|
|
||||||
/* 16U -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 16U -> 32S */ {NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 16U -> 32F */ {NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 16U -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
/* 16S -> 8U */ {NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C4R>::cvt},
|
|
||||||
/* 16S -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 16S -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 16S -> 16S */ {0,0,0,0},
|
|
||||||
/* 16S -> 32S */ {NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 16S -> 32F */ {NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 16S -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
/* 32S -> 8U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 32S -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 32S -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 32S -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 32S -> 32S */ {0,0,0,0},
|
|
||||||
/* 32S -> 32F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 32S -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
/* 32F -> 8U */ {NppCvt<CV_32F, CV_8U, nppiConvert_32f8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 32F -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 32F -> 16U */ {NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 32F -> 16S */ {NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 32F -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 32F -> 32F */ {0,0,0,0},
|
|
||||||
/* 32F -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
/* 64F -> 8U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 64F -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 64F -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 64F -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 64F -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 64F -> 32F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
|
||||||
/* 64F -> 64F */ {0,0,0,0}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
caller_t func = callers[src.depth()][dst.depth()][src.channels() - 1];
|
|
||||||
CV_DbgAssert(func != 0);
|
|
||||||
|
|
||||||
func(src, dst);
|
|
||||||
}
|
|
||||||
void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const
|
|
||||||
{
|
|
||||||
device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta);
|
|
||||||
}
|
|
||||||
|
|
||||||
void setTo(GpuMat& m, const Scalar& s, const GpuMat& mask) const
|
|
||||||
{
|
|
||||||
NppiSize sz;
|
|
||||||
sz.width = m.cols;
|
|
||||||
sz.height = m.rows;
|
|
||||||
|
|
||||||
if (mask.empty())
|
|
||||||
{
|
|
||||||
if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
|
|
||||||
{
|
|
||||||
cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (m.depth() == CV_8U)
|
|
||||||
{
|
|
||||||
int cn = m.channels();
|
|
||||||
|
|
||||||
if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
|
|
||||||
{
|
|
||||||
int val = saturate_cast<gpu::uchar>(s[0]);
|
|
||||||
cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef void (*caller_t)(GpuMat& src, const Scalar& s);
|
|
||||||
static const caller_t callers[7][4] =
|
|
||||||
{
|
|
||||||
{NppSet<CV_8U, 1, nppiSet_8u_C1R>::set,kernelSet<gpu::uchar>,kernelSet<gpu::uchar>,NppSet<CV_8U, 4, nppiSet_8u_C4R>::set},
|
|
||||||
{kernelSet<gpu::schar>,kernelSet<gpu::schar>,kernelSet<gpu::schar>,kernelSet<gpu::schar>},
|
|
||||||
{NppSet<CV_16U, 1, nppiSet_16u_C1R>::set,NppSet<CV_16U, 2, nppiSet_16u_C2R>::set,kernelSet<gpu::ushort>,NppSet<CV_16U, 4, nppiSet_16u_C4R>::set},
|
|
||||||
{NppSet<CV_16S, 1, nppiSet_16s_C1R>::set,NppSet<CV_16S, 2, nppiSet_16s_C2R>::set,kernelSet<short>,NppSet<CV_16S, 4, nppiSet_16s_C4R>::set},
|
|
||||||
{NppSet<CV_32S, 1, nppiSet_32s_C1R>::set,kernelSet<int>,kernelSet<int>,NppSet<CV_32S, 4, nppiSet_32s_C4R>::set},
|
|
||||||
{NppSet<CV_32F, 1, nppiSet_32f_C1R>::set,kernelSet<float>,kernelSet<float>,NppSet<CV_32F, 4, nppiSet_32f_C4R>::set},
|
|
||||||
{kernelSet<double>,kernelSet<double>,kernelSet<double>,kernelSet<double>}
|
|
||||||
};
|
|
||||||
|
|
||||||
callers[m.depth()][m.channels() - 1](m, s);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
typedef void (*caller_t)(GpuMat& src, const Scalar& s, const GpuMat& mask);
|
|
||||||
|
|
||||||
static const caller_t callers[7][4] =
|
|
||||||
{
|
|
||||||
{NppSetMask<CV_8U, 1, nppiSet_8u_C1MR>::set,kernelSetMask<gpu::uchar>,kernelSetMask<gpu::uchar>,NppSetMask<CV_8U, 4, nppiSet_8u_C4MR>::set},
|
|
||||||
{kernelSetMask<gpu::schar>,kernelSetMask<gpu::schar>,kernelSetMask<gpu::schar>,kernelSetMask<gpu::schar>},
|
|
||||||
{NppSetMask<CV_16U, 1, nppiSet_16u_C1MR>::set,kernelSetMask<gpu::ushort>,kernelSetMask<gpu::ushort>,NppSetMask<CV_16U, 4, nppiSet_16u_C4MR>::set},
|
|
||||||
{NppSetMask<CV_16S, 1, nppiSet_16s_C1MR>::set,kernelSetMask<short>,kernelSetMask<short>,NppSetMask<CV_16S, 4, nppiSet_16s_C4MR>::set},
|
|
||||||
{NppSetMask<CV_32S, 1, nppiSet_32s_C1MR>::set,kernelSetMask<int>,kernelSetMask<int>,NppSetMask<CV_32S, 4, nppiSet_32s_C4MR>::set},
|
|
||||||
{NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::set,kernelSetMask<float>,kernelSetMask<float>,NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::set},
|
|
||||||
{kernelSetMask<double>,kernelSetMask<double>,kernelSetMask<double>,kernelSetMask<double>}
|
|
||||||
};
|
|
||||||
|
|
||||||
callers[m.depth()][m.channels() - 1](m, s, mask);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const
|
|
||||||
{
|
|
||||||
cudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
|
|
||||||
}
|
|
||||||
|
|
||||||
void free(void* devPtr) const
|
|
||||||
{
|
|
||||||
cudaFree(devPtr);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Returns the function table used by GpuMat in this build configuration:
// a single function-local CudaFuncTable instance.
const GpuFuncTable* gpuFuncTable()
{
    static CudaFuncTable instance;

    const GpuFuncTable* table = &instance;
    return table;
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void cv::gpu::GpuMat::upload(const Mat& m)
|
|
||||||
{
|
|
||||||
CV_DbgAssert(!m.empty());
|
|
||||||
|
|
||||||
create(m.size(), m.type());
|
|
||||||
|
|
||||||
gpuFuncTable()->copy(m, *this);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copies this GpuMat into the host matrix m, (re)allocating m so that size and
// type match. An empty source is rejected in debug builds only.
void cv::gpu::GpuMat::download(cv::Mat& m) const
{
    CV_DbgAssert(!empty());

    // Make sure the destination has storage of the right shape first.
    m.create(size(), type());

    const GpuFuncTable* table = gpuFuncTable();
    table->copy(*this, m);
}
|
|
||||||
|
|
||||||
// Device-to-device copy of this matrix into m. m is (re)allocated to the same size
// and type. An empty source is rejected in debug builds only.
void cv::gpu::GpuMat::copyTo(GpuMat& m) const
{
    CV_DbgAssert(!empty());

    // Make sure the destination has storage of the right shape first.
    m.create(size(), type());

    const GpuFuncTable* table = gpuFuncTable();
    table->copy(*this, m);
}
|
|
||||||
|
|
||||||
// Copies this matrix into mat under control of mask. An empty mask degenerates to
// the plain copyTo(mat).
void cv::gpu::GpuMat::copyTo(GpuMat& mat, const GpuMat& mask) const
{
    // No mask: ordinary full copy.
    if (mask.empty())
    {
        copyTo(mat);
        return;
    }

    // Masked copy: allocate the destination, then let the func table do the work.
    mat.create(size(), type());

    gpuFuncTable()->copyWithMask(*this, mat, mask);
}
|
|
||||||
|
|
||||||
// Converts this matrix to another depth, optionally with scaling.
//
//   dst   - destination; (re)allocated to this matrix's size and the resulting type.
//           May be the same object as *this.
//   rtype - requested type; negative means "same type as this matrix". Only its
//           depth is used - the channel count of this matrix is always kept.
//   alpha, beta - scaling parameters, forwarded unchanged to the func table's scaling
//           convert. When alpha is 1 and beta is 0 (to within machine epsilon) the
//           non-scaling path is taken instead.
void cv::gpu::GpuMat::convertTo(GpuMat& dst, int rtype, double alpha, double beta) const
{
    bool noScale = fabs(alpha - 1) < numeric_limits<double>::epsilon() && fabs(beta) < numeric_limits<double>::epsilon();

    if (rtype < 0)
        rtype = type();
    else
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());

    int sdepth = depth();
    int ddepth = CV_MAT_DEPTH(rtype);

    // Same depth and no scaling: nothing to convert, a plain copy is enough.
    if (sdepth == ddepth && noScale)
    {
        copyTo(dst);
        return;
    }

    // In-place conversion to a different depth: dst.create() below will release the
    // current buffer, so keep an extra header (and thus a reference) to the source.
    GpuMat temp;
    const GpuMat* psrc = this;
    if (sdepth != ddepth && psrc == &dst)
    {
        temp = *this;
        psrc = &temp;
    }

    dst.create(size(), rtype);

    if (noScale)
        gpuFuncTable()->convert(*psrc, dst);
    else
        gpuFuncTable()->convert(*psrc, dst, alpha, beta);
}
|
|
||||||
|
|
||||||
// Sets the elements of this matrix to s, optionally restricted by mask.
// The mask must be empty or of type CV_8UC1. An empty matrix is rejected in debug
// builds only. Returns a reference to this matrix.
GpuMat& cv::gpu::GpuMat::setTo(const Scalar& s, const GpuMat& mask)
{
    CV_Assert(mask.empty() || mask.type() == CV_8UC1);
    CV_DbgAssert(!empty());

    const GpuFuncTable* table = gpuFuncTable();
    table->setTo(*this, s, mask);

    return *this;
}
|
|
||||||
|
|
||||||
void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
|
|
||||||
{
|
|
||||||
_type &= TYPE_MASK;
|
|
||||||
|
|
||||||
if (rows == _rows && cols == _cols && type() == _type && data)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (data)
|
|
||||||
release();
|
|
||||||
|
|
||||||
CV_DbgAssert(_rows >= 0 && _cols >= 0);
|
|
||||||
|
|
||||||
if (_rows > 0 && _cols > 0)
|
|
||||||
{
|
|
||||||
flags = Mat::MAGIC_VAL + _type;
|
|
||||||
rows = _rows;
|
|
||||||
cols = _cols;
|
|
||||||
|
|
||||||
size_t esz = elemSize();
|
|
||||||
|
|
||||||
void* devPtr;
|
|
||||||
gpuFuncTable()->mallocPitch(&devPtr, &step, esz * cols, rows);
|
|
||||||
|
|
||||||
// Single row must be continuous
|
|
||||||
if (rows == 1)
|
|
||||||
step = esz * cols;
|
|
||||||
|
|
||||||
if (esz * cols == step)
|
|
||||||
flags |= Mat::CONTINUOUS_FLAG;
|
|
||||||
|
|
||||||
int64 _nettosize = static_cast<int64>(step) * rows;
|
|
||||||
size_t nettosize = static_cast<size_t>(_nettosize);
|
|
||||||
|
|
||||||
datastart = data = static_cast<uchar*>(devPtr);
|
|
||||||
dataend = data + nettosize;
|
|
||||||
|
|
||||||
refcount = static_cast<int*>(fastMalloc(sizeof(*refcount)));
|
|
||||||
*refcount = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void cv::gpu::GpuMat::release()
|
|
||||||
{
|
|
||||||
if (refcount && CV_XADD(refcount, -1) == 1)
|
|
||||||
{
|
|
||||||
fastFree(refcount);
|
|
||||||
|
|
||||||
gpuFuncTable()->free(datastart);
|
|
||||||
}
|
|
||||||
|
|
||||||
data = datastart = dataend = 0;
|
|
||||||
step = rows = cols = 0;
|
|
||||||
refcount = 0;
|
|
||||||
}
|
|
@ -60,8 +60,10 @@ std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector64x128() { throw_nog
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace hog {
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace hog
|
||||||
|
{
|
||||||
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
|
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
|
||||||
int nblocks_win_x, int nblocks_win_y);
|
int nblocks_win_x, int nblocks_win_y);
|
||||||
|
|
||||||
@ -91,9 +93,11 @@ void compute_gradients_8UC4(int nbins, int height, int width, const cv::gpu::Dev
|
|||||||
|
|
||||||
void resize_8UC1(const cv::gpu::DevMem2Db& src, cv::gpu::DevMem2Db dst);
|
void resize_8UC1(const cv::gpu::DevMem2Db& src, cv::gpu::DevMem2Db dst);
|
||||||
void resize_8UC4(const cv::gpu::DevMem2Db& src, cv::gpu::DevMem2Db dst);
|
void resize_8UC4(const cv::gpu::DevMem2Db& src, cv::gpu::DevMem2Db dst);
|
||||||
|
}
|
||||||
|
|
||||||
}}}
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE;
|
||||||
|
|
||||||
cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size, Size block_size, Size block_stride, Size cell_size,
|
cv::gpu::HOGDescriptor::HOGDescriptor(Size win_size, Size block_size, Size block_stride, Size cell_size,
|
||||||
int nbins, double win_sigma, double threshold_L2hys, bool gamma_correction, int nlevels)
|
int nbins, double win_sigma, double threshold_L2hys, bool gamma_correction, int nlevels)
|
||||||
|
@ -107,15 +107,20 @@ void cv::gpu::CannyBuf::release() { throw_nogpu(); }
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// remap
|
// remap
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace remap
|
||||||
{
|
{
|
||||||
template <typename T> void remap_gpu(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst,
|
template <typename T>
|
||||||
|
void remap_gpu(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst,
|
||||||
int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode, const Scalar& borderValue, Stream& stream)
|
void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode, const Scalar& borderValue, Stream& stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::imgproc;
|
using namespace OPENCV_DEVICE_NAMESPACE_ remap;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation,
|
typedef void (*caller_t)(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation,
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||||
@ -155,13 +160,19 @@ void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const Gp
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// meanShiftFiltering_GPU
|
// meanShiftFiltering_GPU
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void meanShiftFiltering_gpu(const DevMem2Db& src, DevMem2Db dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
|
void meanShiftFiltering_gpu(const DevMem2Db& src, DevMem2Db dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria, Stream& stream)
|
void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria, Stream& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
||||||
|
|
||||||
if( src.empty() )
|
if( src.empty() )
|
||||||
CV_Error( CV_StsBadArg, "The input image is empty" );
|
CV_Error( CV_StsBadArg, "The input image is empty" );
|
||||||
|
|
||||||
@ -180,19 +191,25 @@ void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
|
|||||||
eps = 1.f;
|
eps = 1.f;
|
||||||
eps = (float)std::max(criteria.epsilon, 0.0);
|
eps = (float)std::max(criteria.epsilon, 0.0);
|
||||||
|
|
||||||
imgproc::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps, StreamAccessor::getStream(stream));
|
meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// meanShiftProc_GPU
|
// meanShiftProc_GPU
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void meanShiftProc_gpu(const DevMem2Db& src, DevMem2Db dstr, DevMem2Db dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
|
void meanShiftProc_gpu(const DevMem2Db& src, DevMem2Db dstr, DevMem2Db dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria, Stream& stream)
|
void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria, Stream& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
||||||
|
|
||||||
if( src.empty() )
|
if( src.empty() )
|
||||||
CV_Error( CV_StsBadArg, "The input image is empty" );
|
CV_Error( CV_StsBadArg, "The input image is empty" );
|
||||||
|
|
||||||
@ -212,26 +229,32 @@ void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int
|
|||||||
eps = 1.f;
|
eps = 1.f;
|
||||||
eps = (float)std::max(criteria.epsilon, 0.0);
|
eps = (float)std::max(criteria.epsilon, 0.0);
|
||||||
|
|
||||||
imgproc::meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps, StreamAccessor::getStream(stream));
|
meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// drawColorDisp
|
// drawColorDisp
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void drawColorDisp_gpu(const DevMem2Db& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream);
|
void drawColorDisp_gpu(const DevMem2Db& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream);
|
||||||
void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream);
|
void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2Db& dst, int ndisp, const cudaStream_t& stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream)
|
void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
||||||
|
|
||||||
dst.create(src.size(), CV_8UC4);
|
dst.create(src.size(), CV_8UC4);
|
||||||
|
|
||||||
imgproc::drawColorDisp_gpu((DevMem2D_<T>)src, dst, ndisp, stream);
|
drawColorDisp_gpu((DevMem2D_<T>)src, dst, ndisp, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef void (*drawColorDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream);
|
typedef void (*drawColorDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream);
|
||||||
@ -249,19 +272,26 @@ void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& s
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// reprojectImageTo3D
|
// reprojectImageTo3D
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void reprojectImageTo3D_gpu(const DevMem2Db& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
|
void reprojectImageTo3D_gpu(const DevMem2Db& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
|
||||||
void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
|
void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream)
|
void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
||||||
|
|
||||||
xyzw.create(disp.rows, disp.cols, CV_32FC4);
|
xyzw.create(disp.rows, disp.cols, CV_32FC4);
|
||||||
imgproc::reprojectImageTo3D_gpu((DevMem2D_<T>)disp, xyzw, Q.ptr<float>(), stream);
|
|
||||||
|
reprojectImageTo3D_gpu((DevMem2D_<T>)disp, xyzw, Q.ptr<float>(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
typedef void (*reprojectImageTo3D_caller_t)(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream);
|
typedef void (*reprojectImageTo3D_caller_t)(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream);
|
||||||
@ -279,10 +309,14 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q,
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// resize
|
// resize
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace resize
|
||||||
{
|
{
|
||||||
template <typename T> void resize_gpu(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
template <typename T> void resize_gpu(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
|
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
|
||||||
{
|
{
|
||||||
@ -346,7 +380,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::imgproc;
|
using namespace OPENCV_DEVICE_NAMESPACE_ resize;
|
||||||
|
|
||||||
typedef void (*caller_t)(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
typedef void (*caller_t)(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||||
static const caller_t callers[6][4] =
|
static const caller_t callers[6][4] =
|
||||||
@ -366,18 +400,24 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// copyMakeBorder
|
// copyMakeBorder
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace copy_make_border
|
||||||
{
|
{
|
||||||
template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream);
|
template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
template <typename T, int cn> void copyMakeBorder_caller(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)
|
template <typename T, int cn> void copyMakeBorder_caller(const DevMem2Db& src, const DevMem2Db& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ copy_make_border;
|
||||||
|
|
||||||
Scalar_<T> val(saturate_cast<T>(value[0]), saturate_cast<T>(value[1]), saturate_cast<T>(value[2]), saturate_cast<T>(value[3]));
|
Scalar_<T> val(saturate_cast<T>(value[0]), saturate_cast<T>(value[1]), saturate_cast<T>(value[2]), saturate_cast<T>(value[3]));
|
||||||
|
|
||||||
imgproc::copyMakeBorder_gpu<T, cn>(src, dst, top, left, borderType, val.val, stream);
|
copyMakeBorder_gpu<T, cn>(src, dst, top, left, borderType, val.val, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -626,16 +666,22 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// buildWarpPlaneMaps
|
// buildWarpPlaneMaps
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void buildWarpPlaneMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
void buildWarpPlaneMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
||||||
const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,
|
const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,
|
||||||
cudaStream_t stream);
|
cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T,
|
void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T,
|
||||||
float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream)
|
float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
||||||
|
|
||||||
CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
|
CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
|
||||||
CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
|
CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
|
||||||
CV_Assert((T.size() == Size(3,1) || T.size() == Size(1,3)) && T.type() == CV_32F && T.isContinuous());
|
CV_Assert((T.size() == Size(3,1) || T.size() == Size(1,3)) && T.type() == CV_32F && T.isContinuous());
|
||||||
@ -647,23 +693,29 @@ void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, cons
|
|||||||
|
|
||||||
map_x.create(dst_roi.size(), CV_32F);
|
map_x.create(dst_roi.size(), CV_32F);
|
||||||
map_y.create(dst_roi.size(), CV_32F);
|
map_y.create(dst_roi.size(), CV_32F);
|
||||||
imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),
|
buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),
|
||||||
T.ptr<float>(), scale, StreamAccessor::getStream(stream));
|
T.ptr<float>(), scale, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// buildWarpCylyndricalMaps
|
// buildWarpCylyndricalMaps
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void buildWarpCylindricalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
void buildWarpCylindricalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
||||||
const float k_rinv[9], const float r_kinv[9], float scale,
|
const float k_rinv[9], const float r_kinv[9], float scale,
|
||||||
cudaStream_t stream);
|
cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
|
void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
|
||||||
GpuMat& map_x, GpuMat& map_y, Stream& stream)
|
GpuMat& map_x, GpuMat& map_y, Stream& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
||||||
|
|
||||||
CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
|
CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
|
||||||
CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
|
CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
|
||||||
|
|
||||||
@ -674,24 +726,29 @@ void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K
|
|||||||
|
|
||||||
map_x.create(dst_roi.size(), CV_32F);
|
map_x.create(dst_roi.size(), CV_32F);
|
||||||
map_y.create(dst_roi.size(), CV_32F);
|
map_y.create(dst_roi.size(), CV_32F);
|
||||||
imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),
|
buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
|
||||||
scale, StreamAccessor::getStream(stream));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// buildWarpSphericalMaps
|
// buildWarpSphericalMaps
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,
|
||||||
const float k_rinv[9], const float r_kinv[9], float scale,
|
const float k_rinv[9], const float r_kinv[9], float scale,
|
||||||
cudaStream_t stream);
|
cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
|
void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,
|
||||||
GpuMat& map_x, GpuMat& map_y, Stream& stream)
|
GpuMat& map_x, GpuMat& map_y, Stream& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
||||||
|
|
||||||
CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
|
CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);
|
||||||
CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
|
CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);
|
||||||
|
|
||||||
@ -702,8 +759,7 @@ void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K,
|
|||||||
|
|
||||||
map_x.create(dst_roi.size(), CV_32F);
|
map_x.create(dst_roi.size(), CV_32F);
|
||||||
map_y.create(dst_roi.size(), CV_32F);
|
map_y.create(dst_roi.size(), CV_32F);
|
||||||
imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),
|
buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), scale, StreamAccessor::getStream(stream));
|
||||||
scale, StreamAccessor::getStream(stream));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
@ -843,17 +899,24 @@ void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s)
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// columnSum
|
// columnSum
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void columnSum_32F(const DevMem2Db src, const DevMem2Db dst);
|
void columnSum_32F(const DevMem2Db src, const DevMem2Db dst);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)
|
void cv::gpu::columnSum(const GpuMat& src, GpuMat& dst)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
||||||
|
|
||||||
CV_Assert(src.type() == CV_32F);
|
CV_Assert(src.type() == CV_32F);
|
||||||
|
|
||||||
dst.create(src.size(), CV_32F);
|
dst.create(src.size(), CV_32F);
|
||||||
imgproc::columnSum_32F(src, dst);
|
|
||||||
|
columnSum_32F(src, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)
|
void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s)
|
||||||
@ -1140,7 +1203,6 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, S
|
|||||||
histRange(src, hist, levels, buf, stream);
|
histRange(src, hist, levels, buf, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream)
|
void cv::gpu::histRange(const GpuMat& src, GpuMat& hist, const GpuMat& levels, GpuMat& buf, Stream& stream)
|
||||||
{
|
{
|
||||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1);
|
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1);
|
||||||
@ -1183,13 +1245,19 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4
|
|||||||
hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
|
hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace histograms
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace hist
|
||||||
{
|
{
|
||||||
void histogram256_gpu(DevMem2Db src, int* hist, unsigned int* buf, cudaStream_t stream);
|
void histogram256_gpu(DevMem2Db src, int* hist, unsigned int* buf, cudaStream_t stream);
|
||||||
|
|
||||||
const int PARTIAL_HISTOGRAM256_COUNT = 240;
|
const int PARTIAL_HISTOGRAM256_COUNT = 240;
|
||||||
const int HISTOGRAM256_BIN_COUNT = 256;
|
const int HISTOGRAM256_BIN_COUNT = 256;
|
||||||
}}}
|
|
||||||
|
void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)
|
void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)
|
||||||
{
|
{
|
||||||
@ -1199,7 +1267,7 @@ void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, Stream& stream)
|
|||||||
|
|
||||||
void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream)
|
void cv::gpu::calcHist(const GpuMat& src, GpuMat& hist, GpuMat& buf, Stream& stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::histograms;
|
using namespace OPENCV_DEVICE_NAMESPACE_ hist;
|
||||||
|
|
||||||
CV_Assert(src.type() == CV_8UC1);
|
CV_Assert(src.type() == CV_8UC1);
|
||||||
|
|
||||||
@ -1223,14 +1291,9 @@ void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, Stream&
|
|||||||
equalizeHist(src, dst, hist, buf, stream);
|
equalizeHist(src, dst, hist, buf, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace histograms
|
|
||||||
{
|
|
||||||
void equalizeHist_gpu(DevMem2Db src, DevMem2Db dst, const int* lut, cudaStream_t stream);
|
|
||||||
}}}
|
|
||||||
|
|
||||||
void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& s)
|
void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat& buf, Stream& s)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::histograms;
|
using namespace OPENCV_DEVICE_NAMESPACE_ hist;
|
||||||
|
|
||||||
CV_Assert(src.type() == CV_8UC1);
|
CV_Assert(src.type() == CV_8UC1);
|
||||||
|
|
||||||
@ -1264,13 +1327,16 @@ void cv::gpu::equalizeHist(const GpuMat& src, GpuMat& dst, GpuMat& hist, GpuMat&
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// cornerHarris & minEgenVal
|
// cornerHarris & minEgenVal
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc {
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace imgproc
|
||||||
|
{
|
||||||
void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst, cudaStream_t stream);
|
void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst, cudaStream_t stream);
|
||||||
void cornerHarris_caller(const int block_size, const float k, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst, int border_type, cudaStream_t stream);
|
void cornerHarris_caller(const int block_size, const float k, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst, int border_type, cudaStream_t stream);
|
||||||
void cornerMinEigenVal_caller(const int block_size, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst, int border_type, cudaStream_t stream);
|
void cornerMinEigenVal_caller(const int block_size, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst, int border_type, cudaStream_t stream);
|
||||||
|
}
|
||||||
|
|
||||||
}}}
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
@ -1316,7 +1382,6 @@ namespace
|
|||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
|
||||||
bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)
|
bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)
|
||||||
{
|
{
|
||||||
switch (cpuBorderType)
|
switch (cpuBorderType)
|
||||||
@ -1356,6 +1421,8 @@ void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& D
|
|||||||
|
|
||||||
void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k, int borderType, Stream& stream)
|
void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k, int borderType, Stream& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
||||||
|
|
||||||
CV_Assert(borderType == cv::BORDER_REFLECT101 ||
|
CV_Assert(borderType == cv::BORDER_REFLECT101 ||
|
||||||
borderType == cv::BORDER_REPLICATE);
|
borderType == cv::BORDER_REPLICATE);
|
||||||
|
|
||||||
@ -1364,7 +1431,7 @@ void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& D
|
|||||||
|
|
||||||
extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
|
extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
|
||||||
dst.create(src.size(), CV_32F);
|
dst.create(src.size(), CV_32F);
|
||||||
imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst, gpuBorderType, StreamAccessor::getStream(stream));
|
cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst, gpuBorderType, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType)
|
void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType)
|
||||||
@ -1381,6 +1448,8 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuM
|
|||||||
|
|
||||||
void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType, Stream& stream)
|
void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType, Stream& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
||||||
|
|
||||||
CV_Assert(borderType == cv::BORDER_REFLECT101 ||
|
CV_Assert(borderType == cv::BORDER_REFLECT101 ||
|
||||||
borderType == cv::BORDER_REPLICATE);
|
borderType == cv::BORDER_REPLICATE);
|
||||||
|
|
||||||
@ -1389,24 +1458,30 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuM
|
|||||||
|
|
||||||
extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
|
extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
|
||||||
dst.create(src.size(), CV_32F);
|
dst.create(src.size(), CV_32F);
|
||||||
imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst, gpuBorderType, StreamAccessor::getStream(stream));
|
cornerMinEigenVal_caller(blockSize, Dx, Dy, dst, gpuBorderType, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// mulSpectrums
|
// mulSpectrums
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c, cudaStream_t stream);
|
void mulSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c, cudaStream_t stream);
|
||||||
|
|
||||||
void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c, cudaStream_t stream);
|
void mulSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, DevMem2D_<cufftComplex> c, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB, Stream& stream)
|
void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB, Stream& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
||||||
|
|
||||||
typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, DevMem2D_<cufftComplex>, cudaStream_t stream);
|
typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, DevMem2D_<cufftComplex>, cudaStream_t stream);
|
||||||
static Caller callers[] = { imgproc::mulSpectrums, imgproc::mulSpectrums_CONJ };
|
|
||||||
|
static Caller callers[] = { mulSpectrums, mulSpectrums_CONJ };
|
||||||
|
|
||||||
CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);
|
CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);
|
||||||
CV_Assert(a.size() == b.size());
|
CV_Assert(a.size() == b.size());
|
||||||
@ -1420,18 +1495,23 @@ void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flag
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// mulAndScaleSpectrums
|
// mulAndScaleSpectrums
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c, cudaStream_t stream);
|
void mulAndScaleSpectrums(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c, cudaStream_t stream);
|
||||||
|
|
||||||
void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c, cudaStream_t stream);
|
void mulAndScaleSpectrums_CONJ(const PtrStep<cufftComplex> a, const PtrStep<cufftComplex> b, float scale, DevMem2D_<cufftComplex> c, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB, Stream& stream)
|
void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB, Stream& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
||||||
|
|
||||||
typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, DevMem2D_<cufftComplex>, cudaStream_t stream);
|
typedef void (*Caller)(const PtrStep<cufftComplex>, const PtrStep<cufftComplex>, float scale, DevMem2D_<cufftComplex>, cudaStream_t stream);
|
||||||
static Caller callers[] = { imgproc::mulAndScaleSpectrums, imgproc::mulAndScaleSpectrums_CONJ };
|
static Caller callers[] = { mulAndScaleSpectrums, mulAndScaleSpectrums_CONJ };
|
||||||
|
|
||||||
CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);
|
CV_Assert(a.type() == b.type() && a.type() == CV_32FC2);
|
||||||
CV_Assert(a.size() == b.size());
|
CV_Assert(a.size() == b.size());
|
||||||
@ -1593,13 +1673,19 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
|
|||||||
convolve(image, templ, result, ccorr, buf);
|
convolve(image, templ, result, ccorr, buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace imgproc
|
||||||
{
|
{
|
||||||
void convolve_gpu(const DevMem2Df& src, const PtrStepf& dst, int kWidth, int kHeight, float* kernel, cudaStream_t stream);
|
void convolve_gpu(const DevMem2Df& src, const PtrStepf& dst, int kWidth, int kHeight, float* kernel, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream)
|
void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ imgproc;
|
||||||
|
|
||||||
#ifndef HAVE_CUFFT
|
#ifndef HAVE_CUFFT
|
||||||
|
|
||||||
CV_Assert(image.type() == CV_32F);
|
CV_Assert(image.type() == CV_32F);
|
||||||
@ -1622,7 +1708,7 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
|
|||||||
templ.copyTo(contKernel);
|
templ.copyTo(contKernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
imgproc::convolve_gpu(image, result, templ.cols, templ.rows, contKernel.ptr<float>(), StreamAccessor::getStream(stream));
|
convolve_gpu(image, result, templ.cols, templ.rows, contKernel.ptr<float>(), StreamAccessor::getStream(stream));
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
@ -1650,7 +1736,7 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
|
|||||||
templ.copyTo(contKernel);
|
templ.copyTo(contKernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
imgproc::convolve_gpu(image, result, templ.cols, templ.rows, contKernel.ptr<float>(), StreamAccessor::getStream(stream));
|
convolve_gpu(image, result, templ.cols, templ.rows, contKernel.ptr<float>(), StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1725,14 +1811,18 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// pyrDown
|
// pyrDown
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace pyr_down
|
||||||
{
|
{
|
||||||
template <typename T, int cn> void pyrDown_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
template <typename T, int cn> void pyrDown_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)
|
void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::imgproc;
|
using namespace OPENCV_DEVICE_NAMESPACE_ pyr_down;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
||||||
|
|
||||||
@ -1761,14 +1851,18 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, int borderType, Stream& st
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// pyrUp
|
// pyrUp
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace pyr_up
|
||||||
{
|
{
|
||||||
template <typename T, int cn> void pyrUp_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
template <typename T, int cn> void pyrUp_gpu(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)
|
void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::imgproc;
|
using namespace OPENCV_DEVICE_NAMESPACE_ pyr_up;
|
||||||
|
|
||||||
typedef void (*func_t)(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
typedef void (*func_t)(const DevMem2Db& src, const DevMem2Db& dst, int borderType, cudaStream_t stream);
|
||||||
|
|
||||||
@ -1839,7 +1933,9 @@ void cv::gpu::CannyBuf::release()
|
|||||||
trackBuf2.release();
|
trackBuf2.release();
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace canny
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace canny
|
||||||
{
|
{
|
||||||
void calcSobelRowPass_gpu(PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols);
|
void calcSobelRowPass_gpu(PtrStepb src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols);
|
||||||
|
|
||||||
@ -1853,13 +1949,15 @@ namespace cv { namespace gpu { namespace canny
|
|||||||
void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols);
|
void edgesHysteresisGlobal_gpu(PtrStepi map, ushort2* st1, ushort2* st2, int rows, int cols);
|
||||||
|
|
||||||
void getEdges_gpu(PtrStepi map, PtrStepb dst, int rows, int cols);
|
void getEdges_gpu(PtrStepi map, PtrStepb dst, int rows, int cols);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
void CannyCaller(CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)
|
void CannyCaller(CannyBuf& buf, GpuMat& dst, float low_thresh, float high_thresh)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::canny;
|
using namespace OPENCV_DEVICE_NAMESPACE_ canny;
|
||||||
|
|
||||||
calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
|
calcMap_gpu(buf.dx, buf.dy, buf.edgeBuf, buf.edgeBuf, dst.rows, dst.cols, low_thresh, high_thresh);
|
||||||
|
|
||||||
@ -1879,7 +1977,7 @@ void cv::gpu::Canny(const GpuMat& src, GpuMat& dst, double low_thresh, double hi
|
|||||||
|
|
||||||
void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
|
void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::canny;
|
using namespace OPENCV_DEVICE_NAMESPACE_ canny;
|
||||||
|
|
||||||
CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));
|
CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));
|
||||||
CV_Assert(src.type() == CV_8UC1);
|
CV_Assert(src.type() == CV_8UC1);
|
||||||
@ -1918,7 +2016,7 @@ void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, GpuMat& dst, double low_
|
|||||||
|
|
||||||
void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
|
void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& dst, double low_thresh, double high_thresh, bool L2gradient)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::canny;
|
using namespace OPENCV_DEVICE_NAMESPACE_ canny;
|
||||||
|
|
||||||
CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));
|
CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));
|
||||||
CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());
|
CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());
|
||||||
|
@ -271,5 +271,380 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory)
|
|||||||
setDevice(prev_device_id);
|
setDevice(prev_device_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////
|
||||||
|
// GpuFuncTable
|
||||||
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
void copy_to_with_mask(const DevMem2Db& src, DevMem2Db dst, int depth, const DevMem2Db& mask, int channels, const cudaStream_t& stream = 0);
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void set_to_gpu(const DevMem2Db& mat, const T* scalar, int channels, cudaStream_t stream);
|
||||||
|
template <typename T>
|
||||||
|
void set_to_gpu(const DevMem2Db& mat, const T* scalar, const DevMem2Db& mask, int channels, cudaStream_t stream);
|
||||||
|
|
||||||
|
void convert_gpu(const DevMem2Db& src, int sdepth, const DevMem2Db& dst, int ddepth, double alpha, double beta, cudaStream_t stream = 0);
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// Convert
|
||||||
|
|
||||||
|
template<int n> struct NPPTypeTraits;
|
||||||
|
template<> struct NPPTypeTraits<CV_8U> { typedef Npp8u npp_type; };
|
||||||
|
template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
|
||||||
|
template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
|
||||||
|
template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
|
||||||
|
template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
|
||||||
|
|
||||||
|
template<int SDEPTH, int DDEPTH> struct NppConvertFunc
|
||||||
|
{
|
||||||
|
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||||
|
typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
|
||||||
|
|
||||||
|
typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI);
|
||||||
|
};
|
||||||
|
template<int DDEPTH> struct NppConvertFunc<CV_32F, DDEPTH>
|
||||||
|
{
|
||||||
|
typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
|
||||||
|
|
||||||
|
typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode);
|
||||||
|
};
|
||||||
|
|
||||||
|
template<int SDEPTH, int DDEPTH, typename NppConvertFunc<SDEPTH, DDEPTH>::func_ptr func> struct NppCvt
|
||||||
|
{
|
||||||
|
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||||
|
typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
|
||||||
|
|
||||||
|
static void cvt(const GpuMat& src, GpuMat& dst)
|
||||||
|
{
|
||||||
|
NppiSize sz;
|
||||||
|
sz.width = src.cols;
|
||||||
|
sz.height = src.rows;
|
||||||
|
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );
|
||||||
|
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
|
||||||
|
{
|
||||||
|
typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;
|
||||||
|
|
||||||
|
static void cvt(const GpuMat& src, GpuMat& dst)
|
||||||
|
{
|
||||||
|
NppiSize sz;
|
||||||
|
sz.width = src.cols;
|
||||||
|
sz.height = src.rows;
|
||||||
|
nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );
|
||||||
|
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
void convertToKernelCaller(const GpuMat& src, GpuMat& dst)
|
||||||
|
{
|
||||||
|
OPENCV_DEVICE_NAMESPACE_ convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// Set
|
||||||
|
|
||||||
|
template<int SDEPTH, int SCN> struct NppSetFunc
|
||||||
|
{
|
||||||
|
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||||
|
|
||||||
|
typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
|
||||||
|
};
|
||||||
|
template<int SDEPTH> struct NppSetFunc<SDEPTH, 1>
|
||||||
|
{
|
||||||
|
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||||
|
|
||||||
|
typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
|
||||||
|
};
|
||||||
|
|
||||||
|
template<int SDEPTH, int SCN, typename NppSetFunc<SDEPTH, SCN>::func_ptr func> struct NppSet
|
||||||
|
{
|
||||||
|
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||||
|
|
||||||
|
static void set(GpuMat& src, Scalar s)
|
||||||
|
{
|
||||||
|
NppiSize sz;
|
||||||
|
sz.width = src.cols;
|
||||||
|
sz.height = src.rows;
|
||||||
|
|
||||||
|
Scalar_<src_t> nppS = s;
|
||||||
|
|
||||||
|
nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );
|
||||||
|
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
|
||||||
|
{
|
||||||
|
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||||
|
|
||||||
|
static void set(GpuMat& src, Scalar s)
|
||||||
|
{
|
||||||
|
NppiSize sz;
|
||||||
|
sz.width = src.cols;
|
||||||
|
sz.height = src.rows;
|
||||||
|
|
||||||
|
Scalar_<src_t> nppS = s;
|
||||||
|
|
||||||
|
nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );
|
||||||
|
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void kernelSet(GpuMat& src, Scalar s)
|
||||||
|
{
|
||||||
|
Scalar_<T> sf = s;
|
||||||
|
OPENCV_DEVICE_NAMESPACE_ set_to_gpu(src, sf.val, src.channels(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<int SDEPTH, int SCN> struct NppSetMaskFunc
|
||||||
|
{
|
||||||
|
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||||
|
|
||||||
|
typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
|
||||||
|
};
|
||||||
|
template<int SDEPTH> struct NppSetMaskFunc<SDEPTH, 1>
|
||||||
|
{
|
||||||
|
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||||
|
|
||||||
|
typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
|
||||||
|
};
|
||||||
|
|
||||||
|
template<int SDEPTH, int SCN, typename NppSetMaskFunc<SDEPTH, SCN>::func_ptr func> struct NppSetMask
|
||||||
|
{
|
||||||
|
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||||
|
|
||||||
|
static void set(GpuMat& src, Scalar s, const GpuMat& mask)
|
||||||
|
{
|
||||||
|
NppiSize sz;
|
||||||
|
sz.width = src.cols;
|
||||||
|
sz.height = src.rows;
|
||||||
|
|
||||||
|
Scalar_<src_t> nppS = s;
|
||||||
|
|
||||||
|
nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
|
||||||
|
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
|
||||||
|
{
|
||||||
|
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
|
||||||
|
|
||||||
|
static void set(GpuMat& src, Scalar s, const GpuMat& mask)
|
||||||
|
{
|
||||||
|
NppiSize sz;
|
||||||
|
sz.width = src.cols;
|
||||||
|
sz.height = src.rows;
|
||||||
|
|
||||||
|
Scalar_<src_t> nppS = s;
|
||||||
|
|
||||||
|
nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );
|
||||||
|
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void kernelSetMask(GpuMat& src, Scalar s, const GpuMat& mask)
|
||||||
|
{
|
||||||
|
Scalar_<T> sf = s;
|
||||||
|
OPENCV_DEVICE_NAMESPACE_ set_to_gpu(src, sf.val, mask, src.channels(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
class CudaFuncTable : public GpuFuncTable
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
void copy(const Mat& src, GpuMat& dst) const
|
||||||
|
{
|
||||||
|
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyHostToDevice) );
|
||||||
|
}
|
||||||
|
void copy(const GpuMat& src, Mat& dst) const
|
||||||
|
{
|
||||||
|
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost) );
|
||||||
|
}
|
||||||
|
void copy(const GpuMat& src, GpuMat& dst) const
|
||||||
|
{
|
||||||
|
cudaSafeCall( cudaMemcpy2D(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToDevice) );
|
||||||
|
}
|
||||||
|
|
||||||
|
void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const
|
||||||
|
{
|
||||||
|
OPENCV_DEVICE_NAMESPACE_ copy_to_with_mask(src, dst, src.depth(), mask, src.channels());
|
||||||
|
}
|
||||||
|
|
||||||
|
void convert(const GpuMat& src, GpuMat& dst) const
|
||||||
|
{
|
||||||
|
typedef void (*caller_t)(const GpuMat& src, GpuMat& dst);
|
||||||
|
static const caller_t callers[7][7][7] =
|
||||||
|
{
|
||||||
|
{
|
||||||
|
/* 8U -> 8U */ {0, 0, 0, 0},
|
||||||
|
/* 8U -> 8S */ {convertToKernelCaller, convertToKernelCaller, convertToKernelCaller, convertToKernelCaller},
|
||||||
|
/* 8U -> 16U */ {NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::cvt},
|
||||||
|
/* 8U -> 16S */ {NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::cvt},
|
||||||
|
/* 8U -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 8U -> 32F */ {NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 8U -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
/* 8S -> 8U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 8S -> 8S */ {0,0,0,0},
|
||||||
|
/* 8S -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 8S -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 8S -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 8S -> 32F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 8S -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
/* 16U -> 8U */ {NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C4R>::cvt},
|
||||||
|
/* 16U -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 16U -> 16U */ {0,0,0,0},
|
||||||
|
/* 16U -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 16U -> 32S */ {NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 16U -> 32F */ {NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 16U -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
/* 16S -> 8U */ {NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C4R>::cvt},
|
||||||
|
/* 16S -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 16S -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 16S -> 16S */ {0,0,0,0},
|
||||||
|
/* 16S -> 32S */ {NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 16S -> 32F */ {NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 16S -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
/* 32S -> 8U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 32S -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 32S -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 32S -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 32S -> 32S */ {0,0,0,0},
|
||||||
|
/* 32S -> 32F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 32S -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
/* 32F -> 8U */ {NppCvt<CV_32F, CV_8U, nppiConvert_32f8u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 32F -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 32F -> 16U */ {NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 32F -> 16S */ {NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::cvt,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 32F -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 32F -> 32F */ {0,0,0,0},
|
||||||
|
/* 32F -> 64F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
/* 64F -> 8U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 64F -> 8S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 64F -> 16U */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 64F -> 16S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 64F -> 32S */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 64F -> 32F */ {convertToKernelCaller,convertToKernelCaller,convertToKernelCaller,convertToKernelCaller},
|
||||||
|
/* 64F -> 64F */ {0,0,0,0}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
caller_t func = callers[src.depth()][dst.depth()][src.channels() - 1];
|
||||||
|
CV_DbgAssert(func != 0);
|
||||||
|
|
||||||
|
func(src, dst);
|
||||||
|
}
|
||||||
|
|
||||||
|
void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const
|
||||||
|
{
|
||||||
|
device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta);
|
||||||
|
}
|
||||||
|
|
||||||
|
void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const
|
||||||
|
{
|
||||||
|
NppiSize sz;
|
||||||
|
sz.width = m.cols;
|
||||||
|
sz.height = m.rows;
|
||||||
|
|
||||||
|
if (mask.empty())
|
||||||
|
{
|
||||||
|
if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
|
||||||
|
{
|
||||||
|
cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (m.depth() == CV_8U)
|
||||||
|
{
|
||||||
|
int cn = m.channels();
|
||||||
|
|
||||||
|
if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
|
||||||
|
{
|
||||||
|
int val = saturate_cast<uchar>(s[0]);
|
||||||
|
cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef void (*caller_t)(GpuMat& src, Scalar s);
|
||||||
|
static const caller_t callers[7][4] =
|
||||||
|
{
|
||||||
|
{NppSet<CV_8U, 1, nppiSet_8u_C1R>::set,kernelSet<uchar>,kernelSet<uchar>,NppSet<CV_8U, 4, nppiSet_8u_C4R>::set},
|
||||||
|
{kernelSet<schar>,kernelSet<schar>,kernelSet<schar>,kernelSet<schar>},
|
||||||
|
{NppSet<CV_16U, 1, nppiSet_16u_C1R>::set,NppSet<CV_16U, 2, nppiSet_16u_C2R>::set,kernelSet<ushort>,NppSet<CV_16U, 4, nppiSet_16u_C4R>::set},
|
||||||
|
{NppSet<CV_16S, 1, nppiSet_16s_C1R>::set,NppSet<CV_16S, 2, nppiSet_16s_C2R>::set,kernelSet<short>,NppSet<CV_16S, 4, nppiSet_16s_C4R>::set},
|
||||||
|
{NppSet<CV_32S, 1, nppiSet_32s_C1R>::set,kernelSet<int>,kernelSet<int>,NppSet<CV_32S, 4, nppiSet_32s_C4R>::set},
|
||||||
|
{NppSet<CV_32F, 1, nppiSet_32f_C1R>::set,kernelSet<float>,kernelSet<float>,NppSet<CV_32F, 4, nppiSet_32f_C4R>::set},
|
||||||
|
{kernelSet<double>,kernelSet<double>,kernelSet<double>,kernelSet<double>}
|
||||||
|
};
|
||||||
|
|
||||||
|
callers[m.depth()][m.channels() - 1](m, s);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
typedef void (*caller_t)(GpuMat& src, Scalar s, const GpuMat& mask);
|
||||||
|
|
||||||
|
static const caller_t callers[7][4] =
|
||||||
|
{
|
||||||
|
{NppSetMask<CV_8U, 1, nppiSet_8u_C1MR>::set,kernelSetMask<uchar>,kernelSetMask<uchar>,NppSetMask<CV_8U, 4, nppiSet_8u_C4MR>::set},
|
||||||
|
{kernelSetMask<schar>,kernelSetMask<schar>,kernelSetMask<schar>,kernelSetMask<schar>},
|
||||||
|
{NppSetMask<CV_16U, 1, nppiSet_16u_C1MR>::set,kernelSetMask<ushort>,kernelSetMask<ushort>,NppSetMask<CV_16U, 4, nppiSet_16u_C4MR>::set},
|
||||||
|
{NppSetMask<CV_16S, 1, nppiSet_16s_C1MR>::set,kernelSetMask<short>,kernelSetMask<short>,NppSetMask<CV_16S, 4, nppiSet_16s_C4MR>::set},
|
||||||
|
{NppSetMask<CV_32S, 1, nppiSet_32s_C1MR>::set,kernelSetMask<int>,kernelSetMask<int>,NppSetMask<CV_32S, 4, nppiSet_32s_C4MR>::set},
|
||||||
|
{NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::set,kernelSetMask<float>,kernelSetMask<float>,NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::set},
|
||||||
|
{kernelSetMask<double>,kernelSetMask<double>,kernelSetMask<double>,kernelSetMask<double>}
|
||||||
|
};
|
||||||
|
|
||||||
|
callers[m.depth()][m.channels() - 1](m, s, mask);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const
|
||||||
|
{
|
||||||
|
cudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
|
||||||
|
}
|
||||||
|
|
||||||
|
void free(void* devPtr) const
|
||||||
|
{
|
||||||
|
cudaFree(devPtr);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
class Initializer
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
Initializer()
|
||||||
|
{
|
||||||
|
static CudaFuncTable funcTable;
|
||||||
|
setGpuFuncTable(&funcTable);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Namespace-scope instance: constructing it registers the CUDA function table.
Initializer init;
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -44,6 +44,7 @@
|
|||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
using namespace cv::gpu;
|
using namespace cv::gpu;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
#if !defined (HAVE_CUDA)
|
#if !defined (HAVE_CUDA)
|
||||||
|
|
||||||
@ -51,7 +52,9 @@ void cv::gpu::matchTemplate(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&)
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace imgproc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace match_template
|
||||||
{
|
{
|
||||||
void matchTemplateNaive_CCORR_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
|
void matchTemplateNaive_CCORR_8U(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
|
||||||
void matchTemplateNaive_CCORR_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
|
void matchTemplateNaive_CCORR_32F(const DevMem2Db image, const DevMem2Db templ, DevMem2Df result, int cn, cudaStream_t stream);
|
||||||
@ -132,8 +135,11 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
unsigned int templ_sqsum, DevMem2Df result, int cn, cudaStream_t stream);
|
unsigned int templ_sqsum, DevMem2Df result, int cn, cudaStream_t stream);
|
||||||
|
|
||||||
void extractFirstChannel_32F(const DevMem2Db image, DevMem2Df result, int cn, cudaStream_t stream);
|
void extractFirstChannel_32F(const DevMem2Db image, DevMem2Df result, int cn, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ match_template;
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
@ -177,14 +183,14 @@ namespace
|
|||||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||||
if (templ.size().area() < getTemplateThreshold(CV_TM_CCORR, CV_32F))
|
if (templ.size().area() < getTemplateThreshold(CV_TM_CCORR, CV_32F))
|
||||||
{
|
{
|
||||||
imgproc::matchTemplateNaive_CCORR_32F(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
|
matchTemplateNaive_CCORR_32F(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
GpuMat result_;
|
GpuMat result_;
|
||||||
ConvolveBuf buf;
|
ConvolveBuf buf;
|
||||||
convolve(image.reshape(1), templ.reshape(1), result_, true, buf, stream);
|
convolve(image.reshape(1), templ.reshape(1), result_, true, buf, stream);
|
||||||
imgproc::extractFirstChannel_32F(result_, result, image.channels(), StreamAccessor::getStream(stream));
|
extractFirstChannel_32F(result_, result, image.channels(), StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -193,7 +199,7 @@ namespace
|
|||||||
if (templ.size().area() < getTemplateThreshold(CV_TM_CCORR, CV_8U))
|
if (templ.size().area() < getTemplateThreshold(CV_TM_CCORR, CV_8U))
|
||||||
{
|
{
|
||||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||||
imgproc::matchTemplateNaive_CCORR_8U(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
|
matchTemplateNaive_CCORR_8U(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -220,15 +226,14 @@ namespace
|
|||||||
sqrIntegral(image.reshape(1), img_sqsum, stream);
|
sqrIntegral(image.reshape(1), img_sqsum, stream);
|
||||||
|
|
||||||
unsigned int templ_sqsum = (unsigned int)sqrSum(templ.reshape(1))[0];
|
unsigned int templ_sqsum = (unsigned int)sqrSum(templ.reshape(1))[0];
|
||||||
imgproc::normalize_8U(templ.cols, templ.rows, img_sqsum, templ_sqsum,
|
normalize_8U(templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
|
||||||
result, image.channels(), StreamAccessor::getStream(stream));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void matchTemplate_SQDIFF_32F(const GpuMat& image, const GpuMat& templ, GpuMat& result, Stream& stream)
|
void matchTemplate_SQDIFF_32F(const GpuMat& image, const GpuMat& templ, GpuMat& result, Stream& stream)
|
||||||
{
|
{
|
||||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||||
imgproc::matchTemplateNaive_SQDIFF_32F(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
|
matchTemplateNaive_SQDIFF_32F(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -237,7 +242,7 @@ namespace
|
|||||||
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, CV_8U))
|
if (templ.size().area() < getTemplateThreshold(CV_TM_SQDIFF, CV_8U))
|
||||||
{
|
{
|
||||||
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
|
||||||
imgproc::matchTemplateNaive_SQDIFF_8U(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
|
matchTemplateNaive_SQDIFF_8U(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -247,8 +252,7 @@ namespace
|
|||||||
unsigned int templ_sqsum = (unsigned int)sqrSum(templ.reshape(1))[0];
|
unsigned int templ_sqsum = (unsigned int)sqrSum(templ.reshape(1))[0];
|
||||||
|
|
||||||
matchTemplate_CCORR_8U(image, templ, result, stream);
|
matchTemplate_CCORR_8U(image, templ, result, stream);
|
||||||
imgproc::matchTemplatePrepared_SQDIFF_8U(
|
matchTemplatePrepared_SQDIFF_8U(templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
|
||||||
templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -260,8 +264,7 @@ namespace
|
|||||||
unsigned int templ_sqsum = (unsigned int)sqrSum(templ.reshape(1))[0];
|
unsigned int templ_sqsum = (unsigned int)sqrSum(templ.reshape(1))[0];
|
||||||
|
|
||||||
matchTemplate_CCORR_8U(image, templ, result, stream);
|
matchTemplate_CCORR_8U(image, templ, result, stream);
|
||||||
imgproc::matchTemplatePrepared_SQDIFF_NORMED_8U(
|
matchTemplatePrepared_SQDIFF_NORMED_8U(templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
|
||||||
templ.cols, templ.rows, img_sqsum, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -275,13 +278,12 @@ namespace
|
|||||||
integral(image, image_sum, stream);
|
integral(image, image_sum, stream);
|
||||||
|
|
||||||
unsigned int templ_sum = (unsigned int)sum(templ)[0];
|
unsigned int templ_sum = (unsigned int)sum(templ)[0];
|
||||||
imgproc::matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows,
|
matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, image_sum, templ_sum, result, StreamAccessor::getStream(stream));
|
||||||
image_sum, templ_sum, result, StreamAccessor::getStream(stream));
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::vector<GpuMat> images;
|
vector<GpuMat> images;
|
||||||
std::vector<GpuMat> image_sums(image.channels());
|
vector<GpuMat> image_sums(image.channels());
|
||||||
|
|
||||||
split(image, images);
|
split(image, images);
|
||||||
for (int i = 0; i < image.channels(); ++i)
|
for (int i = 0; i < image.channels(); ++i)
|
||||||
@ -292,19 +294,19 @@ namespace
|
|||||||
switch (image.channels())
|
switch (image.channels())
|
||||||
{
|
{
|
||||||
case 2:
|
case 2:
|
||||||
imgproc::matchTemplatePrepared_CCOFF_8UC2(
|
matchTemplatePrepared_CCOFF_8UC2(
|
||||||
templ.cols, templ.rows, image_sums[0], image_sums[1],
|
templ.cols, templ.rows, image_sums[0], image_sums[1],
|
||||||
(unsigned int)templ_sum[0], (unsigned int)templ_sum[1],
|
(unsigned int)templ_sum[0], (unsigned int)templ_sum[1],
|
||||||
result, StreamAccessor::getStream(stream));
|
result, StreamAccessor::getStream(stream));
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
imgproc::matchTemplatePrepared_CCOFF_8UC3(
|
matchTemplatePrepared_CCOFF_8UC3(
|
||||||
templ.cols, templ.rows, image_sums[0], image_sums[1], image_sums[2],
|
templ.cols, templ.rows, image_sums[0], image_sums[1], image_sums[2],
|
||||||
(unsigned int)templ_sum[0], (unsigned int)templ_sum[1], (unsigned int)templ_sum[2],
|
(unsigned int)templ_sum[0], (unsigned int)templ_sum[1], (unsigned int)templ_sum[2],
|
||||||
result, StreamAccessor::getStream(stream));
|
result, StreamAccessor::getStream(stream));
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
imgproc::matchTemplatePrepared_CCOFF_8UC4(
|
matchTemplatePrepared_CCOFF_8UC4(
|
||||||
templ.cols, templ.rows, image_sums[0], image_sums[1], image_sums[2], image_sums[3],
|
templ.cols, templ.rows, image_sums[0], image_sums[1], image_sums[2], image_sums[3],
|
||||||
(unsigned int)templ_sum[0], (unsigned int)templ_sum[1], (unsigned int)templ_sum[2],
|
(unsigned int)templ_sum[0], (unsigned int)templ_sum[1], (unsigned int)templ_sum[2],
|
||||||
(unsigned int)templ_sum[3], result, StreamAccessor::getStream(stream));
|
(unsigned int)templ_sum[3], result, StreamAccessor::getStream(stream));
|
||||||
@ -341,15 +343,15 @@ namespace
|
|||||||
unsigned int templ_sum = (unsigned int)sum(templ)[0];
|
unsigned int templ_sum = (unsigned int)sum(templ)[0];
|
||||||
unsigned int templ_sqsum = (unsigned int)sqrSum(templ)[0];
|
unsigned int templ_sqsum = (unsigned int)sqrSum(templ)[0];
|
||||||
|
|
||||||
imgproc::matchTemplatePrepared_CCOFF_NORMED_8U(
|
matchTemplatePrepared_CCOFF_NORMED_8U(
|
||||||
templ.cols, templ.rows, image_sum, image_sqsum,
|
templ.cols, templ.rows, image_sum, image_sqsum,
|
||||||
templ_sum, templ_sqsum, result, StreamAccessor::getStream(stream));
|
templ_sum, templ_sqsum, result, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
std::vector<GpuMat> images;
|
vector<GpuMat> images;
|
||||||
std::vector<GpuMat> image_sums(image.channels());
|
vector<GpuMat> image_sums(image.channels());
|
||||||
std::vector<GpuMat> image_sqsums(image.channels());
|
vector<GpuMat> image_sqsums(image.channels());
|
||||||
|
|
||||||
split(image, images);
|
split(image, images);
|
||||||
for (int i = 0; i < image.channels(); ++i)
|
for (int i = 0; i < image.channels(); ++i)
|
||||||
@ -364,7 +366,7 @@ namespace
|
|||||||
switch (image.channels())
|
switch (image.channels())
|
||||||
{
|
{
|
||||||
case 2:
|
case 2:
|
||||||
imgproc::matchTemplatePrepared_CCOFF_NORMED_8UC2(
|
matchTemplatePrepared_CCOFF_NORMED_8UC2(
|
||||||
templ.cols, templ.rows,
|
templ.cols, templ.rows,
|
||||||
image_sums[0], image_sqsums[0],
|
image_sums[0], image_sqsums[0],
|
||||||
image_sums[1], image_sqsums[1],
|
image_sums[1], image_sqsums[1],
|
||||||
@ -373,7 +375,7 @@ namespace
|
|||||||
result, StreamAccessor::getStream(stream));
|
result, StreamAccessor::getStream(stream));
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
imgproc::matchTemplatePrepared_CCOFF_NORMED_8UC3(
|
matchTemplatePrepared_CCOFF_NORMED_8UC3(
|
||||||
templ.cols, templ.rows,
|
templ.cols, templ.rows,
|
||||||
image_sums[0], image_sqsums[0],
|
image_sums[0], image_sqsums[0],
|
||||||
image_sums[1], image_sqsums[1],
|
image_sums[1], image_sqsums[1],
|
||||||
@ -384,7 +386,7 @@ namespace
|
|||||||
result, StreamAccessor::getStream(stream));
|
result, StreamAccessor::getStream(stream));
|
||||||
break;
|
break;
|
||||||
case 4:
|
case 4:
|
||||||
imgproc::matchTemplatePrepared_CCOFF_NORMED_8UC4(
|
matchTemplatePrepared_CCOFF_NORMED_8UC4(
|
||||||
templ.cols, templ.rows,
|
templ.cols, templ.rows,
|
||||||
image_sums[0], image_sqsums[0],
|
image_sums[0], image_sqsums[0],
|
||||||
image_sums[1], image_sqsums[1],
|
image_sums[1], image_sqsums[1],
|
||||||
|
@ -45,6 +45,139 @@
|
|||||||
using namespace cv;
|
using namespace cv;
|
||||||
using namespace cv::gpu;
|
using namespace cv::gpu;
|
||||||
|
|
||||||
|
// Default constructor: all fields are zeroed and nothing is allocated.
cv::gpu::CudaMem::CudaMem()
    : flags(0),
      rows(0), cols(0), step(0),
      data(0), refcount(0),
      datastart(0), dataend(0),
      alloc_type(0)
{
}
|
||||||
|
|
||||||
|
// Constructs a buffer of the given dimensions and type.
// create() is only called when both dimensions are positive; otherwise the
// object stays empty.
cv::gpu::CudaMem::CudaMem(int _rows, int _cols, int _type, int _alloc_type)
    : flags(0),
      rows(0), cols(0), step(0),
      data(0), refcount(0),
      datastart(0), dataend(0),
      alloc_type(0)
{
    const bool hasArea = _rows > 0 && _cols > 0;

    if (hasArea)
    {
        create(_rows, _cols, _type, _alloc_type);
    }
}
|
||||||
|
|
||||||
|
// Constructs a buffer of the given size and type.
// create() is only called when both dimensions are positive; otherwise the
// object stays empty.
cv::gpu::CudaMem::CudaMem(Size _size, int _type, int _alloc_type)
    : flags(0),
      rows(0), cols(0), step(0),
      data(0), refcount(0),
      datastart(0), dataend(0),
      alloc_type(0)
{
    const bool hasArea = _size.height > 0 && _size.width > 0;

    if (hasArea)
    {
        create(_size.height, _size.width, _type, _alloc_type);
    }
}
|
||||||
|
|
||||||
|
// Copy constructor: copies every field of `m` (including the data pointer)
// and, if `m` has a reference counter, increments it.
cv::gpu::CudaMem::CudaMem(const CudaMem& m)
    : flags(m.flags),
      rows(m.rows), cols(m.cols), step(m.step),
      data(m.data), refcount(m.refcount),
      datastart(m.datastart), dataend(m.dataend),
      alloc_type(m.alloc_type)
{
    if (refcount != 0)
    {
        CV_XADD(refcount, 1);
    }
}
|
||||||
|
|
||||||
|
// Constructs a buffer with the size and type of `m` and copies `m` into it.
// create() is only called when `m` has positive dimensions; the copy through
// a Mat header is performed unconditionally.
cv::gpu::CudaMem::CudaMem(const Mat& m, int _alloc_type)
    : flags(0),
      rows(0), cols(0), step(0),
      data(0), refcount(0),
      datastart(0), dataend(0),
      alloc_type(0)
{
    const bool hasArea = m.rows > 0 && m.cols > 0;

    if (hasArea)
    {
        create(m.size(), m.type(), _alloc_type);
    }

    // Mat header over this object's own data, used as the copy target.
    Mat header = createMatHeader();
    m.copyTo(header);
}
|
||||||
|
|
||||||
|
// Destructor: delegates all cleanup to release().
cv::gpu::CudaMem::~CudaMem()
{
    this->release();
}
|
||||||
|
|
||||||
|
// Assignment: makes this object refer to the same data as `m`.
// The source's reference counter is incremented before release() is called
// on this object, and only then are the fields overwritten.
CudaMem& cv::gpu::CudaMem::operator = (const CudaMem& m)
{
    // Self-assignment leaves the object untouched.
    if (this == &m)
    {
        return *this;
    }

    if (m.refcount != 0)
    {
        CV_XADD(m.refcount, 1);
    }

    release();

    flags = m.flags;
    rows = m.rows;
    cols = m.cols;
    step = m.step;
    data = m.data;
    datastart = m.datastart;
    dataend = m.dataend;
    refcount = m.refcount;
    alloc_type = m.alloc_type;

    return *this;
}
|
||||||
|
|
||||||
|
// Returns a new CudaMem with the same size, type and allocation type, whose
// contents are copied from this object through Mat headers.
CudaMem cv::gpu::CudaMem::clone() const
{
    CudaMem duplicate(size(), type(), alloc_type);

    Mat dstHeader = duplicate;
    Mat srcHeader = *this;
    srcHeader.copyTo(dstHeader);

    return duplicate;
}
|
||||||
|
|
||||||
|
// Size-based overload: forwards to create(rows, cols, type, alloc_type).
void cv::gpu::CudaMem::create(Size _size, int _type, int _alloc_type)
{
    const int newRows = _size.height;
    const int newCols = _size.width;

    create(newRows, newCols, _type, _alloc_type);
}
|
||||||
|
|
||||||
|
// Builds a Mat from this object's size, type, data pointer and step.
Mat cv::gpu::CudaMem::createMatHeader() const
{
    Mat header(size(), type(), data, step);
    return header;
}
|
||||||
|
|
||||||
|
// Conversion to Mat: returns the result of createMatHeader().
cv::gpu::CudaMem::operator Mat() const
{
    Mat header = createMatHeader();
    return header;
}
|
||||||
|
|
||||||
|
// Conversion to GpuMat: returns the result of createGpuMatHeader().
cv::gpu::CudaMem::operator GpuMat() const
{
    GpuMat header = createGpuMatHeader();
    return header;
}
|
||||||
|
|
||||||
|
bool cv::gpu::CudaMem::isContinuous() const
|
||||||
|
{
|
||||||
|
return (flags & Mat::CONTINUOUS_FLAG) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns CV_ELEM_SIZE evaluated on flags.
size_t cv::gpu::CudaMem::elemSize() const
{
    const size_t bytes = CV_ELEM_SIZE(flags);
    return bytes;
}
|
||||||
|
|
||||||
|
// Returns CV_ELEM_SIZE1 evaluated on flags.
size_t cv::gpu::CudaMem::elemSize1() const
{
    const size_t bytes = CV_ELEM_SIZE1(flags);
    return bytes;
}
|
||||||
|
|
||||||
|
int cv::gpu::CudaMem::type() const
|
||||||
|
{
|
||||||
|
return CV_MAT_TYPE(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
int cv::gpu::CudaMem::depth() const
|
||||||
|
{
|
||||||
|
return CV_MAT_DEPTH(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
int cv::gpu::CudaMem::channels() const
|
||||||
|
{
|
||||||
|
return CV_MAT_CN(flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the step divided by elemSize1(), i.e. the step expressed in
// units of elemSize1() instead of the units step is stored in.
size_t cv::gpu::CudaMem::step1() const
{
    const size_t unitSize = elemSize1();
    return step / unitSize;
}
|
||||||
|
|
||||||
|
// Returns the dimensions as a Size; note the argument order: (cols, rows).
Size cv::gpu::CudaMem::size() const
{
    Size dims(cols, rows);
    return dims;
}
|
||||||
|
|
||||||
|
bool cv::gpu::CudaMem::empty() const
|
||||||
|
{
|
||||||
|
return data == 0;
|
||||||
|
}
|
||||||
|
|
||||||
#if !defined (HAVE_CUDA)
|
#if !defined (HAVE_CUDA)
|
||||||
|
|
||||||
void cv::gpu::registerPageLocked(Mat&) { throw_nogpu(); }
|
void cv::gpu::registerPageLocked(Mat&) { throw_nogpu(); }
|
||||||
|
@ -190,7 +190,11 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Sum
|
// Sum
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace mathfunc
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace matrix_reductions
|
||||||
|
{
|
||||||
|
namespace sum
|
||||||
{
|
{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void sumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn);
|
void sumCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn);
|
||||||
@ -210,12 +214,11 @@ namespace cv { namespace gpu { namespace mathfunc
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void sqrSumMultipassCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn);
|
void sqrSumMultipassCaller(const DevMem2Db src, PtrStepb buf, double* sum, int cn);
|
||||||
|
|
||||||
namespace sums
|
|
||||||
{
|
|
||||||
void getBufSizeRequired(int cols, int rows, int cn, int& bufcols, int& bufrows);
|
void getBufSizeRequired(int cols, int rows, int cn, int& bufcols, int& bufrows);
|
||||||
}
|
}
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
Scalar cv::gpu::sum(const GpuMat& src)
|
Scalar cv::gpu::sum(const GpuMat& src)
|
||||||
{
|
{
|
||||||
@ -226,23 +229,25 @@ Scalar cv::gpu::sum(const GpuMat& src)
|
|||||||
|
|
||||||
Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
|
Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
|
||||||
{
|
{
|
||||||
using namespace mathfunc;
|
using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::sum;
|
||||||
|
|
||||||
typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
|
typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
|
||||||
|
|
||||||
static Caller multipass_callers[7] = {
|
static Caller multipass_callers[7] =
|
||||||
|
{
|
||||||
sumMultipassCaller<unsigned char>, sumMultipassCaller<char>,
|
sumMultipassCaller<unsigned char>, sumMultipassCaller<char>,
|
||||||
sumMultipassCaller<unsigned short>, sumMultipassCaller<short>,
|
sumMultipassCaller<unsigned short>, sumMultipassCaller<short>,
|
||||||
sumMultipassCaller<int>, sumMultipassCaller<float>, 0 };
|
sumMultipassCaller<int>, sumMultipassCaller<float>, 0
|
||||||
|
};
|
||||||
|
|
||||||
static Caller singlepass_callers[7] = {
|
static Caller singlepass_callers[7] = {
|
||||||
sumCaller<unsigned char>, sumCaller<char>,
|
sumCaller<unsigned char>, sumCaller<char>,
|
||||||
sumCaller<unsigned short>, sumCaller<short>,
|
sumCaller<unsigned short>, sumCaller<short>,
|
||||||
sumCaller<int>, sumCaller<float>, 0 };
|
sumCaller<int>, sumCaller<float>, 0
|
||||||
|
};
|
||||||
|
|
||||||
Size buf_size;
|
Size buf_size;
|
||||||
sums::getBufSizeRequired(src.cols, src.rows, src.channels(),
|
getBufSizeRequired(src.cols, src.rows, src.channels(), buf_size.width, buf_size.height);
|
||||||
buf_size.width, buf_size.height);
|
|
||||||
ensureSizeIsEnough(buf_size, CV_8U, buf);
|
ensureSizeIsEnough(buf_size, CV_8U, buf);
|
||||||
|
|
||||||
Caller* callers = multipass_callers;
|
Caller* callers = multipass_callers;
|
||||||
@ -267,23 +272,26 @@ Scalar cv::gpu::absSum(const GpuMat& src)
|
|||||||
|
|
||||||
Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
|
Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
|
||||||
{
|
{
|
||||||
using namespace mathfunc;
|
using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::sum;
|
||||||
|
|
||||||
typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
|
typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
|
||||||
|
|
||||||
static Caller multipass_callers[7] = {
|
static Caller multipass_callers[7] =
|
||||||
|
{
|
||||||
absSumMultipassCaller<unsigned char>, absSumMultipassCaller<char>,
|
absSumMultipassCaller<unsigned char>, absSumMultipassCaller<char>,
|
||||||
absSumMultipassCaller<unsigned short>, absSumMultipassCaller<short>,
|
absSumMultipassCaller<unsigned short>, absSumMultipassCaller<short>,
|
||||||
absSumMultipassCaller<int>, absSumMultipassCaller<float>, 0 };
|
absSumMultipassCaller<int>, absSumMultipassCaller<float>, 0
|
||||||
|
};
|
||||||
|
|
||||||
static Caller singlepass_callers[7] = {
|
static Caller singlepass_callers[7] =
|
||||||
|
{
|
||||||
absSumCaller<unsigned char>, absSumCaller<char>,
|
absSumCaller<unsigned char>, absSumCaller<char>,
|
||||||
absSumCaller<unsigned short>, absSumCaller<short>,
|
absSumCaller<unsigned short>, absSumCaller<short>,
|
||||||
absSumCaller<int>, absSumCaller<float>, 0 };
|
absSumCaller<int>, absSumCaller<float>, 0
|
||||||
|
};
|
||||||
|
|
||||||
Size buf_size;
|
Size buf_size;
|
||||||
sums::getBufSizeRequired(src.cols, src.rows, src.channels(),
|
getBufSizeRequired(src.cols, src.rows, src.channels(), buf_size.width, buf_size.height);
|
||||||
buf_size.width, buf_size.height);
|
|
||||||
ensureSizeIsEnough(buf_size, CV_8U, buf);
|
ensureSizeIsEnough(buf_size, CV_8U, buf);
|
||||||
|
|
||||||
Caller* callers = multipass_callers;
|
Caller* callers = multipass_callers;
|
||||||
@ -308,27 +316,30 @@ Scalar cv::gpu::sqrSum(const GpuMat& src)
|
|||||||
|
|
||||||
Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
|
Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
|
||||||
{
|
{
|
||||||
using namespace mathfunc;
|
using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::sum;
|
||||||
|
|
||||||
typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
|
typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
|
||||||
|
|
||||||
static Caller multipass_callers[7] = {
|
static Caller multipass_callers[7] =
|
||||||
|
{
|
||||||
sqrSumMultipassCaller<unsigned char>, sqrSumMultipassCaller<char>,
|
sqrSumMultipassCaller<unsigned char>, sqrSumMultipassCaller<char>,
|
||||||
sqrSumMultipassCaller<unsigned short>, sqrSumMultipassCaller<short>,
|
sqrSumMultipassCaller<unsigned short>, sqrSumMultipassCaller<short>,
|
||||||
sqrSumMultipassCaller<int>, sqrSumMultipassCaller<float>, 0 };
|
sqrSumMultipassCaller<int>, sqrSumMultipassCaller<float>, 0
|
||||||
|
};
|
||||||
|
|
||||||
static Caller singlepass_callers[7] = {
|
static Caller singlepass_callers[7] =
|
||||||
|
{
|
||||||
sqrSumCaller<unsigned char>, sqrSumCaller<char>,
|
sqrSumCaller<unsigned char>, sqrSumCaller<char>,
|
||||||
sqrSumCaller<unsigned short>, sqrSumCaller<short>,
|
sqrSumCaller<unsigned short>, sqrSumCaller<short>,
|
||||||
sqrSumCaller<int>, sqrSumCaller<float>, 0 };
|
sqrSumCaller<int>, sqrSumCaller<float>, 0
|
||||||
|
};
|
||||||
|
|
||||||
Caller* callers = multipass_callers;
|
Caller* callers = multipass_callers;
|
||||||
if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
|
if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
|
||||||
callers = singlepass_callers;
|
callers = singlepass_callers;
|
||||||
|
|
||||||
Size buf_size;
|
Size buf_size;
|
||||||
sums::getBufSizeRequired(src.cols, src.rows, src.channels(),
|
getBufSizeRequired(src.cols, src.rows, src.channels(), buf_size.width, buf_size.height);
|
||||||
buf_size.width, buf_size.height);
|
|
||||||
ensureSizeIsEnough(buf_size, CV_8U, buf);
|
ensureSizeIsEnough(buf_size, CV_8U, buf);
|
||||||
|
|
||||||
Caller caller = callers[src.depth()];
|
Caller caller = callers[src.depth()];
|
||||||
@ -339,14 +350,15 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
|
|||||||
return Scalar(result[0], result[1], result[2], result[3]);
|
return Scalar(result[0], result[1], result[2], result[3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Find min or max
|
// Find min or max
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace mathfunc { namespace minmax {
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace matrix_reductions
|
||||||
|
{
|
||||||
|
namespace minmax
|
||||||
|
{
|
||||||
void getBufSizeRequired(int cols, int rows, int elem_size, int& bufcols, int& bufrows);
|
void getBufSizeRequired(int cols, int rows, int elem_size, int& bufcols, int& bufrows);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -360,8 +372,10 @@ namespace cv { namespace gpu { namespace mathfunc { namespace minmax {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxMaskMultipassCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
void minMaxMaskMultipassCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}}}}
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
|
||||||
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask)
|
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask)
|
||||||
@ -373,39 +387,43 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
|
|||||||
|
|
||||||
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf)
|
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf)
|
||||||
{
|
{
|
||||||
using namespace mathfunc::minmax;
|
using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::minmax;
|
||||||
|
|
||||||
typedef void (*Caller)(const DevMem2Db, double*, double*, PtrStepb);
|
typedef void (*Caller)(const DevMem2Db, double*, double*, PtrStepb);
|
||||||
typedef void (*MaskedCaller)(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
typedef void (*MaskedCaller)(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
|
||||||
|
|
||||||
static Caller multipass_callers[7] = {
|
static Caller multipass_callers[7] =
|
||||||
|
{
|
||||||
minMaxMultipassCaller<unsigned char>, minMaxMultipassCaller<char>,
|
minMaxMultipassCaller<unsigned char>, minMaxMultipassCaller<char>,
|
||||||
minMaxMultipassCaller<unsigned short>, minMaxMultipassCaller<short>,
|
minMaxMultipassCaller<unsigned short>, minMaxMultipassCaller<short>,
|
||||||
minMaxMultipassCaller<int>, minMaxMultipassCaller<float>, 0 };
|
minMaxMultipassCaller<int>, minMaxMultipassCaller<float>, 0
|
||||||
|
};
|
||||||
|
|
||||||
static Caller singlepass_callers[7] = {
|
static Caller singlepass_callers[7] =
|
||||||
|
{
|
||||||
minMaxCaller<unsigned char>, minMaxCaller<char>,
|
minMaxCaller<unsigned char>, minMaxCaller<char>,
|
||||||
minMaxCaller<unsigned short>, minMaxCaller<short>,
|
minMaxCaller<unsigned short>, minMaxCaller<short>,
|
||||||
minMaxCaller<int>, minMaxCaller<float>, minMaxCaller<double> };
|
minMaxCaller<int>, minMaxCaller<float>, minMaxCaller<double>
|
||||||
|
};
|
||||||
|
|
||||||
static MaskedCaller masked_multipass_callers[7] = {
|
static MaskedCaller masked_multipass_callers[7] =
|
||||||
|
{
|
||||||
minMaxMaskMultipassCaller<unsigned char>, minMaxMaskMultipassCaller<char>,
|
minMaxMaskMultipassCaller<unsigned char>, minMaxMaskMultipassCaller<char>,
|
||||||
minMaxMaskMultipassCaller<unsigned short>, minMaxMaskMultipassCaller<short>,
|
minMaxMaskMultipassCaller<unsigned short>, minMaxMaskMultipassCaller<short>,
|
||||||
minMaxMaskMultipassCaller<int>, minMaxMaskMultipassCaller<float>, 0 };
|
minMaxMaskMultipassCaller<int>, minMaxMaskMultipassCaller<float>, 0
|
||||||
|
};
|
||||||
|
|
||||||
static MaskedCaller masked_singlepass_callers[7] = {
|
static MaskedCaller masked_singlepass_callers[7] =
|
||||||
|
{
|
||||||
minMaxMaskCaller<unsigned char>, minMaxMaskCaller<char>,
|
minMaxMaskCaller<unsigned char>, minMaxMaskCaller<char>,
|
||||||
minMaxMaskCaller<unsigned short>, minMaxMaskCaller<short>,
|
minMaxMaskCaller<unsigned short>, minMaxMaskCaller<short>,
|
||||||
minMaxMaskCaller<int>, minMaxMaskCaller<float>,
|
minMaxMaskCaller<int>, minMaxMaskCaller<float>, minMaxMaskCaller<double>
|
||||||
minMaxMaskCaller<double> };
|
};
|
||||||
|
|
||||||
CV_Assert(src.channels() == 1);
|
CV_Assert(src.channels() == 1);
|
||||||
|
|
||||||
CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
|
CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
|
||||||
|
|
||||||
CV_Assert(src.type() != CV_64F || (TargetArchs::builtWith(NATIVE_DOUBLE) &&
|
|
||||||
DeviceInfo().supports(NATIVE_DOUBLE)));
|
|
||||||
|
|
||||||
double minVal_; if (!minVal) minVal = &minVal_;
|
double minVal_; if (!minVal) minVal = &minVal_;
|
||||||
double maxVal_; if (!maxVal) maxVal = &maxVal_;
|
double maxVal_; if (!maxVal) maxVal = &maxVal_;
|
||||||
|
|
||||||
@ -439,8 +457,12 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Locate min and max
|
// Locate min and max
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace mathfunc { namespace minmaxloc {
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace matrix_reductions
|
||||||
|
{
|
||||||
|
namespace minmaxloc
|
||||||
|
{
|
||||||
void getBufSizeRequired(int cols, int rows, int elem_size, int& b1cols,
|
void getBufSizeRequired(int cols, int rows, int elem_size, int& b1cols,
|
||||||
int& b1rows, int& b2cols, int& b2rows);
|
int& b1rows, int& b2cols, int& b2rows);
|
||||||
|
|
||||||
@ -459,8 +481,10 @@ namespace cv { namespace gpu { namespace mathfunc { namespace minmaxloc {
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void minMaxLocMaskMultipassCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval,
|
void minMaxLocMaskMultipassCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval,
|
||||||
int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
|
int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
|
||||||
}}}}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, const GpuMat& mask)
|
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, const GpuMat& mask)
|
||||||
{
|
{
|
||||||
@ -468,43 +492,46 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
|
|||||||
minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf);
|
minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
|
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
|
||||||
const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf)
|
const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf)
|
||||||
{
|
{
|
||||||
using namespace mathfunc::minmaxloc;
|
using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::minmaxloc;
|
||||||
|
|
||||||
typedef void (*Caller)(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
typedef void (*Caller)(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
typedef void (*MaskedCaller)(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
typedef void (*MaskedCaller)(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
|
||||||
|
|
||||||
static Caller multipass_callers[7] = {
|
static Caller multipass_callers[7] =
|
||||||
|
{
|
||||||
minMaxLocMultipassCaller<unsigned char>, minMaxLocMultipassCaller<char>,
|
minMaxLocMultipassCaller<unsigned char>, minMaxLocMultipassCaller<char>,
|
||||||
minMaxLocMultipassCaller<unsigned short>, minMaxLocMultipassCaller<short>,
|
minMaxLocMultipassCaller<unsigned short>, minMaxLocMultipassCaller<short>,
|
||||||
minMaxLocMultipassCaller<int>, minMaxLocMultipassCaller<float>, 0 };
|
minMaxLocMultipassCaller<int>, minMaxLocMultipassCaller<float>, 0
|
||||||
|
};
|
||||||
|
|
||||||
static Caller singlepass_callers[7] = {
|
static Caller singlepass_callers[7] =
|
||||||
|
{
|
||||||
minMaxLocCaller<unsigned char>, minMaxLocCaller<char>,
|
minMaxLocCaller<unsigned char>, minMaxLocCaller<char>,
|
||||||
minMaxLocCaller<unsigned short>, minMaxLocCaller<short>,
|
minMaxLocCaller<unsigned short>, minMaxLocCaller<short>,
|
||||||
minMaxLocCaller<int>, minMaxLocCaller<float>, minMaxLocCaller<double> };
|
minMaxLocCaller<int>, minMaxLocCaller<float>, minMaxLocCaller<double>
|
||||||
|
};
|
||||||
|
|
||||||
static MaskedCaller masked_multipass_callers[7] = {
|
static MaskedCaller masked_multipass_callers[7] =
|
||||||
|
{
|
||||||
minMaxLocMaskMultipassCaller<unsigned char>, minMaxLocMaskMultipassCaller<char>,
|
minMaxLocMaskMultipassCaller<unsigned char>, minMaxLocMaskMultipassCaller<char>,
|
||||||
minMaxLocMaskMultipassCaller<unsigned short>, minMaxLocMaskMultipassCaller<short>,
|
minMaxLocMaskMultipassCaller<unsigned short>, minMaxLocMaskMultipassCaller<short>,
|
||||||
minMaxLocMaskMultipassCaller<int>, minMaxLocMaskMultipassCaller<float>, 0 };
|
minMaxLocMaskMultipassCaller<int>, minMaxLocMaskMultipassCaller<float>, 0
|
||||||
|
};
|
||||||
|
|
||||||
static MaskedCaller masked_singlepass_callers[7] = {
|
static MaskedCaller masked_singlepass_callers[7] =
|
||||||
|
{
|
||||||
minMaxLocMaskCaller<unsigned char>, minMaxLocMaskCaller<char>,
|
minMaxLocMaskCaller<unsigned char>, minMaxLocMaskCaller<char>,
|
||||||
minMaxLocMaskCaller<unsigned short>, minMaxLocMaskCaller<short>,
|
minMaxLocMaskCaller<unsigned short>, minMaxLocMaskCaller<short>,
|
||||||
minMaxLocMaskCaller<int>, minMaxLocMaskCaller<float>,
|
minMaxLocMaskCaller<int>, minMaxLocMaskCaller<float>, minMaxLocMaskCaller<double>
|
||||||
minMaxLocMaskCaller<double> };
|
};
|
||||||
|
|
||||||
CV_Assert(src.channels() == 1);
|
CV_Assert(src.channels() == 1);
|
||||||
|
|
||||||
CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
|
CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
|
||||||
|
|
||||||
CV_Assert(src.type() != CV_64F || (TargetArchs::builtWith(NATIVE_DOUBLE) &&
|
|
||||||
DeviceInfo().supports(NATIVE_DOUBLE)));
|
|
||||||
|
|
||||||
double minVal_; if (!minVal) minVal = &minVal_;
|
double minVal_; if (!minVal) minVal = &minVal_;
|
||||||
double maxVal_; if (!maxVal) maxVal = &maxVal_;
|
double maxVal_; if (!maxVal) maxVal = &maxVal_;
|
||||||
int minLoc_[2];
|
int minLoc_[2];
|
||||||
@ -544,8 +571,12 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// Count non-zero elements
|
// Count non-zero elements
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace mathfunc { namespace countnonzero {
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace matrix_reductions
|
||||||
|
{
|
||||||
|
namespace countnonzero
|
||||||
|
{
|
||||||
void getBufSizeRequired(int cols, int rows, int& bufcols, int& bufrows);
|
void getBufSizeRequired(int cols, int rows, int& bufcols, int& bufrows);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -553,9 +584,10 @@ namespace cv { namespace gpu { namespace mathfunc { namespace countnonzero {
|
|||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
int countNonZeroMultipassCaller(const DevMem2Db src, PtrStepb buf);
|
int countNonZeroMultipassCaller(const DevMem2Db src, PtrStepb buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}}}}
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
|
||||||
int cv::gpu::countNonZero(const GpuMat& src)
|
int cv::gpu::countNonZero(const GpuMat& src)
|
||||||
{
|
{
|
||||||
@ -566,26 +598,25 @@ int cv::gpu::countNonZero(const GpuMat& src)
|
|||||||
|
|
||||||
int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
|
int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
|
||||||
{
|
{
|
||||||
using namespace mathfunc::countnonzero;
|
using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions::countnonzero;
|
||||||
|
|
||||||
typedef int (*Caller)(const DevMem2Db src, PtrStepb buf);
|
typedef int (*Caller)(const DevMem2Db src, PtrStepb buf);
|
||||||
|
|
||||||
static Caller multipass_callers[7] = {
|
static Caller multipass_callers[7] =
|
||||||
|
{
|
||||||
countNonZeroMultipassCaller<unsigned char>, countNonZeroMultipassCaller<char>,
|
countNonZeroMultipassCaller<unsigned char>, countNonZeroMultipassCaller<char>,
|
||||||
countNonZeroMultipassCaller<unsigned short>, countNonZeroMultipassCaller<short>,
|
countNonZeroMultipassCaller<unsigned short>, countNonZeroMultipassCaller<short>,
|
||||||
countNonZeroMultipassCaller<int>, countNonZeroMultipassCaller<float>, 0 };
|
countNonZeroMultipassCaller<int>, countNonZeroMultipassCaller<float>, 0
|
||||||
|
};
|
||||||
|
|
||||||
static Caller singlepass_callers[7] = {
|
static Caller singlepass_callers[7] =
|
||||||
|
{
|
||||||
countNonZeroCaller<unsigned char>, countNonZeroCaller<char>,
|
countNonZeroCaller<unsigned char>, countNonZeroCaller<char>,
|
||||||
countNonZeroCaller<unsigned short>, countNonZeroCaller<short>,
|
countNonZeroCaller<unsigned short>, countNonZeroCaller<short>,
|
||||||
countNonZeroCaller<int>, countNonZeroCaller<float>,
|
countNonZeroCaller<int>, countNonZeroCaller<float>, countNonZeroCaller<double> };
|
||||||
countNonZeroCaller<double> };
|
|
||||||
|
|
||||||
CV_Assert(src.channels() == 1);
|
CV_Assert(src.channels() == 1);
|
||||||
|
|
||||||
CV_Assert(src.type() != CV_64F || (TargetArchs::builtWith(NATIVE_DOUBLE) &&
|
|
||||||
DeviceInfo().supports(NATIVE_DOUBLE)));
|
|
||||||
|
|
||||||
Size buf_size;
|
Size buf_size;
|
||||||
getBufSizeRequired(src.cols, src.rows, buf_size.width, buf_size.height);
|
getBufSizeRequired(src.cols, src.rows, buf_size.width, buf_size.height);
|
||||||
ensureSizeIsEnough(buf_size, CV_8U, buf);
|
ensureSizeIsEnough(buf_size, CV_8U, buf);
|
||||||
@ -601,15 +632,20 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
|
|||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
// reduce
|
// reduce
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace mathfunc {
|
namespace matrix_reductions
|
||||||
|
{
|
||||||
template <typename T, typename S, typename D> void reduceRows_gpu(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template <typename T, typename S, typename D> void reduceRows_gpu(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
||||||
template <typename T, typename S, typename D> void reduceCols_gpu(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
template <typename T, typename S, typename D> void reduceCols_gpu(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int dtype, Stream& stream)
|
void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int dtype, Stream& stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::mathfunc;
|
using namespace OPENCV_DEVICE_NAMESPACE_ matrix_reductions;
|
||||||
|
|
||||||
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4 && dtype <= CV_32F);
|
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4 && dtype <= CV_32F);
|
||||||
CV_Assert(dim == 0 || dim == 1);
|
CV_Assert(dim == 0 || dim == 1);
|
||||||
CV_Assert(reduceOp == CV_REDUCE_SUM || reduceOp == CV_REDUCE_AVG || reduceOp == CV_REDUCE_MAX || reduceOp == CV_REDUCE_MIN);
|
CV_Assert(reduceOp == CV_REDUCE_SUM || reduceOp == CV_REDUCE_AVG || reduceOp == CV_REDUCE_MAX || reduceOp == CV_REDUCE_MIN);
|
||||||
|
@ -234,10 +234,10 @@ void cv::gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr,
|
|||||||
const int hsp = sp;
|
const int hsp = sp;
|
||||||
|
|
||||||
// Perform mean shift procedure and obtain region and spatial maps
|
// Perform mean shift procedure and obtain region and spatial maps
|
||||||
GpuMat h_rmap, h_spmap;
|
GpuMat d_rmap, d_spmap;
|
||||||
meanShiftProc(src, h_rmap, h_spmap, sp, sr, criteria);
|
meanShiftProc(src, d_rmap, d_spmap, sp, sr, criteria);
|
||||||
Mat rmap = h_rmap;
|
Mat rmap(d_rmap);
|
||||||
Mat spmap = h_spmap;
|
Mat spmap(d_spmap);
|
||||||
|
|
||||||
Graph<SegmLinkVal> g(nrows * ncols, 4 * (nrows - 1) * (ncols - 1)
|
Graph<SegmLinkVal> g(nrows * ncols, 4 * (nrows - 1) * (ncols - 1)
|
||||||
+ (nrows - 1) + (ncols - 1));
|
+ (nrows - 1) + (ncols - 1));
|
||||||
@ -352,7 +352,7 @@ void cv::gpu::meanShiftSegmentation(const GpuMat& src, Mat& dst, int sp, int sr,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Compute sum of the pixel's colors which are in the same segment
|
// Compute sum of the pixel's colors which are in the same segment
|
||||||
Mat h_src = src;
|
Mat h_src(src);
|
||||||
vector<Vec4i> sumcols(nrows * ncols, Vec4i(0, 0, 0, 0));
|
vector<Vec4i> sumcols(nrows * ncols, Vec4i(0, 0, 0, 0));
|
||||||
for (int y = 0; y < nrows; ++y)
|
for (int y = 0; y < nrows; ++y)
|
||||||
{
|
{
|
||||||
|
@ -43,12 +43,13 @@
|
|||||||
#ifndef __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
|
#ifndef __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
|
||||||
#define __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
|
#define __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
|
||||||
|
|
||||||
|
#include "internal_shared.hpp"
|
||||||
#include "saturate_cast.hpp"
|
#include "saturate_cast.hpp"
|
||||||
#include "vec_traits.hpp"
|
#include "vec_traits.hpp"
|
||||||
#include "vec_math.hpp"
|
#include "vec_math.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
//////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////
|
||||||
// BrdConstant
|
// BrdConstant
|
||||||
|
|
||||||
@ -709,6 +710,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
const int width;
|
const int width;
|
||||||
const D val;
|
const D val;
|
||||||
};
|
};
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
|
#endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
|
||||||
|
@ -43,10 +43,11 @@
|
|||||||
#ifndef __OPENCV_GPU_COLOR_HPP__
|
#ifndef __OPENCV_GPU_COLOR_HPP__
|
||||||
#define __OPENCV_GPU_COLOR_HPP__
|
#define __OPENCV_GPU_COLOR_HPP__
|
||||||
|
|
||||||
|
#include "internal_shared.hpp"
|
||||||
#include "detail/color_detail.hpp"
|
#include "detail/color_detail.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
// All OPENCV_GPU_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implement
|
// All OPENCV_GPU_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implement
|
||||||
// template <typename T> class ColorSpace1_to_ColorSpace2_traits
|
// template <typename T> class ColorSpace1_to_ColorSpace2_traits
|
||||||
// {
|
// {
|
||||||
@ -216,6 +217,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)
|
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)
|
||||||
|
|
||||||
#undef OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS
|
#undef OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_COLOR_HPP__
|
#endif // __OPENCV_GPU_COLOR_HPP__
|
||||||
|
@ -45,6 +45,8 @@
|
|||||||
|
|
||||||
#include "internal_shared.hpp"
|
#include "internal_shared.hpp"
|
||||||
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#if defined(_WIN64) || defined(__LP64__)
|
#if defined(_WIN64) || defined(__LP64__)
|
||||||
// 64-bit register modifier for inlined asm
|
// 64-bit register modifier for inlined asm
|
||||||
#define OPENCV_GPU_ASM_PTR "l"
|
#define OPENCV_GPU_ASM_PTR "l"
|
||||||
@ -53,8 +55,6 @@
|
|||||||
#define OPENCV_GPU_ASM_PTR "r"
|
#define OPENCV_GPU_ASM_PTR "r"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
|
||||||
{
|
|
||||||
#if __CUDA_ARCH__ >= 200
|
#if __CUDA_ARCH__ >= 200
|
||||||
|
|
||||||
// for Fermi memory space is detected automatically
|
// for Fermi memory space is detected automatically
|
||||||
@ -99,6 +99,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
#undef OPENCV_GPU_DEFINE_FORCE_GLOB_B
|
#undef OPENCV_GPU_DEFINE_FORCE_GLOB_B
|
||||||
|
|
||||||
#endif // __CUDA_ARCH__ >= 200
|
#endif // __CUDA_ARCH__ >= 200
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__
|
#endif // __OPENCV_GPU_DATAMOV_UTILS_HPP__
|
||||||
|
@ -43,17 +43,18 @@
|
|||||||
#ifndef __OPENCV_GPU_COLOR_DETAIL_HPP__
|
#ifndef __OPENCV_GPU_COLOR_DETAIL_HPP__
|
||||||
#define __OPENCV_GPU_COLOR_DETAIL_HPP__
|
#define __OPENCV_GPU_COLOR_DETAIL_HPP__
|
||||||
|
|
||||||
|
#include "internal_shared.hpp"
|
||||||
#include "../vec_traits.hpp"
|
#include "../vec_traits.hpp"
|
||||||
#include "../saturate_cast.hpp"
|
#include "../saturate_cast.hpp"
|
||||||
#include "../limits.hpp"
|
#include "../limits.hpp"
|
||||||
#include "../functional.hpp"
|
#include "../functional.hpp"
|
||||||
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#ifndef CV_DESCALE
|
#ifndef CV_DESCALE
|
||||||
#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
|
#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
|
||||||
{
|
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
template<typename T> struct ColorChannel
|
template<typename T> struct ColorChannel
|
||||||
@ -1388,6 +1389,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
return functor_type(); \
|
return functor_type(); \
|
||||||
} \
|
} \
|
||||||
};
|
};
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_COLOR_DETAIL_HPP__
|
#endif // __OPENCV_GPU_COLOR_DETAIL_HPP__
|
||||||
|
@ -47,8 +47,8 @@
|
|||||||
#include "../vec_traits.hpp"
|
#include "../vec_traits.hpp"
|
||||||
#include "../functional.hpp"
|
#include "../functional.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
//! Mask accessor
|
//! Mask accessor
|
||||||
@ -405,6 +405,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
|
TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
|
#endif // __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
|
||||||
|
@ -43,10 +43,11 @@
|
|||||||
#ifndef __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
|
#ifndef __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
|
||||||
#define __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
|
#define __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
|
||||||
|
|
||||||
|
#include "internal_shared.hpp"
|
||||||
#include "../vec_traits.hpp"
|
#include "../vec_traits.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
template <bool, typename T1, typename T2> struct Select { typedef T1 type; };
|
template <bool, typename T1, typename T2> struct Select { typedef T1 type; };
|
||||||
@ -181,6 +182,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
enum { value = 1 };
|
enum { value = 1 };
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
|
#endif // __OPENCV_GPU_TYPE_TRAITS_DETAIL_HPP__
|
||||||
|
@ -43,8 +43,10 @@
|
|||||||
#ifndef __OPENCV_GPU_UTILITY_DETAIL_HPP__
|
#ifndef __OPENCV_GPU_UTILITY_DETAIL_HPP__
|
||||||
#define __OPENCV_GPU_UTILITY_DETAIL_HPP__
|
#define __OPENCV_GPU_UTILITY_DETAIL_HPP__
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
#include "internal_shared.hpp"
|
||||||
{
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
@ -836,6 +838,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_UTILITY_DETAIL_HPP__
|
#endif // __OPENCV_GPU_UTILITY_DETAIL_HPP__
|
||||||
|
@ -43,10 +43,11 @@
|
|||||||
#ifndef __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
|
#ifndef __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
|
||||||
#define __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
|
#define __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
|
||||||
|
|
||||||
|
#include "internal_shared.hpp"
|
||||||
#include "../datamov_utils.hpp"
|
#include "../datamov_utils.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
template <int THREAD_DIM, int N> struct UnrollVecDiffCached
|
template <int THREAD_DIM, int N> struct UnrollVecDiffCached
|
||||||
@ -112,6 +113,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
|
#endif // __OPENCV_GPU_VEC_DISTANCE_DETAIL_HPP__
|
||||||
|
@ -43,8 +43,10 @@
|
|||||||
#ifndef __OPENCV_GPU_DYNAMIC_SMEM_HPP__
|
#ifndef __OPENCV_GPU_DYNAMIC_SMEM_HPP__
|
||||||
#define __OPENCV_GPU_DYNAMIC_SMEM_HPP__
|
#define __OPENCV_GPU_DYNAMIC_SMEM_HPP__
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
#include "internal_shared.hpp"
|
||||||
{
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
template<class T> struct DynamicSharedMem
|
template<class T> struct DynamicSharedMem
|
||||||
{
|
{
|
||||||
__device__ __forceinline__ operator T*()
|
__device__ __forceinline__ operator T*()
|
||||||
@ -75,6 +77,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
return (double*)__smem_d;
|
return (double*)__smem_d;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_DYNAMIC_SMEM_HPP__
|
#endif // __OPENCV_GPU_DYNAMIC_SMEM_HPP__
|
||||||
|
@ -43,12 +43,11 @@
|
|||||||
#ifndef OPENCV_GPU_EMULATION_HPP_
|
#ifndef OPENCV_GPU_EMULATION_HPP_
|
||||||
#define OPENCV_GPU_EMULATION_HPP_
|
#define OPENCV_GPU_EMULATION_HPP_
|
||||||
|
|
||||||
#include "opencv2/gpu/device/warp_reduce.hpp"
|
#include "internal_shared.hpp"
|
||||||
|
#include "warp_reduce.hpp"
|
||||||
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace cv
|
|
||||||
{
|
|
||||||
namespace device
|
|
||||||
{
|
|
||||||
struct Emulation
|
struct Emulation
|
||||||
{
|
{
|
||||||
static __forceinline__ __device__ int Ballot(int predicate, volatile int* cta_buffer)
|
static __forceinline__ __device__ int Ballot(int predicate, volatile int* cta_buffer)
|
||||||
@ -63,7 +62,7 @@ namespace cv
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
|
||||||
}
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif /* OPENCV_GPU_EMULATION_HPP_ */
|
#endif /* OPENCV_GPU_EMULATION_HPP_ */
|
@ -43,12 +43,13 @@
|
|||||||
#ifndef __OPENCV_GPU_FILTERS_HPP__
|
#ifndef __OPENCV_GPU_FILTERS_HPP__
|
||||||
#define __OPENCV_GPU_FILTERS_HPP__
|
#define __OPENCV_GPU_FILTERS_HPP__
|
||||||
|
|
||||||
|
#include "internal_shared.hpp"
|
||||||
#include "saturate_cast.hpp"
|
#include "saturate_cast.hpp"
|
||||||
#include "vec_traits.hpp"
|
#include "vec_traits.hpp"
|
||||||
#include "vec_math.hpp"
|
#include "vec_math.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
template <typename Ptr2D> struct PointFilter
|
template <typename Ptr2D> struct PointFilter
|
||||||
{
|
{
|
||||||
typedef typename Ptr2D::elem_type elem_type;
|
typedef typename Ptr2D::elem_type elem_type;
|
||||||
@ -130,6 +131,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
const Ptr2D src;
|
const Ptr2D src;
|
||||||
};
|
};
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_FILTERS_HPP__
|
#endif // __OPENCV_GPU_FILTERS_HPP__
|
||||||
|
@ -45,13 +45,10 @@
|
|||||||
#define __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_
|
#define __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
#include "internal_shared.hpp"
|
||||||
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace cv
|
|
||||||
{
|
|
||||||
namespace gpu
|
|
||||||
{
|
|
||||||
namespace device
|
|
||||||
{
|
|
||||||
template<class Func>
|
template<class Func>
|
||||||
void printFuncAttrib(Func& func)
|
void printFuncAttrib(Func& func)
|
||||||
{
|
{
|
||||||
@ -71,8 +68,7 @@ namespace cv
|
|||||||
printf("\n");
|
printf("\n");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_ */
|
#endif /* __OPENCV_GPU_DEVICE_FUNCATTRIB_HPP_ */
|
@ -49,8 +49,8 @@
|
|||||||
#include "vec_traits.hpp"
|
#include "vec_traits.hpp"
|
||||||
#include "type_traits.hpp"
|
#include "type_traits.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
// Function Objects
|
// Function Objects
|
||||||
|
|
||||||
using thrust::unary_function;
|
using thrust::unary_function;
|
||||||
@ -241,15 +241,15 @@ namespace cv { namespace gpu { namespace device
|
|||||||
return lhs < rhs ? rhs : lhs;
|
return lhs < rhs ? rhs : lhs;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uchar, max)
|
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uchar, ::max)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, schar, max)
|
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, schar, ::max)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, char, max)
|
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, char, ::max)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, ushort, max)
|
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, ushort, ::max)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, short, max)
|
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, short, ::max)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, int, max)
|
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, int, ::max)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uint, max)
|
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uint, ::max)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, float, fmax)
|
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, float, ::fmax)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, double, fmax)
|
OPENCV_GPU_IMPLEMENT_MINMAX(maximum, double, ::fmax)
|
||||||
|
|
||||||
template <typename T> struct minimum : binary_function<T, T, T>
|
template <typename T> struct minimum : binary_function<T, T, T>
|
||||||
{
|
{
|
||||||
@ -258,15 +258,15 @@ namespace cv { namespace gpu { namespace device
|
|||||||
return lhs < rhs ? lhs : rhs;
|
return lhs < rhs ? lhs : rhs;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uchar, min)
|
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uchar, ::min)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, schar, min)
|
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, schar, ::min)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, char, min)
|
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, char, ::min)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, ushort, min)
|
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, ushort, ::min)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, short, min)
|
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, short, ::min)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, int, min)
|
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, int, ::min)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uint, min)
|
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uint, ::min)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, float, fmin)
|
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, float, ::fmin)
|
||||||
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, double, fmin)
|
OPENCV_GPU_IMPLEMENT_MINMAX(minimum, double, ::fmin)
|
||||||
|
|
||||||
#undef OPENCV_GPU_IMPLEMENT_MINMAX
|
#undef OPENCV_GPU_IMPLEMENT_MINMAX
|
||||||
|
|
||||||
@ -277,14 +277,14 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{ \
|
{ \
|
||||||
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v) const \
|
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v) const \
|
||||||
{ \
|
{ \
|
||||||
return func ## f(v); \
|
return :: ## func ## f(v); \
|
||||||
} \
|
} \
|
||||||
}; \
|
}; \
|
||||||
template <> struct func ## _func<double> : unary_function<double, double> \
|
template <> struct func ## _func<double> : unary_function<double, double> \
|
||||||
{ \
|
{ \
|
||||||
__device__ __forceinline__ double operator ()(double v) const \
|
__device__ __forceinline__ double operator ()(double v) const \
|
||||||
{ \
|
{ \
|
||||||
return func(v); \
|
return :: ## func(v); \
|
||||||
} \
|
} \
|
||||||
};
|
};
|
||||||
#define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(func) \
|
#define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(func) \
|
||||||
@ -292,14 +292,14 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{ \
|
{ \
|
||||||
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v1, typename TypeTraits<T>::ParameterType v2) const \
|
__device__ __forceinline__ float operator ()(typename TypeTraits<T>::ParameterType v1, typename TypeTraits<T>::ParameterType v2) const \
|
||||||
{ \
|
{ \
|
||||||
return func ## f(v1, v2); \
|
return :: ## func ## f(v1, v2); \
|
||||||
} \
|
} \
|
||||||
}; \
|
}; \
|
||||||
template <> struct func ## _func<double> : binary_function<double, double, double> \
|
template <> struct func ## _func<double> : binary_function<double, double, double> \
|
||||||
{ \
|
{ \
|
||||||
__device__ __forceinline__ double operator ()(double v1, double v2) const \
|
__device__ __forceinline__ double operator ()(double v1, double v2) const \
|
||||||
{ \
|
{ \
|
||||||
return func(v1, v2); \
|
return :: ## func(v1, v2); \
|
||||||
} \
|
} \
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -552,6 +552,10 @@ namespace cv { namespace gpu { namespace device
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename Func> struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func> {};
|
template <typename Func> struct TransformFunctorTraits : DefaultTransformFunctorTraits<Func> {};
|
||||||
}}}
|
|
||||||
|
#define DEFINE_TRANSFORM_FUNCTOR_TRAITS(type) \
|
||||||
|
template <> struct TransformFunctorTraits< type > : DefaultTransformFunctorTraits< type >
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_FUNCTIONAL_HPP__
|
#endif // __OPENCV_GPU_FUNCTIONAL_HPP__
|
||||||
|
@ -43,8 +43,10 @@
|
|||||||
#ifndef __OPENCV_GPU_LIMITS_GPU_HPP__
|
#ifndef __OPENCV_GPU_LIMITS_GPU_HPP__
|
||||||
#define __OPENCV_GPU_LIMITS_GPU_HPP__
|
#define __OPENCV_GPU_LIMITS_GPU_HPP__
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
#include "internal_shared.hpp"
|
||||||
{
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
template<class T> struct numeric_limits
|
template<class T> struct numeric_limits
|
||||||
{
|
{
|
||||||
typedef T type;
|
typedef T type;
|
||||||
@ -227,6 +229,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
__device__ __forceinline__ static type signaling_NaN();
|
__device__ __forceinline__ static type signaling_NaN();
|
||||||
static const bool is_signed = true;
|
static const bool is_signed = true;
|
||||||
};
|
};
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_LIMITS_GPU_HPP__
|
#endif // __OPENCV_GPU_LIMITS_GPU_HPP__
|
||||||
|
@ -45,8 +45,8 @@
|
|||||||
|
|
||||||
#include "internal_shared.hpp"
|
#include "internal_shared.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
|
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
|
||||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
|
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
|
||||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
|
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
|
||||||
@ -57,110 +57,161 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }
|
template<typename _Tp> __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }
|
||||||
|
|
||||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
|
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
|
||||||
{ return (uchar)max((int)v, 0); }
|
{
|
||||||
|
return (uchar) ::max((int)v, 0);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
|
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
|
||||||
{ return (uchar)min((uint)v, (uint)UCHAR_MAX); }
|
{
|
||||||
|
return (uchar) ::min((uint)v, (uint)UCHAR_MAX);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
|
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
|
||||||
{ return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
|
{
|
||||||
|
return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
|
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
|
||||||
{ return (uchar)min(v, (uint)UCHAR_MAX); }
|
{
|
||||||
|
return (uchar) ::min(v, (uint)UCHAR_MAX);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
|
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
|
||||||
{ return saturate_cast<uchar>((uint)v); }
|
{
|
||||||
|
return saturate_cast<uchar>((uint)v);
|
||||||
|
}
|
||||||
|
|
||||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
|
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
|
||||||
{ int iv = __float2int_rn(v); return saturate_cast<uchar>(iv); }
|
{
|
||||||
|
int iv = __float2int_rn(v);
|
||||||
|
return saturate_cast<uchar>(iv);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
|
template<> __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
|
||||||
{
|
{
|
||||||
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
|
#if __CUDA_ARCH__ >= 130
|
||||||
int iv = __double2int_rn(v); return saturate_cast<uchar>(iv);
|
int iv = __double2int_rn(v);
|
||||||
|
return saturate_cast<uchar>(iv);
|
||||||
#else
|
#else
|
||||||
return saturate_cast<uchar>((float)v);
|
return saturate_cast<uchar>((float)v);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
|
template<> __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
|
||||||
{ return (schar)min((int)v, SCHAR_MAX); }
|
{
|
||||||
|
return (schar) ::min((int)v, SCHAR_MAX);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
|
template<> __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
|
||||||
{ return (schar)min((uint)v, (uint)SCHAR_MAX); }
|
{
|
||||||
|
return (schar) ::min((uint)v, (uint)SCHAR_MAX);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
|
template<> __device__ __forceinline__ schar saturate_cast<schar>(int v)
|
||||||
{
|
{
|
||||||
return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ?
|
return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN);
|
||||||
v : v > 0 ? SCHAR_MAX : SCHAR_MIN);
|
|
||||||
}
|
}
|
||||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
|
template<> __device__ __forceinline__ schar saturate_cast<schar>(short v)
|
||||||
{ return saturate_cast<schar>((int)v); }
|
{
|
||||||
|
return saturate_cast<schar>((int)v);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
|
template<> __device__ __forceinline__ schar saturate_cast<schar>(uint v)
|
||||||
{ return (schar)min(v, (uint)SCHAR_MAX); }
|
{
|
||||||
|
return (schar) ::min(v, (uint)SCHAR_MAX);
|
||||||
|
}
|
||||||
|
|
||||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
|
template<> __device__ __forceinline__ schar saturate_cast<schar>(float v)
|
||||||
{ int iv = __float2int_rn(v); return saturate_cast<schar>(iv); }
|
{
|
||||||
|
int iv = __float2int_rn(v);
|
||||||
|
return saturate_cast<schar>(iv);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
|
template<> __device__ __forceinline__ schar saturate_cast<schar>(double v)
|
||||||
{
|
{
|
||||||
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
|
#if __CUDA_ARCH__ >= 130
|
||||||
int iv = __double2int_rn(v); return saturate_cast<schar>(iv);
|
int iv = __double2int_rn(v);
|
||||||
|
return saturate_cast<schar>(iv);
|
||||||
#else
|
#else
|
||||||
return saturate_cast<schar>((float)v);
|
return saturate_cast<schar>((float)v);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
|
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
|
||||||
{ return (ushort)max((int)v, 0); }
|
{
|
||||||
|
return (ushort) ::max((int)v, 0);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
|
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
|
||||||
{ return (ushort)max((int)v, 0); }
|
{
|
||||||
|
return (ushort) ::max((int)v, 0);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
|
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
|
||||||
{ return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
|
{
|
||||||
|
return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
|
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
|
||||||
{ return (ushort)min(v, (uint)USHRT_MAX); }
|
{
|
||||||
|
return (ushort) ::min(v, (uint)USHRT_MAX);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
|
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
|
||||||
{ int iv = __float2int_rn(v); return saturate_cast<ushort>(iv); }
|
{
|
||||||
|
int iv = __float2int_rn(v);
|
||||||
|
return saturate_cast<ushort>(iv);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
|
template<> __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
|
||||||
{
|
{
|
||||||
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
|
#if __CUDA_ARCH__ >= 130
|
||||||
int iv = __double2int_rn(v); return saturate_cast<ushort>(iv);
|
int iv = __double2int_rn(v);
|
||||||
|
return saturate_cast<ushort>(iv);
|
||||||
#else
|
#else
|
||||||
return saturate_cast<ushort>((float)v);
|
return saturate_cast<ushort>((float)v);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
|
template<> __device__ __forceinline__ short saturate_cast<short>(ushort v)
|
||||||
{ return (short)min((int)v, SHRT_MAX); }
|
{
|
||||||
|
return (short) ::min((int)v, SHRT_MAX);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ short saturate_cast<short>(int v)
|
template<> __device__ __forceinline__ short saturate_cast<short>(int v)
|
||||||
{
|
{
|
||||||
return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ?
|
return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN);
|
||||||
v : v > 0 ? SHRT_MAX : SHRT_MIN);
|
|
||||||
}
|
}
|
||||||
template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
|
template<> __device__ __forceinline__ short saturate_cast<short>(uint v)
|
||||||
{ return (short)min(v, (uint)SHRT_MAX); }
|
{
|
||||||
|
return (short) ::min(v, (uint)SHRT_MAX);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ short saturate_cast<short>(float v)
|
template<> __device__ __forceinline__ short saturate_cast<short>(float v)
|
||||||
{ int iv = __float2int_rn(v); return saturate_cast<short>(iv); }
|
{
|
||||||
|
int iv = __float2int_rn(v);
|
||||||
|
return saturate_cast<short>(iv);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ short saturate_cast<short>(double v)
|
template<> __device__ __forceinline__ short saturate_cast<short>(double v)
|
||||||
{
|
{
|
||||||
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
|
#if __CUDA_ARCH__ >= 130
|
||||||
int iv = __double2int_rn(v); return saturate_cast<short>(iv);
|
int iv = __double2int_rn(v);
|
||||||
|
return saturate_cast<short>(iv);
|
||||||
#else
|
#else
|
||||||
return saturate_cast<short>((float)v);
|
return saturate_cast<short>((float)v);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __device__ __forceinline__ int saturate_cast<int>(float v) { return __float2int_rn(v); }
|
template<> __device__ __forceinline__ int saturate_cast<int>(float v)
|
||||||
|
{
|
||||||
|
return __float2int_rn(v);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ int saturate_cast<int>(double v)
|
template<> __device__ __forceinline__ int saturate_cast<int>(double v)
|
||||||
{
|
{
|
||||||
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
|
#if __CUDA_ARCH__ >= 130
|
||||||
return __double2int_rn(v);
|
return __double2int_rn(v);
|
||||||
#else
|
#else
|
||||||
return saturate_cast<int>((float)v);
|
return saturate_cast<int>((float)v);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<> __device__ __forceinline__ uint saturate_cast<uint>(float v){ return __float2uint_rn(v); }
|
template<> __device__ __forceinline__ uint saturate_cast<uint>(float v)
|
||||||
|
{
|
||||||
|
return __float2uint_rn(v);
|
||||||
|
}
|
||||||
template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
|
template<> __device__ __forceinline__ uint saturate_cast<uint>(double v)
|
||||||
{
|
{
|
||||||
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
|
#if __CUDA_ARCH__ >= 130
|
||||||
return __double2uint_rn(v);
|
return __double2uint_rn(v);
|
||||||
#else
|
#else
|
||||||
return saturate_cast<uint>((float)v);
|
return saturate_cast<uint>((float)v);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif /* __OPENCV_GPU_SATURATE_CAST_HPP__ */
|
#endif /* __OPENCV_GPU_SATURATE_CAST_HPP__ */
|
@ -43,11 +43,12 @@
|
|||||||
#ifndef __OPENCV_GPU_TRANSFORM_HPP__
|
#ifndef __OPENCV_GPU_TRANSFORM_HPP__
|
||||||
#define __OPENCV_GPU_TRANSFORM_HPP__
|
#define __OPENCV_GPU_TRANSFORM_HPP__
|
||||||
|
|
||||||
#include "detail/transform_detail.hpp"
|
#include "internal_shared.hpp"
|
||||||
#include "utility.hpp"
|
#include "utility.hpp"
|
||||||
|
#include "detail/transform_detail.hpp"
|
||||||
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
|
||||||
{
|
|
||||||
template <typename T, typename D, typename UnOp>
|
template <typename T, typename D, typename UnOp>
|
||||||
void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const UnOp& op, cudaStream_t stream = 0)
|
void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const UnOp& op, cudaStream_t stream = 0)
|
||||||
{
|
{
|
||||||
@ -69,6 +70,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
detail::transform_caller(src1, src2, dst, op, SingleMask(mask), stream);
|
detail::transform_caller(src1, src2, dst, op, SingleMask(mask), stream);
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_TRANSFORM_HPP__
|
#endif // __OPENCV_GPU_TRANSFORM_HPP__
|
||||||
|
@ -43,10 +43,11 @@
|
|||||||
#ifndef __OPENCV_GPU_TYPE_TRAITS_HPP__
|
#ifndef __OPENCV_GPU_TYPE_TRAITS_HPP__
|
||||||
#define __OPENCV_GPU_TYPE_TRAITS_HPP__
|
#define __OPENCV_GPU_TYPE_TRAITS_HPP__
|
||||||
|
|
||||||
|
#include "internal_shared.hpp"
|
||||||
#include "detail/type_traits_detail.hpp"
|
#include "detail/type_traits_detail.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
template <typename T> struct IsSimpleParameter
|
template <typename T> struct IsSimpleParameter
|
||||||
{
|
{
|
||||||
enum {value = detail::IsIntegral<T>::value || detail::IsFloat<T>::value || detail::PointerTraits<typename detail::ReferenceTraits<T>::type>::value};
|
enum {value = detail::IsIntegral<T>::value || detail::IsFloat<T>::value || detail::PointerTraits<typename detail::ReferenceTraits<T>::type>::value};
|
||||||
@ -75,6 +76,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
typedef typename detail::Select<IsSimpleParameter<UnqualifiedType>::value, T, typename detail::AddParameterType<T>::type>::type ParameterType;
|
typedef typename detail::Select<IsSimpleParameter<UnqualifiedType>::value, T, typename detail::AddParameterType<T>::type>::type ParameterType;
|
||||||
};
|
};
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_TYPE_TRAITS_HPP__
|
#endif // __OPENCV_GPU_TYPE_TRAITS_HPP__
|
||||||
|
@ -48,13 +48,13 @@
|
|||||||
#include "datamov_utils.hpp"
|
#include "datamov_utils.hpp"
|
||||||
#include "detail/utility_detail.hpp"
|
#include "detail/utility_detail.hpp"
|
||||||
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#define OPENCV_GPU_LOG_WARP_SIZE (5)
|
#define OPENCV_GPU_LOG_WARP_SIZE (5)
|
||||||
#define OPENCV_GPU_WARP_SIZE (1 << OPENCV_GPU_LOG_WARP_SIZE)
|
#define OPENCV_GPU_WARP_SIZE (1 << OPENCV_GPU_LOG_WARP_SIZE)
|
||||||
#define OPENCV_GPU_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
|
#define OPENCV_GPU_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
|
||||||
#define OPENCV_GPU_MEM_BANKS (1 << OPENCV_GPU_LOG_MEM_BANKS)
|
#define OPENCV_GPU_MEM_BANKS (1 << OPENCV_GPU_LOG_MEM_BANKS)
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
|
||||||
{
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// swap
|
// swap
|
||||||
|
|
||||||
@ -116,6 +116,21 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__device__ __forceinline__ bool operator()(int, int, int) const
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ bool check(int, int)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __device__ __forceinline__ bool check(int, int, int)
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
@ -194,6 +209,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_UTILITY_HPP__
|
#endif // __OPENCV_GPU_UTILITY_HPP__
|
||||||
|
@ -48,8 +48,7 @@
|
|||||||
#include "functional.hpp"
|
#include "functional.hpp"
|
||||||
#include "detail/vec_distance_detail.hpp"
|
#include "detail/vec_distance_detail.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
|
|
||||||
template <typename T> struct L1Dist
|
template <typename T> struct L1Dist
|
||||||
{
|
{
|
||||||
@ -221,6 +220,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
|
|
||||||
U vec1Vals[MAX_LEN / THREAD_DIM];
|
U vec1Vals[MAX_LEN / THREAD_DIM];
|
||||||
};
|
};
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_VEC_DISTANCE_HPP__
|
#endif // __OPENCV_GPU_VEC_DISTANCE_HPP__
|
||||||
|
@ -48,8 +48,8 @@
|
|||||||
#include "vec_traits.hpp"
|
#include "vec_traits.hpp"
|
||||||
#include "functional.hpp"
|
#include "functional.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
namespace detail
|
namespace detail
|
||||||
{
|
{
|
||||||
template <int cn, typename VecD> struct SatCastHelper;
|
template <int cn, typename VecD> struct SatCastHelper;
|
||||||
@ -326,6 +326,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
#undef OPENCV_GPU_IMPLEMENT_VEC_BINOP
|
#undef OPENCV_GPU_IMPLEMENT_VEC_BINOP
|
||||||
#undef OPENCV_GPU_IMPLEMENT_VEC_OP
|
#undef OPENCV_GPU_IMPLEMENT_VEC_OP
|
||||||
#undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP
|
#undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_VECMATH_HPP__
|
#endif // __OPENCV_GPU_VECMATH_HPP__
|
@ -45,8 +45,8 @@
|
|||||||
|
|
||||||
#include "internal_shared.hpp"
|
#include "internal_shared.hpp"
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
{
|
|
||||||
template<typename T, int N> struct TypeVec;
|
template<typename T, int N> struct TypeVec;
|
||||||
|
|
||||||
struct __align__(8) uchar8
|
struct __align__(8) uchar8
|
||||||
@ -275,6 +275,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
|
static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
|
||||||
static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
|
static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
|
||||||
};
|
};
|
||||||
}}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_VEC_TRAITS_HPP__
|
#endif // __OPENCV_GPU_VEC_TRAITS_HPP__
|
||||||
|
@ -40,15 +40,13 @@
|
|||||||
//
|
//
|
||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#ifndef __OPENCV_GPU_DEVICE_WARP_HPP_
|
#ifndef __OPENCV_GPU_DEVICE_WARP_HPP__
|
||||||
#define __OPENCV_GPU_DEVICE_WARP_HPP_
|
#define __OPENCV_GPU_DEVICE_WARP_HPP__
|
||||||
|
|
||||||
|
#include "internal_shared.hpp"
|
||||||
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace cv
|
|
||||||
{
|
|
||||||
namespace gpu
|
|
||||||
{
|
|
||||||
namespace device
|
|
||||||
{
|
|
||||||
struct Warp
|
struct Warp
|
||||||
{
|
{
|
||||||
enum
|
enum
|
||||||
@ -111,8 +109,7 @@ namespace cv
|
|||||||
*t = value;
|
*t = value;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* __OPENCV_GPU_DEVICE_WARP_HPP_ */
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
#endif /* __OPENCV_GPU_DEVICE_WARP_HPP__ */
|
@ -41,14 +41,13 @@
|
|||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
|
|
||||||
#ifndef OPENCV_GPU_WARP_REDUCE_HPP_
|
#ifndef OPENCV_GPU_WARP_REDUCE_HPP__
|
||||||
#define OPENCV_GPU_WARP_REDUCE_HPP_
|
#define OPENCV_GPU_WARP_REDUCE_HPP__
|
||||||
|
|
||||||
|
#include "internal_shared.hpp"
|
||||||
|
|
||||||
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
namespace cv
|
|
||||||
{
|
|
||||||
namespace device
|
|
||||||
{
|
|
||||||
template <class T>
|
template <class T>
|
||||||
__device__ __forceinline__ T warp_reduce ( volatile T *ptr , const unsigned int tid = threadIdx.x )
|
__device__ __forceinline__ T warp_reduce ( volatile T *ptr , const unsigned int tid = threadIdx.x )
|
||||||
{
|
{
|
||||||
@ -64,10 +63,10 @@ namespace cv
|
|||||||
ptr[tid] = partial = partial + ptr[tid + 2];
|
ptr[tid] = partial = partial + ptr[tid + 2];
|
||||||
ptr[tid] = partial = partial + ptr[tid + 1];
|
ptr[tid] = partial = partial + ptr[tid + 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
return ptr[tid - lane];
|
return ptr[tid - lane];
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* OPENCV_GPU_WARP_REDUCE_HPP_ */
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
#endif /* OPENCV_GPU_WARP_REDUCE_HPP__ */
|
@ -42,6 +42,8 @@
|
|||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
|
||||||
|
using namespace cv;
|
||||||
|
using namespace cv::gpu;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#if !defined (HAVE_CUDA)
|
#if !defined (HAVE_CUDA)
|
||||||
@ -53,25 +55,25 @@ void cv::gpu::split(const GpuMat& /*src*/, vector<GpuMat>& /*dst*/, Stream& /*st
|
|||||||
|
|
||||||
#else /* !defined (HAVE_CUDA) */
|
#else /* !defined (HAVE_CUDA) */
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace split_merge
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace split_merge
|
||||||
{
|
{
|
||||||
extern "C" void merge_caller(const DevMem2Db* src, DevMem2Db& dst,
|
void merge_caller(const DevMem2Db* src, DevMem2Db& dst, int total_channels, size_t elem_size, const cudaStream_t& stream);
|
||||||
int total_channels, size_t elem_size,
|
void split_caller(const DevMem2Db& src, DevMem2Db* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream);
|
||||||
const cudaStream_t& stream);
|
}
|
||||||
|
|
||||||
extern "C" void split_caller(const DevMem2Db& src, DevMem2Db* dst,
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
int num_channels, size_t elem_size1,
|
|
||||||
const cudaStream_t& stream);
|
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream)
|
void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ split_merge;
|
||||||
|
|
||||||
CV_Assert(src);
|
CV_Assert(src);
|
||||||
CV_Assert(n > 0);
|
CV_Assert(n > 0);
|
||||||
|
|
||||||
bool double_ok = TargetArchs::builtWith(NATIVE_DOUBLE) &&
|
|
||||||
DeviceInfo().supports(NATIVE_DOUBLE);
|
|
||||||
CV_Assert(src[0].depth() != CV_64F || double_ok);
|
|
||||||
|
|
||||||
int depth = src[0].depth();
|
int depth = src[0].depth();
|
||||||
Size size = src[0].size();
|
Size size = src[0].size();
|
||||||
|
|
||||||
@ -100,20 +102,15 @@ namespace cv { namespace gpu { namespace split_merge
|
|||||||
src_as_devmem[i] = src[i];
|
src_as_devmem[i] = src[i];
|
||||||
|
|
||||||
DevMem2Db dst_as_devmem(dst);
|
DevMem2Db dst_as_devmem(dst);
|
||||||
split_merge::merge_caller(src_as_devmem, dst_as_devmem,
|
merge_caller(src_as_devmem, dst_as_devmem, total_channels, CV_ELEM_SIZE(depth), stream);
|
||||||
total_channels, CV_ELEM_SIZE(depth),
|
|
||||||
stream);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void split(const GpuMat& src, GpuMat* dst, const cudaStream_t& stream)
|
void split(const GpuMat& src, GpuMat* dst, const cudaStream_t& stream)
|
||||||
{
|
{
|
||||||
CV_Assert(dst);
|
using namespace OPENCV_DEVICE_NAMESPACE_ split_merge;
|
||||||
|
|
||||||
bool double_ok = TargetArchs::builtWith(NATIVE_DOUBLE) &&
|
CV_Assert(dst);
|
||||||
DeviceInfo().supports(NATIVE_DOUBLE);
|
|
||||||
CV_Assert(src.depth() != CV_64F || double_ok);
|
|
||||||
|
|
||||||
int depth = src.depth();
|
int depth = src.depth();
|
||||||
int num_channels = src.channels();
|
int num_channels = src.channels();
|
||||||
@ -135,38 +132,31 @@ namespace cv { namespace gpu { namespace split_merge
|
|||||||
dst_as_devmem[i] = dst[i];
|
dst_as_devmem[i] = dst[i];
|
||||||
|
|
||||||
DevMem2Db src_as_devmem(src);
|
DevMem2Db src_as_devmem(src);
|
||||||
split_merge::split_caller(src_as_devmem, dst_as_devmem,
|
split_caller(src_as_devmem, dst_as_devmem, num_channels, src.elemSize1(), stream);
|
||||||
num_channels, src.elemSize1(),
|
}
|
||||||
stream);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}}}
|
|
||||||
|
|
||||||
|
|
||||||
void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream)
|
void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
split_merge::merge(src, n, dst, StreamAccessor::getStream(stream));
|
::merge(src, n, dst, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void cv::gpu::merge(const vector<GpuMat>& src, GpuMat& dst, Stream& stream)
|
void cv::gpu::merge(const vector<GpuMat>& src, GpuMat& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
split_merge::merge(&src[0], src.size(), dst, StreamAccessor::getStream(stream));
|
::merge(&src[0], src.size(), dst, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void cv::gpu::split(const GpuMat& src, GpuMat* dst, Stream& stream)
|
void cv::gpu::split(const GpuMat& src, GpuMat* dst, Stream& stream)
|
||||||
{
|
{
|
||||||
split_merge::split(src, dst, StreamAccessor::getStream(stream));
|
::split(src, dst, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void cv::gpu::split(const GpuMat& src, vector<GpuMat>& dst, Stream& stream)
|
void cv::gpu::split(const GpuMat& src, vector<GpuMat>& dst, Stream& stream)
|
||||||
{
|
{
|
||||||
dst.resize(src.channels());
|
dst.resize(src.channels());
|
||||||
if(src.channels() > 0)
|
if(src.channels() > 0)
|
||||||
split_merge::split(src, &dst[0], StreamAccessor::getStream(stream));
|
::split(src, &dst[0], StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* !defined (HAVE_CUDA) */
|
#endif /* !defined (HAVE_CUDA) */
|
||||||
|
@ -55,21 +55,23 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&,
|
|||||||
|
|
||||||
#else /* !defined (HAVE_CUDA) */
|
#else /* !defined (HAVE_CUDA) */
|
||||||
|
|
||||||
namespace cv { namespace gpu
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace stereobm
|
||||||
{
|
{
|
||||||
namespace bm
|
void stereoBM_GPU(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf, cudaStream_t & stream);
|
||||||
{
|
void prefilter_xsobel(const DevMem2Db& input, const DevMem2Db& output, int prefilterCap /*= 31*/, cudaStream_t & stream);
|
||||||
//extern "C" void stereoBM_GPU(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf);
|
void postfilter_textureness(const DevMem2Db& input, int winsz, float avgTexturenessThreshold, const DevMem2Db& disp, cudaStream_t & stream);
|
||||||
extern "C" void stereoBM_GPU(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& disp, int ndisp, int winsz, const DevMem2D_<uint>& minSSD_buf, cudaStream_t & stream);
|
|
||||||
extern "C" void prefilter_xsobel(const DevMem2Db& input, const DevMem2Db& output, int prefilterCap /*= 31*/, cudaStream_t & stream);
|
|
||||||
extern "C" void postfilter_textureness(const DevMem2Db& input, int winsz, float avgTexturenessThreshold, const DevMem2Db& disp, cudaStream_t & stream);
|
|
||||||
}
|
}
|
||||||
}}
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
const float defaultAvgTexThreshold = 3;
|
const float defaultAvgTexThreshold = 3;
|
||||||
|
|
||||||
cv::gpu::StereoBM_GPU::StereoBM_GPU()
|
cv::gpu::StereoBM_GPU::StereoBM_GPU()
|
||||||
: preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ), avergeTexThreshold(defaultAvgTexThreshold) {}
|
: preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ), avergeTexThreshold(defaultAvgTexThreshold)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_, int winSize_)
|
cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_, int winSize_)
|
||||||
: preset(preset_), ndisp(ndisparities_), winSize(winSize_), avergeTexThreshold(defaultAvgTexThreshold)
|
: preset(preset_), ndisp(ndisparities_), winSize(winSize_), avergeTexThreshold(defaultAvgTexThreshold)
|
||||||
@ -93,8 +95,12 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void stereo_bm_gpu_operator ( GpuMat& minSSD, GpuMat& leBuf, GpuMat& riBuf, int preset, int ndisp, int winSize, float avergeTexThreshold, const GpuMat& left, const GpuMat& right, GpuMat& disparity, cudaStream_t stream)
|
namespace
|
||||||
{
|
{
|
||||||
|
void stereo_bm_gpu_operator( GpuMat& minSSD, GpuMat& leBuf, GpuMat& riBuf, int preset, int ndisp, int winSize, float avergeTexThreshold, const GpuMat& left, const GpuMat& right, GpuMat& disparity, cudaStream_t stream)
|
||||||
|
{
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ stereobm;
|
||||||
|
|
||||||
CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
|
CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
|
||||||
CV_DbgAssert(left.type() == CV_8UC1);
|
CV_DbgAssert(left.type() == CV_8UC1);
|
||||||
CV_DbgAssert(right.type() == CV_8UC1);
|
CV_DbgAssert(right.type() == CV_8UC1);
|
||||||
@ -110,22 +116,23 @@ static void stereo_bm_gpu_operator ( GpuMat& minSSD, GpuMat& leBuf, GpuMat& ri
|
|||||||
leBuf.create( left.size(), left.type());
|
leBuf.create( left.size(), left.type());
|
||||||
riBuf.create(right.size(), right.type());
|
riBuf.create(right.size(), right.type());
|
||||||
|
|
||||||
bm::prefilter_xsobel( left, leBuf, 31, stream);
|
prefilter_xsobel( left, leBuf, 31, stream);
|
||||||
bm::prefilter_xsobel(right, riBuf, 31, stream);
|
prefilter_xsobel(right, riBuf, 31, stream);
|
||||||
|
|
||||||
le_for_bm = leBuf;
|
le_for_bm = leBuf;
|
||||||
ri_for_bm = riBuf;
|
ri_for_bm = riBuf;
|
||||||
}
|
}
|
||||||
|
|
||||||
bm::stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD, stream);
|
stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD, stream);
|
||||||
|
|
||||||
if (avergeTexThreshold)
|
if (avergeTexThreshold)
|
||||||
bm::postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity, stream);
|
postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity, stream);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream)
|
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream)
|
||||||
{
|
{
|
||||||
::stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, StreamAccessor::getStream(stream));
|
stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* !defined (HAVE_CUDA) */
|
#endif /* !defined (HAVE_CUDA) */
|
||||||
|
@ -59,7 +59,9 @@ void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat&, GpuMat&, Stream
|
|||||||
|
|
||||||
#else /* !defined (HAVE_CUDA) */
|
#else /* !defined (HAVE_CUDA) */
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace bp
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace stereobp
|
||||||
{
|
{
|
||||||
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump);
|
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump);
|
||||||
template<typename T, typename D>
|
template<typename T, typename D>
|
||||||
@ -74,7 +76,11 @@ namespace cv { namespace gpu { namespace bp
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void output_gpu(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data,
|
void output_gpu(const DevMem2Db& u, const DevMem2Db& d, const DevMem2Db& l, const DevMem2Db& r, const DevMem2Db& data,
|
||||||
const DevMem2D_<short>& disp, cudaStream_t stream);
|
const DevMem2D_<short>& disp, cudaStream_t stream);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ stereobp;
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
@ -84,7 +90,6 @@ namespace
|
|||||||
const float DEFAULT_DISC_SINGLE_JUMP = 1.0f;
|
const float DEFAULT_DISC_SINGLE_JUMP = 1.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void cv::gpu::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels)
|
void cv::gpu::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels)
|
||||||
{
|
{
|
||||||
ndisp = width / 4;
|
ndisp = width / 4;
|
||||||
@ -136,8 +141,8 @@ namespace
|
|||||||
typedef void (*comp_data_t)(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream);
|
typedef void (*comp_data_t)(const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& data, cudaStream_t stream);
|
||||||
static const comp_data_t comp_data_callers[2][5] =
|
static const comp_data_t comp_data_callers[2][5] =
|
||||||
{
|
{
|
||||||
{0, bp::comp_data_gpu<unsigned char, short>, 0, bp::comp_data_gpu<uchar3, short>, bp::comp_data_gpu<uchar4, short>},
|
{0, comp_data_gpu<unsigned char, short>, 0, comp_data_gpu<uchar3, short>, comp_data_gpu<uchar4, short>},
|
||||||
{0, bp::comp_data_gpu<unsigned char, float>, 0, bp::comp_data_gpu<uchar3, float>, bp::comp_data_gpu<uchar4, float>}
|
{0, comp_data_gpu<unsigned char, float>, 0, comp_data_gpu<uchar3, float>, comp_data_gpu<uchar4, float>}
|
||||||
};
|
};
|
||||||
|
|
||||||
CV_Assert(left.size() == right.size() && left.type() == right.type());
|
CV_Assert(left.size() == right.size() && left.type() == right.type());
|
||||||
@ -236,7 +241,7 @@ namespace
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bp::load_constants(rthis.ndisp, rthis.max_data_term, scale * rthis.data_weight, scale * rthis.max_disc_term, scale * rthis.disc_single_jump);
|
load_constants(rthis.ndisp, rthis.max_data_term, scale * rthis.data_weight, scale * rthis.max_disc_term, scale * rthis.disc_single_jump);
|
||||||
|
|
||||||
datas.resize(rthis.levels);
|
datas.resize(rthis.levels);
|
||||||
|
|
||||||
@ -249,8 +254,6 @@ namespace
|
|||||||
|
|
||||||
void calcBP(GpuMat& disp, Stream& stream)
|
void calcBP(GpuMat& disp, Stream& stream)
|
||||||
{
|
{
|
||||||
using namespace cv::gpu::bp;
|
|
||||||
|
|
||||||
typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_rows, const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);
|
typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_rows, const DevMem2Db& src, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
static const data_step_down_t data_step_down_callers[2] =
|
static const data_step_down_t data_step_down_callers[2] =
|
||||||
{
|
{
|
||||||
@ -354,13 +357,13 @@ namespace
|
|||||||
|
|
||||||
void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
|
void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream)
|
||||||
{
|
{
|
||||||
::StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out);
|
StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out);
|
||||||
impl(left, right, disp, stream);
|
impl(left, right, disp, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat& data, GpuMat& disp, Stream& stream)
|
void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat& data, GpuMat& disp, Stream& stream)
|
||||||
{
|
{
|
||||||
::StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out);
|
StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out);
|
||||||
impl(data, disp, stream);
|
impl(data, disp, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -57,7 +57,9 @@ void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat&, const GpuMat&, Gp
|
|||||||
|
|
||||||
#else /* !defined (HAVE_CUDA) */
|
#else /* !defined (HAVE_CUDA) */
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace csbp
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace stereocsbp
|
||||||
{
|
{
|
||||||
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
|
void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th,
|
||||||
const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& temp);
|
const DevMem2Db& left, const DevMem2Db& right, const DevMem2Db& temp);
|
||||||
@ -84,8 +86,11 @@ namespace cv { namespace gpu { namespace csbp
|
|||||||
template<class T>
|
template<class T>
|
||||||
void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
|
void compute_disp(const T* u, const T* d, const T* l, const T* r, const T* data_cost_selected, const T* disp_selected, size_t msg_step,
|
||||||
const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);
|
const DevMem2D_<short>& disp, int nr_plane, cudaStream_t stream);
|
||||||
|
}
|
||||||
|
|
||||||
}}}
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ stereocsbp;
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
@ -208,8 +213,7 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
|
|||||||
////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////
|
||||||
// Compute
|
// Compute
|
||||||
|
|
||||||
csbp::load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight,
|
load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight, rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);
|
||||||
rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp);
|
|
||||||
|
|
||||||
if (stream)
|
if (stream)
|
||||||
{
|
{
|
||||||
@ -248,17 +252,17 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
|
|||||||
{
|
{
|
||||||
if (i == levels - 1)
|
if (i == levels - 1)
|
||||||
{
|
{
|
||||||
csbp::init_data_cost(left.rows, left.cols, disp_selected_pyr[cur_idx].ptr<T>(), data_cost_selected.ptr<T>(),
|
init_data_cost(left.rows, left.cols, disp_selected_pyr[cur_idx].ptr<T>(), data_cost_selected.ptr<T>(),
|
||||||
step_pyr[i], rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], rthis.ndisp, left.channels(), rthis.use_local_init_data_cost, cudaStream);
|
step_pyr[i], rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], rthis.ndisp, left.channels(), rthis.use_local_init_data_cost, cudaStream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
csbp::compute_data_cost(disp_selected_pyr[cur_idx].ptr<T>(), data_cost.ptr<T>(), step_pyr[i], step_pyr[i+1],
|
compute_data_cost(disp_selected_pyr[cur_idx].ptr<T>(), data_cost.ptr<T>(), step_pyr[i], step_pyr[i+1],
|
||||||
left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), cudaStream);
|
left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), cudaStream);
|
||||||
|
|
||||||
int new_idx = (cur_idx + 1) & 1;
|
int new_idx = (cur_idx + 1) & 1;
|
||||||
|
|
||||||
csbp::init_message(u[new_idx].ptr<T>(), d[new_idx].ptr<T>(), l[new_idx].ptr<T>(), r[new_idx].ptr<T>(),
|
init_message(u[new_idx].ptr<T>(), d[new_idx].ptr<T>(), l[new_idx].ptr<T>(), r[new_idx].ptr<T>(),
|
||||||
u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
|
u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
|
||||||
disp_selected_pyr[new_idx].ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(),
|
disp_selected_pyr[new_idx].ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(),
|
||||||
data_cost_selected.ptr<T>(), data_cost.ptr<T>(), step_pyr[i], step_pyr[i+1], rows_pyr[i],
|
data_cost_selected.ptr<T>(), data_cost.ptr<T>(), step_pyr[i], step_pyr[i+1], rows_pyr[i],
|
||||||
@ -267,7 +271,7 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
|
|||||||
cur_idx = new_idx;
|
cur_idx = new_idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
csbp::calc_all_iterations(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
|
calc_all_iterations(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
|
||||||
data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), step_pyr[i],
|
data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), step_pyr[i],
|
||||||
rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], rthis.iters, cudaStream);
|
rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], rthis.iters, cudaStream);
|
||||||
}
|
}
|
||||||
@ -282,7 +286,7 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat u[2], GpuMat d[2]
|
|||||||
else
|
else
|
||||||
out.setTo(zero);
|
out.setTo(zero);
|
||||||
|
|
||||||
csbp::compute_disp(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
|
compute_disp(u[cur_idx].ptr<T>(), d[cur_idx].ptr<T>(), l[cur_idx].ptr<T>(), r[cur_idx].ptr<T>(),
|
||||||
data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), step_pyr[0], out, nr_plane_pyr[0], cudaStream);
|
data_cost_selected.ptr<T>(), disp_selected_pyr[cur_idx].ptr<T>(), step_pyr[0], out, nr_plane_pyr[0], cudaStream);
|
||||||
|
|
||||||
if (disp.type() != CV_16S)
|
if (disp.type() != CV_16S)
|
||||||
|
@ -63,8 +63,17 @@ void cv::gpu::SURF_GPU::releaseMemory() { throw_nogpu(); }
|
|||||||
|
|
||||||
#else /* !defined (HAVE_CUDA) */
|
#else /* !defined (HAVE_CUDA) */
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace surf
|
BEGIN_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
namespace surf
|
||||||
{
|
{
|
||||||
|
void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
|
||||||
|
void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
|
||||||
|
|
||||||
|
void bindImgTex(DevMem2Db img);
|
||||||
|
void bindSumTex(DevMem2D_<uint> sum);
|
||||||
|
void bindMaskSumTex(DevMem2D_<uint> maskSum);
|
||||||
|
|
||||||
void icvCalcLayerDetAndTrace_gpu(const PtrStepf& det, const PtrStepf& trace, int img_rows, int img_cols, int octave, int nOctaveLayers);
|
void icvCalcLayerDetAndTrace_gpu(const PtrStepf& det, const PtrStepf& trace, int img_rows, int img_cols, int octave, int nOctaveLayers);
|
||||||
|
|
||||||
void icvFindMaximaInLayer_gpu(const PtrStepf& det, const PtrStepf& trace, int4* maxPosBuffer, unsigned int* maxCounter,
|
void icvFindMaximaInLayer_gpu(const PtrStepf& det, const PtrStepf& trace, int4* maxPosBuffer, unsigned int* maxCounter,
|
||||||
@ -78,9 +87,11 @@ namespace cv { namespace gpu { namespace surf
|
|||||||
|
|
||||||
void compute_descriptors_gpu(const DevMem2Df& descriptors,
|
void compute_descriptors_gpu(const DevMem2Df& descriptors,
|
||||||
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures);
|
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir, int nFeatures);
|
||||||
}}}
|
}
|
||||||
|
|
||||||
using namespace cv::gpu::surf;
|
END_OPENCV_DEVICE_NAMESPACE
|
||||||
|
|
||||||
|
using namespace OPENCV_DEVICE_NAMESPACE_ surf;
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
@ -136,24 +147,18 @@ namespace
|
|||||||
counters.create(1, nOctaves + 1, CV_32SC1);
|
counters.create(1, nOctaves + 1, CV_32SC1);
|
||||||
counters.setTo(Scalar::all(0));
|
counters.setTo(Scalar::all(0));
|
||||||
|
|
||||||
uploadConstant("cv::gpu::surf::c_max_candidates", maxCandidates);
|
loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, nOctaveLayers, static_cast<float>(hessianThreshold));
|
||||||
uploadConstant("cv::gpu::surf::c_max_features", maxFeatures);
|
|
||||||
uploadConstant("cv::gpu::surf::c_img_rows", img_rows);
|
|
||||||
uploadConstant("cv::gpu::surf::c_img_cols", img_cols);
|
|
||||||
uploadConstant("cv::gpu::surf::c_nOctaveLayers", nOctaveLayers);
|
|
||||||
uploadConstant("cv::gpu::surf::c_hessianThreshold", static_cast<float>(hessianThreshold));
|
|
||||||
|
|
||||||
imgTex.bind("cv::gpu::surf::imgTex", (DevMem2Db)img);
|
bindImgTex(img);
|
||||||
|
|
||||||
integralBuffered(img, sum, intBuffer);
|
integralBuffered(img, sum, intBuffer);
|
||||||
sumTex.bind("cv::gpu::surf::sumTex", (DevMem2D_<unsigned int>)sum);
|
bindSumTex(sum);
|
||||||
|
|
||||||
if (use_mask)
|
if (use_mask)
|
||||||
{
|
{
|
||||||
min(mask, 1.0, mask1);
|
min(mask, 1.0, mask1);
|
||||||
integralBuffered(mask1, maskSum, intBuffer);
|
integralBuffered(mask1, maskSum, intBuffer);
|
||||||
|
bindMaskSumTex(maskSum);
|
||||||
maskSumTex.bind("cv::gpu::surf::maskSumTex", (DevMem2D_<unsigned int>)maskSum);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -171,9 +176,7 @@ namespace
|
|||||||
const int layer_rows = img_rows >> octave;
|
const int layer_rows = img_rows >> octave;
|
||||||
const int layer_cols = img_cols >> octave;
|
const int layer_cols = img_cols >> octave;
|
||||||
|
|
||||||
uploadConstant("cv::gpu::surf::c_octave", octave);
|
loadOctaveConstants(octave, layer_rows, layer_cols);
|
||||||
uploadConstant("cv::gpu::surf::c_layer_rows", layer_rows);
|
|
||||||
uploadConstant("cv::gpu::surf::c_layer_cols", layer_cols);
|
|
||||||
|
|
||||||
icvCalcLayerDetAndTrace_gpu(det, trace, img_rows, img_cols, octave, nOctaveLayers);
|
icvCalcLayerDetAndTrace_gpu(det, trace, img_rows, img_cols, octave, nOctaveLayers);
|
||||||
|
|
||||||
@ -242,8 +245,6 @@ namespace
|
|||||||
int maxFeatures;
|
int maxFeatures;
|
||||||
|
|
||||||
GpuMat counters;
|
GpuMat counters;
|
||||||
|
|
||||||
TextureBinder imgTex, sumTex, maskSumTex;
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -336,7 +337,7 @@ void cv::gpu::SURF_GPU::downloadKeypoints(const GpuMat& keypointsGPU, vector<Key
|
|||||||
{
|
{
|
||||||
CV_Assert(keypointsGPU.type() == CV_32FC1 && keypointsGPU.rows == SF_FEATURE_STRIDE);
|
CV_Assert(keypointsGPU.type() == CV_32FC1 && keypointsGPU.rows == SF_FEATURE_STRIDE);
|
||||||
|
|
||||||
Mat keypointsCPU = keypointsGPU;
|
Mat keypointsCPU(keypointsGPU);
|
||||||
|
|
||||||
keypoints.resize(nFeatures);
|
keypoints.resize(nFeatures);
|
||||||
|
|
||||||
|
@ -549,8 +549,8 @@ TEST_P(MorphEx, Accuracy)
|
|||||||
cv::gpu::GpuMat dev_dst_rgba;
|
cv::gpu::GpuMat dev_dst_rgba;
|
||||||
cv::gpu::GpuMat dev_dst_gray;
|
cv::gpu::GpuMat dev_dst_gray;
|
||||||
|
|
||||||
cv::gpu::morphologyEx(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, morphOps[morphOpsIdx], cv::gpu::GpuMat(kernel));
|
cv::gpu::morphologyEx(cv::gpu::GpuMat(img_rgba), dev_dst_rgba, morphOps[morphOpsIdx], kernel);
|
||||||
cv::gpu::morphologyEx(cv::gpu::GpuMat(img_gray), dev_dst_gray, morphOps[morphOpsIdx], cv::gpu::GpuMat(kernel));
|
cv::gpu::morphologyEx(cv::gpu::GpuMat(img_gray), dev_dst_gray, morphOps[morphOpsIdx], kernel);
|
||||||
|
|
||||||
dev_dst_rgba.download(dst_rgba);
|
dev_dst_rgba.download(dst_rgba);
|
||||||
dev_dst_gray.download(dst_gray);
|
dev_dst_gray.download(dst_gray);
|
||||||
|
@ -137,7 +137,7 @@ struct CV_GpuHogDetectTestRunner : cv::gpu::HOGDescriptor
|
|||||||
#ifdef DUMP
|
#ifdef DUMP
|
||||||
dump(block_hists, locations);
|
dump(block_hists, locations);
|
||||||
#else
|
#else
|
||||||
compare(block_hists, locations);
|
compare(cv::Mat(block_hists), locations);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Test detect on smaller image
|
// Test detect on smaller image
|
||||||
@ -148,7 +148,7 @@ struct CV_GpuHogDetectTestRunner : cv::gpu::HOGDescriptor
|
|||||||
#ifdef DUMP
|
#ifdef DUMP
|
||||||
dump(block_hists, locations);
|
dump(block_hists, locations);
|
||||||
#else
|
#else
|
||||||
compare(block_hists, locations);
|
compare(cv::Mat(block_hists), locations);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Test detect on greater image
|
// Test detect on greater image
|
||||||
@ -158,7 +158,7 @@ struct CV_GpuHogDetectTestRunner : cv::gpu::HOGDescriptor
|
|||||||
#ifdef DUMP
|
#ifdef DUMP
|
||||||
dump(block_hists, locations);
|
dump(block_hists, locations);
|
||||||
#else
|
#else
|
||||||
compare(block_hists, locations);
|
compare(cv::Mat(block_hists), locations);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -254,31 +254,31 @@ struct CV_GpuHogGetDescriptorsTestRunner : cv::gpu::HOGDescriptor
|
|||||||
ASSERT_TRUE(!img_rgb.empty());
|
ASSERT_TRUE(!img_rgb.empty());
|
||||||
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
|
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
|
||||||
computeBlockHistograms(cv::gpu::GpuMat(img));
|
computeBlockHistograms(cv::gpu::GpuMat(img));
|
||||||
compare_inner_parts(block_hists, descriptors.rowRange(1, 2));
|
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(1, 2)));
|
||||||
|
|
||||||
img_rgb = readImage("hog/negative1.png");
|
img_rgb = readImage("hog/negative1.png");
|
||||||
ASSERT_TRUE(!img_rgb.empty());
|
ASSERT_TRUE(!img_rgb.empty());
|
||||||
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
|
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
|
||||||
computeBlockHistograms(cv::gpu::GpuMat(img));
|
computeBlockHistograms(cv::gpu::GpuMat(img));
|
||||||
compare_inner_parts(block_hists, descriptors.rowRange(2, 3));
|
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(2, 3)));
|
||||||
|
|
||||||
img_rgb = readImage("hog/negative2.png");
|
img_rgb = readImage("hog/negative2.png");
|
||||||
ASSERT_TRUE(!img_rgb.empty());
|
ASSERT_TRUE(!img_rgb.empty());
|
||||||
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
|
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
|
||||||
computeBlockHistograms(cv::gpu::GpuMat(img));
|
computeBlockHistograms(cv::gpu::GpuMat(img));
|
||||||
compare_inner_parts(block_hists, descriptors.rowRange(3, 4));
|
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(3, 4)));
|
||||||
|
|
||||||
img_rgb = readImage("hog/positive3.png");
|
img_rgb = readImage("hog/positive3.png");
|
||||||
ASSERT_TRUE(!img_rgb.empty());
|
ASSERT_TRUE(!img_rgb.empty());
|
||||||
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
|
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
|
||||||
computeBlockHistograms(cv::gpu::GpuMat(img));
|
computeBlockHistograms(cv::gpu::GpuMat(img));
|
||||||
compare_inner_parts(block_hists, descriptors.rowRange(4, 5));
|
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(4, 5)));
|
||||||
|
|
||||||
img_rgb = readImage("hog/negative3.png");
|
img_rgb = readImage("hog/negative3.png");
|
||||||
ASSERT_TRUE(!img_rgb.empty());
|
ASSERT_TRUE(!img_rgb.empty());
|
||||||
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
|
cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
|
||||||
computeBlockHistograms(cv::gpu::GpuMat(img));
|
computeBlockHistograms(cv::gpu::GpuMat(img));
|
||||||
compare_inner_parts(block_hists, descriptors.rowRange(5, 6));
|
compare_inner_parts(cv::Mat(block_hists), cv::Mat(descriptors.rowRange(5, 6)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Does not compare border value, as interpolation leads to delta
|
// Does not compare border value, as interpolation leads to delta
|
||||||
|
@ -3897,7 +3897,7 @@ static void testC2C(const std::string& hint, int cols, int rows, int flags, bool
|
|||||||
EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
|
EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
|
||||||
ASSERT_EQ(CV_32F, d_b.depth());
|
ASSERT_EQ(CV_32F, d_b.depth());
|
||||||
ASSERT_EQ(2, d_b.channels());
|
ASSERT_EQ(2, d_b.channels());
|
||||||
EXPECT_MAT_NEAR(b_gold, d_b, rows * cols * 1e-4);
|
EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), rows * cols * 1e-4);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_P(Dft, C2C)
|
TEST_P(Dft, C2C)
|
||||||
|
@ -206,7 +206,7 @@ void App::run()
|
|||||||
workEnd();
|
workEnd();
|
||||||
|
|
||||||
// Show results
|
// Show results
|
||||||
disp = d_disp;
|
d_disp.download(disp);
|
||||||
putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255));
|
putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255));
|
||||||
imshow("disparity", disp);
|
imshow("disparity", disp);
|
||||||
|
|
||||||
|
@ -71,7 +71,7 @@ int main(int argc, char* argv[])
|
|||||||
|
|
||||||
// drawing the results
|
// drawing the results
|
||||||
Mat img_matches;
|
Mat img_matches;
|
||||||
drawMatches(img1, keypoints1, img2, keypoints2, matches, img_matches);
|
drawMatches(Mat(img1), keypoints1, Mat(img2), keypoints2, matches, img_matches);
|
||||||
|
|
||||||
namedWindow("matches", 0);
|
namedWindow("matches", 0);
|
||||||
imshow("matches", img_matches);
|
imshow("matches", img_matches);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user