Merge remote-tracking branch 'master' into stitch-fix
* 'master' of github.com:itseez/opencv: (82 commits) moved part of video to contrib/{outflow, bgsegm}; moved matlab to contrib added some basic functionality needed by the new face module (moved from the old "contrib") moved to the new opencv_contrib/face module fixed various warnings and obvious errors reported by clang compiler and the coverity tool. Fixed review comment from Vadim Pisarevsky modified farneback sample to use T-API ECC patch by the author (G. Evangelidis); fixed some OCL Farneback optical flow test failures on Mac small fix for GaussianBlur ocl test fix binary package build small fix for ocl_resize fix IOS framework fixed test ocl_MatchTemplate for sparse matrix Fixed typos fixing error, wrong template method param. fixing Mac build some formal changes (generally adding constness) Fixed choice of kercn and rowsPerWI for non-Intel device. fixed nDiffs for CalcBackProject fixed tests for ocl_filter2d, ocl_matchTemplate, ocl_histogram.cpp Fixed issue: Mat::copyTo(UMat) if device copy is obsolete. Added test. ... Conflicts: modules/core/include/opencv2/core/mat.inl.hpp
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
set(the_description "The Core Functionality")
|
||||
ocv_add_module(core PRIVATE_REQUIRED ${ZLIB_LIBRARIES} "${OPENCL_LIBRARIES}" OPTIONAL opencv_cudev)
|
||||
ocv_module_include_directories(${ZLIB_INCLUDE_DIRS})
|
||||
|
||||
if(HAVE_WINRT_CX)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /ZW")
|
||||
@@ -19,11 +18,11 @@ file(GLOB lib_cuda_hdrs_detail "include/opencv2/${name}/cuda/detail/*.hpp" "incl
|
||||
source_group("Cuda Headers" FILES ${lib_cuda_hdrs})
|
||||
source_group("Cuda Headers\\Detail" FILES ${lib_cuda_hdrs_detail})
|
||||
|
||||
ocv_glob_module_sources(SOURCES "${opencv_core_BINARY_DIR}/version_string.inc"
|
||||
ocv_glob_module_sources(SOURCES "${OPENCV_MODULE_opencv_core_BINARY_DIR}/version_string.inc"
|
||||
HEADERS ${lib_cuda_hdrs} ${lib_cuda_hdrs_detail})
|
||||
|
||||
ocv_module_include_directories(${the_module} ${ZLIB_INCLUDE_DIRS})
|
||||
ocv_create_module()
|
||||
ocv_add_precompiled_headers(${the_module})
|
||||
|
||||
ocv_add_accuracy_tests()
|
||||
ocv_add_perf_tests()
|
||||
|
@@ -845,7 +845,6 @@ For convenience, the following types from the OpenCV C API already have such a s
|
||||
that calls the appropriate release function:
|
||||
|
||||
* ``CvCapture``
|
||||
* :ocv:struct:`CvDTreeSplit`
|
||||
* :ocv:struct:`CvFileStorage`
|
||||
* ``CvHaarClassifierCascade``
|
||||
* :ocv:struct:`CvMat`
|
||||
@@ -2326,6 +2325,69 @@ Returns the matrix iterator and sets it to the after-last matrix element.
|
||||
|
||||
The methods return the matrix read-only or read-write iterators, set to the point following the last matrix element.
|
||||
|
||||
|
||||
Mat::forEach
|
||||
------------
|
||||
Runs the given functor over all matrix elements.
|
||||
|
||||
.. ocv:function:: template<typename _Tp, typename Functor> void Mat::forEach(Functor operation)
|
||||
|
||||
.. ocv:function:: template<typename _Tp, typename Functor> void Mat::forEach(Functor operation) const
|
||||
|
||||
The methods run the operation in parallel. The operation is passed as an argument and has to be a function pointer, a function object or a lambda (C++11).
|
||||
|
||||
All of the operations below are equivalent: each one puts 0xFF into the first channel of every matrix element. ::
|
||||
|
||||
Mat image(1920, 1080, CV_8UC3);
|
||||
typedef cv::Point3_<uint8_t> Pixel;
|
||||
|
||||
// first. raw pointer access.
|
||||
for (int r = 0; r < image.rows; ++r) {
|
||||
Pixel* ptr = image.ptr<Pixel>(r, 0);
|
||||
const Pixel* ptr_end = ptr + image.cols;
|
||||
for (; ptr != ptr_end; ++ptr) {
|
||||
ptr->x = 255;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Using MatIterator. (Simple, but there is iterator overhead.)
|
||||
for (Pixel &p : cv::Mat_<Pixel>(image)) {
|
||||
p.x = 255;
|
||||
}
|
||||
|
||||
|
||||
// Parallel execution with function object.
|
||||
struct Operator {
|
||||
void operator ()(Pixel &pixel, const int * position) const {
|
||||
pixel.x = 255;
|
||||
}
|
||||
};
|
||||
image.forEach<Pixel>(Operator());
|
||||
|
||||
|
||||
// Parallel execution using C++11 lambda.
|
||||
image.forEach<Pixel>([](Pixel &p, const int * position) -> void {
|
||||
p.x = 255;
|
||||
});
|
||||
|
||||
The ``position`` parameter is the index of the current pixel. ::
|
||||
|
||||
// Creating 3D matrix (255 x 255 x 255) typed uint8_t,
|
||||
// and initialize every element with the value of its own position.
|
||||
// i.e. pixels (x,y,z) = (1,2,3) is (b,g,r) = (1,2,3).
|
||||
|
||||
int sizes[] = { 255, 255, 255 };
|
||||
typedef cv::Point3_<uint8_t> Pixel;
|
||||
|
||||
Mat_<Pixel> image = Mat::zeros(3, sizes, CV_8UC3);
|
||||
|
||||
image.forEach([&](Pixel& pixel, const int position[]) -> void {
|
||||
pixel.x = position[0];
|
||||
pixel.y = position[1];
|
||||
pixel.z = position[2];
|
||||
});
|
||||
|
||||
Mat\_
|
||||
-----
|
||||
.. ocv:class:: Mat_
|
||||
|
@@ -690,7 +690,61 @@ public:
|
||||
Mat mean; //!< mean value subtracted before the projection and added after the back projection
|
||||
};
|
||||
|
||||
// Linear Discriminant Analysis
|
||||
// Linear Discriminant Analysis: declares construction, (de)serialization,
// computation of the discriminants, and projection/reconstruction of samples.
// Only the two eigen accessors are defined inline; everything else is
// implemented outside this declaration.
class CV_EXPORTS LDA
|
||||
{
|
||||
public:
|
||||
// Initializes a LDA with num_components (default 0).
|
||||
// NOTE(review): this constructor takes no dataAsRow argument; how the
// _dataAsRow member gets its value is not visible here -- confirm in the implementation.
|
||||
explicit LDA(int num_components = 0);
|
||||
|
||||
// Initializes and performs a Discriminant Analysis with Fisher's
|
||||
// Optimization Criterion on given data in src and corresponding labels
|
||||
// in labels. If 0 (or less) number of components are given, they are
|
||||
// automatically determined for given data in computation.
|
||||
LDA(InputArrayOfArrays src, InputArray labels, int num_components = 0);
|
||||
|
||||
// Serializes this object to a given filename.
|
||||
void save(const String& filename) const;
|
||||
|
||||
// Deserializes this object from a given filename.
|
||||
void load(const String& filename);
|
||||
|
||||
// Serializes this object to a given cv::FileStorage.
|
||||
void save(FileStorage& fs) const;
|
||||
|
||||
// Deserializes this object from a given cv::FileStorage.
|
||||
void load(const FileStorage& node);
|
||||
|
||||
// Destructor.
|
||||
~LDA();
|
||||
|
||||
//! Compute the discriminants for data in src and labels.
|
||||
void compute(InputArrayOfArrays src, InputArray labels);
|
||||
|
||||
// Projects samples into the LDA subspace.
|
||||
Mat project(InputArray src);
|
||||
|
||||
// Reconstructs projections from the LDA subspace.
|
||||
Mat reconstruct(InputArray src);
|
||||
|
||||
// Returns the eigenvectors of this LDA (returned by value as a Mat).
|
||||
Mat eigenvectors() const { return _eigenvectors; }
|
||||
|
||||
// Returns the eigenvalues of this LDA (returned by value as a Mat).
|
||||
Mat eigenvalues() const { return _eigenvalues; }
|
||||
|
||||
// Static helpers that take a matrix W, a mean and the samples src.
// Presumably the subspace projection / reconstruction used by project()
// and reconstruct() -- TODO confirm against the implementation.
static Mat subspaceProject(InputArray W, InputArray mean, InputArray src);
|
||||
static Mat subspaceReconstruct(InputArray W, InputArray mean, InputArray src);
|
||||
|
||||
protected:
|
||||
// Presumably selects whether each sample is a row of the data -- TODO confirm.
bool _dataAsRow;
|
||||
// Presumably the component count passed to the constructors -- TODO confirm.
int _num_components;
|
||||
// Returned by eigenvectors().
Mat _eigenvectors;
|
||||
// Returned by eigenvalues().
Mat _eigenvalues;
|
||||
|
||||
// Internal worker taking the same src/labels arguments as compute().
void lda(InputArrayOfArrays src, InputArray labels);
|
||||
};
|
||||
|
||||
/*!
|
||||
Singular Value Decomposition class
|
||||
|
@@ -261,8 +261,8 @@ public:
|
||||
int* refcount;
|
||||
|
||||
//! helper fields used in locateROI and adjustROI
|
||||
uchar* datastart;
|
||||
uchar* dataend;
|
||||
const uchar* datastart;
|
||||
const uchar* dataend;
|
||||
|
||||
//! allocator
|
||||
Allocator* allocator;
|
||||
@@ -349,8 +349,8 @@ public:
|
||||
uchar* data;
|
||||
int* refcount;
|
||||
|
||||
uchar* datastart;
|
||||
uchar* dataend;
|
||||
const uchar* datastart;
|
||||
const uchar* dataend;
|
||||
|
||||
AllocType alloc_type;
|
||||
};
|
||||
|
@@ -395,7 +395,7 @@ struct CV_EXPORTS UMatData
|
||||
|
||||
struct CV_EXPORTS UMatDataAutoLock
|
||||
{
|
||||
UMatDataAutoLock(UMatData* u);
|
||||
explicit UMatDataAutoLock(UMatData* u);
|
||||
~UMatDataAutoLock();
|
||||
UMatData* u;
|
||||
};
|
||||
@@ -403,7 +403,7 @@ struct CV_EXPORTS UMatDataAutoLock
|
||||
|
||||
struct CV_EXPORTS MatSize
|
||||
{
|
||||
MatSize(int* _p);
|
||||
explicit MatSize(int* _p);
|
||||
Size operator()() const;
|
||||
const int& operator[](int i) const;
|
||||
int& operator[](int i);
|
||||
@@ -417,7 +417,7 @@ struct CV_EXPORTS MatSize
|
||||
struct CV_EXPORTS MatStep
|
||||
{
|
||||
MatStep();
|
||||
MatStep(size_t s);
|
||||
explicit MatStep(size_t s);
|
||||
const size_t& operator[](int i) const;
|
||||
size_t& operator[](int i);
|
||||
operator size_t() const;
|
||||
@@ -900,6 +900,11 @@ public:
|
||||
template<typename _Tp> MatConstIterator_<_Tp> begin() const;
|
||||
template<typename _Tp> MatConstIterator_<_Tp> end() const;
|
||||
|
||||
//! template methods for running an operation over all matrix elements.
|
||||
// the operations take care of skipping gaps in the end of rows (if any)
|
||||
template<typename _Tp, typename Functor> void forEach(const Functor& operation);
|
||||
template<typename _Tp, typename Functor> void forEach(const Functor& operation) const;
|
||||
|
||||
enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
|
||||
enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
|
||||
|
||||
@@ -918,9 +923,9 @@ public:
|
||||
uchar* data;
|
||||
|
||||
//! helper fields used in locateROI and adjustROI
|
||||
uchar* datastart;
|
||||
uchar* dataend;
|
||||
uchar* datalimit;
|
||||
const uchar* datastart;
|
||||
const uchar* dataend;
|
||||
const uchar* datalimit;
|
||||
|
||||
//! custom allocator
|
||||
MatAllocator* allocator;
|
||||
@@ -934,6 +939,7 @@ public:
|
||||
MatStep step;
|
||||
|
||||
protected:
|
||||
template<typename _Tp, typename Functor> void forEach_impl(const Functor& operation);
|
||||
};
|
||||
|
||||
|
||||
@@ -1043,6 +1049,11 @@ public:
|
||||
const_iterator begin() const;
|
||||
const_iterator end() const;
|
||||
|
||||
//! template methods for running an operation over all matrix elements.
|
||||
// the operations take care of skipping gaps in the end of rows (if any)
|
||||
template<typename Functor> void forEach(const Functor& operation);
|
||||
template<typename Functor> void forEach(const Functor& operation) const;
|
||||
|
||||
//! equivalent to Mat::create(_rows, _cols, DataType<_Tp>::type)
|
||||
void create(int _rows, int _cols);
|
||||
//! equivalent to Mat::create(_size, DataType<_Tp>::type)
|
||||
@@ -1804,9 +1815,9 @@ public:
|
||||
//! copy operator
|
||||
MatConstIterator& operator = (const MatConstIterator& it);
|
||||
//! returns the current matrix element
|
||||
uchar* operator *() const;
|
||||
const uchar* operator *() const;
|
||||
//! returns the i-th matrix element, relative to the current
|
||||
uchar* operator [](ptrdiff_t i) const;
|
||||
const uchar* operator [](ptrdiff_t i) const;
|
||||
|
||||
//! shifts the iterator forward by the specified number of elements
|
||||
MatConstIterator& operator += (ptrdiff_t ofs);
|
||||
@@ -1831,9 +1842,9 @@ public:
|
||||
|
||||
const Mat* m;
|
||||
size_t elemSize;
|
||||
uchar* ptr;
|
||||
uchar* sliceStart;
|
||||
uchar* sliceEnd;
|
||||
const uchar* ptr;
|
||||
const uchar* sliceStart;
|
||||
const uchar* sliceEnd;
|
||||
};
|
||||
|
||||
|
||||
@@ -1917,9 +1928,9 @@ public:
|
||||
//! constructor that sets the iterator to the specified element of the matrix
|
||||
MatIterator_(Mat_<_Tp>* _m, int _row, int _col=0);
|
||||
//! constructor that sets the iterator to the specified element of the matrix
|
||||
MatIterator_(const Mat_<_Tp>* _m, Point _pt);
|
||||
MatIterator_(Mat_<_Tp>* _m, Point _pt);
|
||||
//! constructor that sets the iterator to the specified element of the matrix
|
||||
MatIterator_(const Mat_<_Tp>* _m, const int* _idx);
|
||||
MatIterator_(Mat_<_Tp>* _m, const int* _idx);
|
||||
//! copy constructor
|
||||
MatIterator_(const MatIterator_& it);
|
||||
//! copy operator
|
||||
|
@@ -438,7 +438,7 @@ Mat::Mat(const std::vector<_Tp>& vec, bool copyData)
|
||||
if( !copyData )
|
||||
{
|
||||
step[0] = step[1] = sizeof(_Tp);
|
||||
data = datastart = (uchar*)&vec[0];
|
||||
datastart = data = (uchar*)&vec[0];
|
||||
datalimit = dataend = datastart + rows * step[0];
|
||||
}
|
||||
else
|
||||
@@ -453,7 +453,7 @@ Mat::Mat(const Vec<_Tp, n>& vec, bool copyData)
|
||||
if( !copyData )
|
||||
{
|
||||
step[0] = step[1] = sizeof(_Tp);
|
||||
data = datastart = (uchar*)vec.val;
|
||||
datastart = data = (uchar*)vec.val;
|
||||
datalimit = dataend = datastart + rows * step[0];
|
||||
}
|
||||
else
|
||||
@@ -470,7 +470,7 @@ Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData)
|
||||
{
|
||||
step[0] = cols * sizeof(_Tp);
|
||||
step[1] = sizeof(_Tp);
|
||||
data = datastart = (uchar*)M.val;
|
||||
datastart = data = (uchar*)M.val;
|
||||
datalimit = dataend = datastart + rows * step[0];
|
||||
}
|
||||
else
|
||||
@@ -485,7 +485,7 @@ Mat::Mat(const Point_<_Tp>& pt, bool copyData)
|
||||
if( !copyData )
|
||||
{
|
||||
step[0] = step[1] = sizeof(_Tp);
|
||||
data = datastart = (uchar*)&pt.x;
|
||||
datastart = data = (uchar*)&pt.x;
|
||||
datalimit = dataend = datastart + rows * step[0];
|
||||
}
|
||||
else
|
||||
@@ -504,7 +504,7 @@ Mat::Mat(const Point3_<_Tp>& pt, bool copyData)
|
||||
if( !copyData )
|
||||
{
|
||||
step[0] = step[1] = sizeof(_Tp);
|
||||
data = datastart = (uchar*)&pt.x;
|
||||
datastart = data = (uchar*)&pt.x;
|
||||
datalimit = dataend = datastart + rows * step[0];
|
||||
}
|
||||
else
|
||||
@@ -642,7 +642,7 @@ inline void Mat::release()
|
||||
if( u && CV_XADD(&u->refcount, -1) == 1 )
|
||||
deallocate();
|
||||
u = NULL;
|
||||
data = datastart = dataend = datalimit = 0;
|
||||
datastart = dataend = datalimit = data = 0;
|
||||
for(int i = 0; i < dims; i++)
|
||||
size.p[i] = 0;
|
||||
}
|
||||
@@ -1000,6 +1000,17 @@ MatIterator_<_Tp> Mat::end()
|
||||
return it;
|
||||
}
|
||||
|
||||
template<typename _Tp, typename Functor> inline
|
||||
void Mat::forEach(const Functor& operation) {
|
||||
this->forEach_impl<_Tp>(operation);
|
||||
};
|
||||
|
||||
template<typename _Tp, typename Functor> inline
|
||||
void Mat::forEach(const Functor& operation) const {
|
||||
// call as not const
|
||||
(const_cast<Mat*>(this))->forEach<const _Tp>(operation);
|
||||
};
|
||||
|
||||
template<typename _Tp> inline
|
||||
Mat::operator std::vector<_Tp>() const
|
||||
{
|
||||
@@ -1045,7 +1056,7 @@ void Mat::push_back(const _Tp& elem)
|
||||
}
|
||||
CV_Assert(DataType<_Tp>::type == type() && cols == 1
|
||||
/* && dims == 2 (cols == 1 implies dims == 2) */);
|
||||
uchar* tmp = dataend + step[0];
|
||||
const uchar* tmp = dataend + step[0];
|
||||
if( !isSubmatrix() && isContinuous() && tmp <= datalimit )
|
||||
{
|
||||
*(_Tp*)(data + (size.p[0]++) * step.p[0]) = elem;
|
||||
@@ -1585,6 +1596,15 @@ MatIterator_<_Tp> Mat_<_Tp>::end()
|
||||
return Mat::end<_Tp>();
|
||||
}
|
||||
|
||||
//! Runs `operation` on every element; forwards to Mat::forEach with the
//! element type fixed to this matrix's _Tp.
template<typename _Tp> template<typename Functor> inline
|
||||
void Mat_<_Tp>::forEach(const Functor& operation) {
|
||||
Mat::forEach<_Tp, Functor>(operation);
|
||||
}
|
||||
|
||||
//! Const overload: forwards to Mat::forEach with the element type fixed to
//! this matrix's _Tp. Being called from a const member, the call resolves
//! against a const Mat.
template<typename _Tp> template<typename Functor> inline
|
||||
void Mat_<_Tp>::forEach(const Functor& operation) const {
|
||||
Mat::forEach<_Tp, Functor>(operation);
|
||||
}
|
||||
|
||||
///////////////////////////// SparseMat /////////////////////////////
|
||||
|
||||
@@ -2149,7 +2169,7 @@ MatConstIterator& MatConstIterator::operator = (const MatConstIterator& it )
|
||||
}
|
||||
|
||||
inline
|
||||
uchar* MatConstIterator::operator *() const
|
||||
const uchar* MatConstIterator::operator *() const
|
||||
{
|
||||
return ptr;
|
||||
}
|
||||
@@ -2282,7 +2302,7 @@ MatConstIterator operator - (const MatConstIterator& a, ptrdiff_t ofs)
|
||||
|
||||
|
||||
inline
|
||||
uchar* MatConstIterator::operator [](ptrdiff_t i) const
|
||||
const uchar* MatConstIterator::operator [](ptrdiff_t i) const
|
||||
{
|
||||
return *(*this + i);
|
||||
}
|
||||
@@ -2454,12 +2474,12 @@ MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, int _row, int _col)
|
||||
{}
|
||||
|
||||
template<typename _Tp> inline
|
||||
MatIterator_<_Tp>::MatIterator_(const Mat_<_Tp>* _m, Point _pt)
|
||||
MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, Point _pt)
|
||||
: MatConstIterator_<_Tp>(_m, _pt)
|
||||
{}
|
||||
|
||||
template<typename _Tp> inline
|
||||
MatIterator_<_Tp>::MatIterator_(const Mat_<_Tp>* _m, const int* _idx)
|
||||
MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, const int* _idx)
|
||||
: MatConstIterator_<_Tp>(_m, _idx)
|
||||
{}
|
||||
|
||||
@@ -2593,7 +2613,7 @@ inline SparseMatConstIterator& SparseMatConstIterator::operator = (const SparseM
|
||||
template<typename _Tp> inline
|
||||
const _Tp& SparseMatConstIterator::value() const
|
||||
{
|
||||
return *(_Tp*)ptr;
|
||||
return *(const _Tp*)ptr;
|
||||
}
|
||||
|
||||
inline
|
||||
|
@@ -636,6 +636,9 @@ protected:
|
||||
|
||||
CV_EXPORTS MatAllocator* getOpenCLAllocator();
|
||||
|
||||
CV_EXPORTS_W bool isPerformanceCheckBypassed();
|
||||
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::isPerformanceCheckBypassed() || (condition))
|
||||
|
||||
}}
|
||||
|
||||
#endif
|
||||
|
@@ -274,6 +274,102 @@ public:
|
||||
|
||||
CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.);
|
||||
|
||||
/////////////////////////////// forEach method of cv::Mat ////////////////////////////
|
||||
template<typename _Tp, typename Functor> inline
|
||||
void Mat::forEach_impl(const Functor& operation) {
|
||||
if (false) {
|
||||
operation(*reinterpret_cast<_Tp*>(0), reinterpret_cast<int*>(NULL));
|
||||
// If your compiler fail in this line.
|
||||
// Please check that your functor signature is
|
||||
// (_Tp&, const int*) <- multidimential
|
||||
// or (_Tp&, void*) <- in case of you don't need current idx.
|
||||
}
|
||||
|
||||
CV_Assert(this->total() / this->size[this->dims - 1] <= INT_MAX);
|
||||
const int LINES = static_cast<int>(this->total() / this->size[this->dims - 1]);
|
||||
|
||||
class PixelOperationWrapper :public ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
PixelOperationWrapper(Mat_<_Tp>* const frame, const Functor& _operation)
|
||||
: mat(frame), op(_operation) {};
|
||||
virtual ~PixelOperationWrapper(){};
|
||||
// ! Overloaded virtual operator
|
||||
// convert range call to row call.
|
||||
virtual void operator()(const Range &range) const {
|
||||
const int DIMS = mat->dims;
|
||||
const int COLS = mat->size[DIMS - 1];
|
||||
if (DIMS <= 2) {
|
||||
for (int row = range.start; row < range.end; ++row) {
|
||||
this->rowCall2(row, COLS);
|
||||
}
|
||||
} else {
|
||||
std::vector<int> idx(COLS); /// idx is modified in this->rowCall
|
||||
idx[DIMS - 2] = range.start - 1;
|
||||
|
||||
for (int line_num = range.start; line_num < range.end; ++line_num) {
|
||||
idx[DIMS - 2]++;
|
||||
for (int i = DIMS - 2; i >= 0; --i) {
|
||||
if (idx[i] >= mat->size[i]) {
|
||||
idx[i - 1] += idx[i] / mat->size[i];
|
||||
idx[i] %= mat->size[i];
|
||||
continue; // carry-over;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
this->rowCall(&idx[0], COLS, DIMS);
|
||||
}
|
||||
}
|
||||
};
|
||||
private:
|
||||
Mat_<_Tp>* const mat;
|
||||
const Functor op;
|
||||
// ! Call operator for each elements in this row.
|
||||
inline void rowCall(int* const idx, const int COLS, const int DIMS) const {
|
||||
int &col = idx[DIMS - 1];
|
||||
col = 0;
|
||||
_Tp* pixel = &(mat->template at<_Tp>(idx));
|
||||
|
||||
while (col < COLS) {
|
||||
op(*pixel, const_cast<const int*>(idx));
|
||||
pixel++; col++;
|
||||
}
|
||||
col = 0;
|
||||
}
|
||||
// ! Call operator for each elements in this row. 2d mat special version.
|
||||
inline void rowCall2(const int row, const int COLS) const {
|
||||
union Index{
|
||||
int body[2];
|
||||
operator const int*() const {
|
||||
return reinterpret_cast<const int*>(this);
|
||||
}
|
||||
int& operator[](const int i) {
|
||||
return body[i];
|
||||
}
|
||||
} idx = {{row, 0}};
|
||||
// Special union is needed to avoid
|
||||
// "error: array subscript is above array bounds [-Werror=array-bounds]"
|
||||
// when call the functor `op` such that access idx[3].
|
||||
|
||||
_Tp* pixel = &(mat->template at<_Tp>(idx));
|
||||
const _Tp* const pixel_end = pixel + COLS;
|
||||
while(pixel < pixel_end) {
|
||||
op(*pixel++, static_cast<const int*>(idx));
|
||||
idx[1]++;
|
||||
}
|
||||
};
|
||||
PixelOperationWrapper& operator=(const PixelOperationWrapper &) {
|
||||
CV_Assert(false);
|
||||
// We can not remove this implementation because Visual Studio warning C4822.
|
||||
return *this;
|
||||
};
|
||||
};
|
||||
|
||||
parallel_for_(cv::Range(0, LINES), PixelOperationWrapper(reinterpret_cast<Mat_<_Tp>*>(this), operation));
|
||||
};
|
||||
|
||||
/////////////////////////// Synchronization Primitives ///////////////////////////////
|
||||
|
||||
class CV_EXPORTS Mutex
|
||||
|
@@ -39,7 +39,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include "../perf_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_perf.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
@@ -4,7 +4,7 @@
|
||||
//
|
||||
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include "../perf_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_perf.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
@@ -44,7 +44,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include "../perf_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_perf.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
@@ -44,7 +44,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include "../perf_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_perf.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
@@ -44,7 +44,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include "../perf_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_perf.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
@@ -5,7 +5,7 @@
|
||||
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include "../perf_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_perf.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
@@ -4,7 +4,7 @@
|
||||
//
|
||||
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include "../perf_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_perf.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
@@ -47,7 +47,7 @@
|
||||
// */
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
#include "opencl_kernels_core.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@@ -1607,7 +1607,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
|
||||
Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
|
||||
#ifdef HAVE_OPENCL
|
||||
bool use_opencl = _dst.isUMat() && dims1 <= 2 && dims2 <= 2;
|
||||
bool use_opencl = OCL_PERFORMANCE_CHECK(_dst.isUMat()) && dims1 <= 2 && dims2 <= 2;
|
||||
#endif
|
||||
bool src1Scalar = checkScalar(*psrc1, type2, kind1, kind2);
|
||||
bool src2Scalar = checkScalar(*psrc2, type1, kind2, kind1);
|
||||
@@ -2440,6 +2440,34 @@ addWeighted8u( const uchar* src1, size_t step1,
|
||||
_mm_storel_epi64((__m128i*)(dst + x), u);
|
||||
}
|
||||
}
|
||||
#elif CV_NEON
|
||||
float32x4_t g = vdupq_n_f32 (gamma);
|
||||
|
||||
for( ; x <= size.width - 8; x += 8 )
|
||||
{
|
||||
uint8x8_t in1 = vld1_u8(src1+x);
|
||||
uint16x8_t in1_16 = vmovl_u8(in1);
|
||||
float32x4_t in1_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in1_16)));
|
||||
float32x4_t in1_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in1_16)));
|
||||
|
||||
uint8x8_t in2 = vld1_u8(src2+x);
|
||||
uint16x8_t in2_16 = vmovl_u8(in2);
|
||||
float32x4_t in2_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in2_16)));
|
||||
float32x4_t in2_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in2_16)));
|
||||
|
||||
float32x4_t out_f_l = vaddq_f32(vmulq_n_f32(in1_f_l, alpha), vmulq_n_f32(in2_f_l, beta));
|
||||
float32x4_t out_f_h = vaddq_f32(vmulq_n_f32(in1_f_h, alpha), vmulq_n_f32(in2_f_h, beta));
|
||||
out_f_l = vaddq_f32(out_f_l, g);
|
||||
out_f_h = vaddq_f32(out_f_h, g);
|
||||
|
||||
uint16x4_t out_16_l = vqmovun_s32(vcvtq_s32_f32(out_f_l));
|
||||
uint16x4_t out_16_h = vqmovun_s32(vcvtq_s32_f32(out_f_h));
|
||||
|
||||
uint16x8_t out_16 = vcombine_u16(out_16_l, out_16_h);
|
||||
uint8x8_t out = vqmovn_u16(out_16);
|
||||
|
||||
vst1_u8(dst+x, out);
|
||||
}
|
||||
#endif
|
||||
#if CV_ENABLE_UNROLLED
|
||||
for( ; x <= size.width - 4; x += 4 )
|
||||
@@ -2650,6 +2678,14 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
|
||||
|
||||
}
|
||||
}
|
||||
#elif CV_NEON
|
||||
uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);
|
||||
|
||||
for( ; x <= size.width - 16; x += 16 )
|
||||
{
|
||||
vst1q_u8(dst+x, veorq_u8(vcgtq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
for( ; x < size.width; x++ ){
|
||||
@@ -2674,6 +2710,13 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
|
||||
_mm_storeu_si128((__m128i*)(dst + x), r00);
|
||||
}
|
||||
}
|
||||
#elif CV_NEON
|
||||
uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);
|
||||
|
||||
for( ; x <= size.width - 16; x += 16 )
|
||||
{
|
||||
vst1q_u8(dst+x, veorq_u8(vceqq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
|
||||
}
|
||||
#endif
|
||||
for( ; x < size.width; x++ )
|
||||
dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
|
||||
@@ -2759,6 +2802,22 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
|
||||
x += 8;
|
||||
}
|
||||
}
|
||||
#elif CV_NEON
|
||||
uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);
|
||||
|
||||
for( ; x <= size.width - 16; x += 16 )
|
||||
{
|
||||
int16x8_t in1 = vld1q_s16(src1 + x);
|
||||
int16x8_t in2 = vld1q_s16(src2 + x);
|
||||
uint8x8_t t1 = vmovn_u16(vcgtq_s16(in1, in2));
|
||||
|
||||
in1 = vld1q_s16(src1 + x + 8);
|
||||
in2 = vld1q_s16(src2 + x + 8);
|
||||
uint8x8_t t2 = vmovn_u16(vcgtq_s16(in1, in2));
|
||||
|
||||
vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
for( ; x < size.width; x++ ){
|
||||
@@ -2797,6 +2856,21 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
|
||||
x += 8;
|
||||
}
|
||||
}
|
||||
#elif CV_NEON
|
||||
uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);
|
||||
|
||||
for( ; x <= size.width - 16; x += 16 )
|
||||
{
|
||||
int16x8_t in1 = vld1q_s16(src1 + x);
|
||||
int16x8_t in2 = vld1q_s16(src2 + x);
|
||||
uint8x8_t t1 = vmovn_u16(vceqq_s16(in1, in2));
|
||||
|
||||
in1 = vld1q_s16(src1 + x + 8);
|
||||
in2 = vld1q_s16(src2 + x + 8);
|
||||
uint8x8_t t2 = vmovn_u16(vceqq_s16(in1, in2));
|
||||
|
||||
vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
|
||||
}
|
||||
#endif
|
||||
for( ; x < size.width; x++ )
|
||||
dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
|
||||
@@ -2982,7 +3056,7 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
|
||||
haveScalar = true;
|
||||
}
|
||||
|
||||
CV_OCL_RUN(_src1.dims() <= 2 && _src2.dims() <= 2 && _dst.isUMat(),
|
||||
CV_OCL_RUN(_src1.dims() <= 2 && _src2.dims() <= 2 && OCL_PERFORMANCE_CHECK(_dst.isUMat()),
|
||||
ocl_compare(_src1, _src2, _dst, op, haveScalar))
|
||||
|
||||
int kind1 = _src1.kind(), kind2 = _src2.kind();
|
||||
@@ -3085,7 +3159,7 @@ namespace cv
|
||||
{
|
||||
|
||||
template <typename T>
|
||||
struct InRange_SSE
|
||||
struct InRange_SIMD
|
||||
{
|
||||
int operator () (const T *, const T *, const T *, uchar *, int) const
|
||||
{
|
||||
@@ -3096,7 +3170,7 @@ struct InRange_SSE
|
||||
#if CV_SSE2
|
||||
|
||||
template <>
|
||||
struct InRange_SSE<uchar>
|
||||
struct InRange_SIMD<uchar>
|
||||
{
|
||||
int operator () (const uchar * src1, const uchar * src2, const uchar * src3,
|
||||
uchar * dst, int len) const
|
||||
@@ -3121,7 +3195,7 @@ struct InRange_SSE<uchar>
|
||||
};
|
||||
|
||||
template <>
|
||||
struct InRange_SSE<schar>
|
||||
struct InRange_SIMD<schar>
|
||||
{
|
||||
int operator () (const schar * src1, const schar * src2, const schar * src3,
|
||||
uchar * dst, int len) const
|
||||
@@ -3146,7 +3220,7 @@ struct InRange_SSE<schar>
|
||||
};
|
||||
|
||||
template <>
|
||||
struct InRange_SSE<ushort>
|
||||
struct InRange_SIMD<ushort>
|
||||
{
|
||||
int operator () (const ushort * src1, const ushort * src2, const ushort * src3,
|
||||
uchar * dst, int len) const
|
||||
@@ -3172,7 +3246,7 @@ struct InRange_SSE<ushort>
|
||||
};
|
||||
|
||||
template <>
|
||||
struct InRange_SSE<short>
|
||||
struct InRange_SIMD<short>
|
||||
{
|
||||
int operator () (const short * src1, const short * src2, const short * src3,
|
||||
uchar * dst, int len) const
|
||||
@@ -3198,7 +3272,7 @@ struct InRange_SSE<short>
|
||||
};
|
||||
|
||||
template <>
|
||||
struct InRange_SSE<int>
|
||||
struct InRange_SIMD<int>
|
||||
{
|
||||
int operator () (const int * src1, const int * src2, const int * src3,
|
||||
uchar * dst, int len) const
|
||||
@@ -3230,7 +3304,7 @@ struct InRange_SSE<int>
|
||||
};
|
||||
|
||||
template <>
|
||||
struct InRange_SSE<float>
|
||||
struct InRange_SIMD<float>
|
||||
{
|
||||
int operator () (const float * src1, const float * src2, const float * src3,
|
||||
uchar * dst, int len) const
|
||||
@@ -3261,6 +3335,160 @@ struct InRange_SSE<float>
|
||||
}
|
||||
};
|
||||
|
||||
#elif CV_NEON
|
||||
|
||||
template <>
|
||||
struct InRange_SIMD<uchar>
|
||||
{
|
||||
int operator () (const uchar * src1, const uchar * src2, const uchar * src3,
|
||||
uchar * dst, int len) const
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
for ( ; x <= len - 16; x += 16 )
|
||||
{
|
||||
uint8x16_t values = vld1q_u8(src1 + x);
|
||||
uint8x16_t low = vld1q_u8(src2 + x);
|
||||
uint8x16_t high = vld1q_u8(src3 + x);
|
||||
|
||||
vst1q_u8(dst + x, vandq_u8(vcgeq_u8(values, low), vcgeq_u8(high, values)));
|
||||
}
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct InRange_SIMD<schar>
|
||||
{
|
||||
int operator () (const schar * src1, const schar * src2, const schar * src3,
|
||||
uchar * dst, int len) const
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
for ( ; x <= len - 16; x += 16 )
|
||||
{
|
||||
int8x16_t values = vld1q_s8(src1 + x);
|
||||
int8x16_t low = vld1q_s8(src2 + x);
|
||||
int8x16_t high = vld1q_s8(src3 + x);
|
||||
|
||||
vst1q_u8(dst + x, vandq_u8(vcgeq_s8(values, low), vcgeq_s8(high, values)));
|
||||
}
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct InRange_SIMD<ushort>
|
||||
{
|
||||
int operator () (const ushort * src1, const ushort * src2, const ushort * src3,
|
||||
uchar * dst, int len) const
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
for ( ; x <= len - 16; x += 16 )
|
||||
{
|
||||
uint16x8_t values = vld1q_u16((const uint16_t*)(src1 + x));
|
||||
uint16x8_t low = vld1q_u16((const uint16_t*)(src2 + x));
|
||||
uint16x8_t high = vld1q_u16((const uint16_t*)(src3 + x));
|
||||
uint8x8_t r1 = vmovn_u16(vandq_u16(vcgeq_u16(values, low), vcgeq_u16(high, values)));
|
||||
|
||||
values = vld1q_u16((const uint16_t*)(src1 + x + 8));
|
||||
low = vld1q_u16((const uint16_t*)(src2 + x + 8));
|
||||
high = vld1q_u16((const uint16_t*)(src3 + x + 8));
|
||||
uint8x8_t r2 = vmovn_u16(vandq_u16(vcgeq_u16(values, low), vcgeq_u16(high, values)));
|
||||
|
||||
vst1q_u8(dst + x, vcombine_u8(r1, r2));
|
||||
}
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct InRange_SIMD<short>
|
||||
{
|
||||
int operator () (const short * src1, const short * src2, const short * src3,
|
||||
uchar * dst, int len) const
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
for ( ; x <= len - 16; x += 16 )
|
||||
{
|
||||
int16x8_t values = vld1q_s16((const int16_t*)(src1 + x));
|
||||
int16x8_t low = vld1q_s16((const int16_t*)(src2 + x));
|
||||
int16x8_t high = vld1q_s16((const int16_t*)(src3 + x));
|
||||
uint8x8_t r1 = vmovn_u16(vandq_u16(vcgeq_s16(values, low), vcgeq_s16(high, values)));
|
||||
|
||||
values = vld1q_s16((const int16_t*)(src1 + x + 8));
|
||||
low = vld1q_s16((const int16_t*)(src2 + x + 8));
|
||||
high = vld1q_s16((const int16_t*)(src3 + x + 8));
|
||||
uint8x8_t r2 = vmovn_u16(vandq_u16(vcgeq_s16(values, low), vcgeq_s16(high, values)));
|
||||
|
||||
vst1q_u8(dst + x, vcombine_u8(r1, r2));
|
||||
}
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct InRange_SIMD<int>
|
||||
{
|
||||
int operator () (const int * src1, const int * src2, const int * src3,
|
||||
uchar * dst, int len) const
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
for ( ; x <= len - 8; x += 8 )
|
||||
{
|
||||
int32x4_t values = vld1q_s32((const int32_t*)(src1 + x));
|
||||
int32x4_t low = vld1q_s32((const int32_t*)(src2 + x));
|
||||
int32x4_t high = vld1q_s32((const int32_t*)(src3 + x));
|
||||
|
||||
uint16x4_t r1 = vmovn_u32(vandq_u32(vcgeq_s32(values, low), vcgeq_s32(high, values)));
|
||||
|
||||
values = vld1q_s32((const int32_t*)(src1 + x + 4));
|
||||
low = vld1q_s32((const int32_t*)(src2 + x + 4));
|
||||
high = vld1q_s32((const int32_t*)(src3 + x + 4));
|
||||
|
||||
uint16x4_t r2 = vmovn_u32(vandq_u32(vcgeq_s32(values, low), vcgeq_s32(high, values)));
|
||||
|
||||
uint16x8_t res_16 = vcombine_u16(r1, r2);
|
||||
|
||||
vst1_u8(dst + x, vmovn_u16(res_16));
|
||||
}
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct InRange_SIMD<float>
|
||||
{
|
||||
int operator () (const float * src1, const float * src2, const float * src3,
|
||||
uchar * dst, int len) const
|
||||
{
|
||||
int x = 0;
|
||||
|
||||
for ( ; x <= len - 8; x += 8 )
|
||||
{
|
||||
float32x4_t values = vld1q_f32((const float32_t*)(src1 + x));
|
||||
float32x4_t low = vld1q_f32((const float32_t*)(src2 + x));
|
||||
float32x4_t high = vld1q_f32((const float32_t*)(src3 + x));
|
||||
|
||||
uint16x4_t r1 = vmovn_u32(vandq_u32(vcgeq_f32(values, low), vcgeq_f32(high, values)));
|
||||
|
||||
values = vld1q_f32((const float32_t*)(src1 + x + 4));
|
||||
low = vld1q_f32((const float32_t*)(src2 + x + 4));
|
||||
high = vld1q_f32((const float32_t*)(src3 + x + 4));
|
||||
|
||||
uint16x4_t r2 = vmovn_u32(vandq_u32(vcgeq_f32(values, low), vcgeq_f32(high, values)));
|
||||
|
||||
uint16x8_t res_16 = vcombine_u16(r1, r2);
|
||||
|
||||
vst1_u8(dst + x, vmovn_u16(res_16));
|
||||
}
|
||||
return x;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
@@ -3272,7 +3500,7 @@ static void inRange_(const T* src1, size_t step1, const T* src2, size_t step2,
|
||||
step2 /= sizeof(src2[0]);
|
||||
step3 /= sizeof(src3[0]);
|
||||
|
||||
InRange_SSE<T> vop;
|
||||
InRange_SIMD<T> vop;
|
||||
|
||||
for( ; size.height--; src1 += step1, src2 += step2, src3 += step3, dst += step )
|
||||
{
|
||||
@@ -3500,7 +3728,7 @@ void cv::inRange(InputArray _src, InputArray _lowerb,
|
||||
InputArray _upperb, OutputArray _dst)
|
||||
{
|
||||
CV_OCL_RUN(_src.dims() <= 2 && _lowerb.dims() <= 2 &&
|
||||
_upperb.dims() <= 2 && _dst.isUMat(),
|
||||
_upperb.dims() <= 2 && OCL_PERFORMANCE_CHECK(_dst.isUMat()),
|
||||
ocl_inRange(_src, _lowerb, _upperb, _dst))
|
||||
|
||||
int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind();
|
||||
|
@@ -41,7 +41,7 @@
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
#include "opencl_kernels_core.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@@ -50,6 +50,71 @@ namespace cv
|
||||
* split & merge *
|
||||
\****************************************************************************************/
|
||||
|
||||
#if CV_NEON
|
||||
template<typename T> struct VSplit2;
|
||||
template<typename T> struct VSplit3;
|
||||
template<typename T> struct VSplit4;
|
||||
|
||||
#define SPLIT2_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
|
||||
template<> \
|
||||
struct name<data_type>{ \
|
||||
void operator()(const data_type* src, data_type* dst0, data_type* dst1){ \
|
||||
reg_type r = load_func(src); \
|
||||
store_func(dst0, r.val[0]); \
|
||||
store_func(dst1, r.val[1]); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define SPLIT3_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
|
||||
template<> \
|
||||
struct name<data_type>{ \
|
||||
void operator()(const data_type* src, data_type* dst0, data_type* dst1, \
|
||||
data_type* dst2){ \
|
||||
reg_type r = load_func(src); \
|
||||
store_func(dst0, r.val[0]); \
|
||||
store_func(dst1, r.val[1]); \
|
||||
store_func(dst2, r.val[2]); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define SPLIT4_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
|
||||
template<> \
|
||||
struct name<data_type>{ \
|
||||
void operator()(const data_type* src, data_type* dst0, data_type* dst1, \
|
||||
data_type* dst2, data_type* dst3){ \
|
||||
reg_type r = load_func(src); \
|
||||
store_func(dst0, r.val[0]); \
|
||||
store_func(dst1, r.val[1]); \
|
||||
store_func(dst2, r.val[2]); \
|
||||
store_func(dst3, r.val[3]); \
|
||||
} \
|
||||
}
|
||||
|
||||
SPLIT2_KERNEL_TEMPLATE(VSplit2, uchar , uint8x16x2_t, vld2q_u8 , vst1q_u8 );
|
||||
SPLIT2_KERNEL_TEMPLATE(VSplit2, schar , int8x16x2_t, vld2q_s8 , vst1q_s8 );
|
||||
SPLIT2_KERNEL_TEMPLATE(VSplit2, ushort, uint16x8x2_t, vld2q_u16, vst1q_u16);
|
||||
SPLIT2_KERNEL_TEMPLATE(VSplit2, short , int16x8x2_t, vld2q_s16, vst1q_s16);
|
||||
SPLIT2_KERNEL_TEMPLATE(VSplit2, int , int32x4x2_t, vld2q_s32, vst1q_s32);
|
||||
SPLIT2_KERNEL_TEMPLATE(VSplit2, float , float32x4x2_t, vld2q_f32, vst1q_f32);
|
||||
SPLIT2_KERNEL_TEMPLATE(VSplit2, int64 , int64x1x2_t, vld2_s64 , vst1_s64 );
|
||||
|
||||
SPLIT3_KERNEL_TEMPLATE(VSplit3, uchar , uint8x16x3_t, vld3q_u8 , vst1q_u8 );
|
||||
SPLIT3_KERNEL_TEMPLATE(VSplit3, schar , int8x16x3_t, vld3q_s8 , vst1q_s8 );
|
||||
SPLIT3_KERNEL_TEMPLATE(VSplit3, ushort, uint16x8x3_t, vld3q_u16, vst1q_u16);
|
||||
SPLIT3_KERNEL_TEMPLATE(VSplit3, short , int16x8x3_t, vld3q_s16, vst1q_s16);
|
||||
SPLIT3_KERNEL_TEMPLATE(VSplit3, int , int32x4x3_t, vld3q_s32, vst1q_s32);
|
||||
SPLIT3_KERNEL_TEMPLATE(VSplit3, float , float32x4x3_t, vld3q_f32, vst1q_f32);
|
||||
SPLIT3_KERNEL_TEMPLATE(VSplit3, int64 , int64x1x3_t, vld3_s64 , vst1_s64 );
|
||||
|
||||
SPLIT4_KERNEL_TEMPLATE(VSplit4, uchar , uint8x16x4_t, vld4q_u8 , vst1q_u8 );
|
||||
SPLIT4_KERNEL_TEMPLATE(VSplit4, schar , int8x16x4_t, vld4q_s8 , vst1q_s8 );
|
||||
SPLIT4_KERNEL_TEMPLATE(VSplit4, ushort, uint16x8x4_t, vld4q_u16, vst1q_u16);
|
||||
SPLIT4_KERNEL_TEMPLATE(VSplit4, short , int16x8x4_t, vld4q_s16, vst1q_s16);
|
||||
SPLIT4_KERNEL_TEMPLATE(VSplit4, int , int32x4x4_t, vld4q_s32, vst1q_s32);
|
||||
SPLIT4_KERNEL_TEMPLATE(VSplit4, float , float32x4x4_t, vld4q_f32, vst1q_f32);
|
||||
SPLIT4_KERNEL_TEMPLATE(VSplit4, int64 , int64x1x4_t, vld4_s64 , vst1_s64 );
|
||||
#endif
|
||||
|
||||
template<typename T> static void
|
||||
split_( const T* src, T** dst, int len, int cn )
|
||||
{
|
||||
@@ -58,13 +123,34 @@ split_( const T* src, T** dst, int len, int cn )
|
||||
if( k == 1 )
|
||||
{
|
||||
T* dst0 = dst[0];
|
||||
for( i = j = 0; i < len; i++, j += cn )
|
||||
dst0[i] = src[j];
|
||||
|
||||
if(cn == 1)
|
||||
{
|
||||
memcpy(dst0, src, len * sizeof(T));
|
||||
}
|
||||
else
|
||||
{
|
||||
for( i = 0, j = 0 ; i < len; i++, j += cn )
|
||||
dst0[i] = src[j];
|
||||
}
|
||||
}
|
||||
else if( k == 2 )
|
||||
{
|
||||
T *dst0 = dst[0], *dst1 = dst[1];
|
||||
for( i = j = 0; i < len; i++, j += cn )
|
||||
i = j = 0;
|
||||
|
||||
#if CV_NEON
|
||||
if(cn == 2)
|
||||
{
|
||||
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
|
||||
int inc_j = 2 * inc_i;
|
||||
|
||||
VSplit2<T> vsplit;
|
||||
for( ; i < len - inc_i; i += inc_i, j += inc_j)
|
||||
vsplit(src + j, dst0 + i, dst1 + i);
|
||||
}
|
||||
#endif
|
||||
for( ; i < len; i++, j += cn )
|
||||
{
|
||||
dst0[i] = src[j];
|
||||
dst1[i] = src[j+1];
|
||||
@@ -73,7 +159,20 @@ split_( const T* src, T** dst, int len, int cn )
|
||||
else if( k == 3 )
|
||||
{
|
||||
T *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2];
|
||||
for( i = j = 0; i < len; i++, j += cn )
|
||||
i = j = 0;
|
||||
|
||||
#if CV_NEON
|
||||
if(cn == 3)
|
||||
{
|
||||
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
|
||||
int inc_j = 3 * inc_i;
|
||||
|
||||
VSplit3<T> vsplit;
|
||||
for( ; i < len - inc_i; i += inc_i, j += inc_j)
|
||||
vsplit(src + j, dst0 + i, dst1 + i, dst2 + i);
|
||||
}
|
||||
#endif
|
||||
for( ; i < len; i++, j += cn )
|
||||
{
|
||||
dst0[i] = src[j];
|
||||
dst1[i] = src[j+1];
|
||||
@@ -83,7 +182,20 @@ split_( const T* src, T** dst, int len, int cn )
|
||||
else
|
||||
{
|
||||
T *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2], *dst3 = dst[3];
|
||||
for( i = j = 0; i < len; i++, j += cn )
|
||||
i = j = 0;
|
||||
|
||||
#if CV_NEON
|
||||
if(cn == 4)
|
||||
{
|
||||
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
|
||||
int inc_j = 4 * inc_i;
|
||||
|
||||
VSplit4<T> vsplit;
|
||||
for( ; i < len - inc_i; i += inc_i, j += inc_j)
|
||||
vsplit(src + j, dst0 + i, dst1 + i, dst2 + i, dst3 + i);
|
||||
}
|
||||
#endif
|
||||
for( ; i < len; i++, j += cn )
|
||||
{
|
||||
dst0[i] = src[j]; dst1[i] = src[j+1];
|
||||
dst2[i] = src[j+2]; dst3[i] = src[j+3];
|
||||
@@ -101,6 +213,77 @@ split_( const T* src, T** dst, int len, int cn )
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#if CV_NEON
|
||||
template<typename T> struct VMerge2;
|
||||
template<typename T> struct VMerge3;
|
||||
template<typename T> struct VMerge4;
|
||||
|
||||
#define MERGE2_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
|
||||
template<> \
|
||||
struct name<data_type>{ \
|
||||
void operator()(const data_type* src0, const data_type* src1, \
|
||||
data_type* dst){ \
|
||||
reg_type r; \
|
||||
r.val[0] = load_func(src0); \
|
||||
r.val[1] = load_func(src1); \
|
||||
store_func(dst, r); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define MERGE3_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
|
||||
template<> \
|
||||
struct name<data_type>{ \
|
||||
void operator()(const data_type* src0, const data_type* src1, \
|
||||
const data_type* src2, data_type* dst){ \
|
||||
reg_type r; \
|
||||
r.val[0] = load_func(src0); \
|
||||
r.val[1] = load_func(src1); \
|
||||
r.val[2] = load_func(src2); \
|
||||
store_func(dst, r); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define MERGE4_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
|
||||
template<> \
|
||||
struct name<data_type>{ \
|
||||
void operator()(const data_type* src0, const data_type* src1, \
|
||||
const data_type* src2, const data_type* src3, \
|
||||
data_type* dst){ \
|
||||
reg_type r; \
|
||||
r.val[0] = load_func(src0); \
|
||||
r.val[1] = load_func(src1); \
|
||||
r.val[2] = load_func(src2); \
|
||||
r.val[3] = load_func(src3); \
|
||||
store_func(dst, r); \
|
||||
} \
|
||||
}
|
||||
|
||||
MERGE2_KERNEL_TEMPLATE(VMerge2, uchar , uint8x16x2_t, vld1q_u8 , vst2q_u8 );
|
||||
MERGE2_KERNEL_TEMPLATE(VMerge2, schar , int8x16x2_t, vld1q_s8 , vst2q_s8 );
|
||||
MERGE2_KERNEL_TEMPLATE(VMerge2, ushort, uint16x8x2_t, vld1q_u16, vst2q_u16);
|
||||
MERGE2_KERNEL_TEMPLATE(VMerge2, short , int16x8x2_t, vld1q_s16, vst2q_s16);
|
||||
MERGE2_KERNEL_TEMPLATE(VMerge2, int , int32x4x2_t, vld1q_s32, vst2q_s32);
|
||||
MERGE2_KERNEL_TEMPLATE(VMerge2, float , float32x4x2_t, vld1q_f32, vst2q_f32);
|
||||
MERGE2_KERNEL_TEMPLATE(VMerge2, int64 , int64x1x2_t, vld1_s64 , vst2_s64 );
|
||||
|
||||
MERGE3_KERNEL_TEMPLATE(VMerge3, uchar , uint8x16x3_t, vld1q_u8 , vst3q_u8 );
|
||||
MERGE3_KERNEL_TEMPLATE(VMerge3, schar , int8x16x3_t, vld1q_s8 , vst3q_s8 );
|
||||
MERGE3_KERNEL_TEMPLATE(VMerge3, ushort, uint16x8x3_t, vld1q_u16, vst3q_u16);
|
||||
MERGE3_KERNEL_TEMPLATE(VMerge3, short , int16x8x3_t, vld1q_s16, vst3q_s16);
|
||||
MERGE3_KERNEL_TEMPLATE(VMerge3, int , int32x4x3_t, vld1q_s32, vst3q_s32);
|
||||
MERGE3_KERNEL_TEMPLATE(VMerge3, float , float32x4x3_t, vld1q_f32, vst3q_f32);
|
||||
MERGE3_KERNEL_TEMPLATE(VMerge3, int64 , int64x1x3_t, vld1_s64 , vst3_s64 );
|
||||
|
||||
MERGE4_KERNEL_TEMPLATE(VMerge4, uchar , uint8x16x4_t, vld1q_u8 , vst4q_u8 );
|
||||
MERGE4_KERNEL_TEMPLATE(VMerge4, schar , int8x16x4_t, vld1q_s8 , vst4q_s8 );
|
||||
MERGE4_KERNEL_TEMPLATE(VMerge4, ushort, uint16x8x4_t, vld1q_u16, vst4q_u16);
|
||||
MERGE4_KERNEL_TEMPLATE(VMerge4, short , int16x8x4_t, vld1q_s16, vst4q_s16);
|
||||
MERGE4_KERNEL_TEMPLATE(VMerge4, int , int32x4x4_t, vld1q_s32, vst4q_s32);
|
||||
MERGE4_KERNEL_TEMPLATE(VMerge4, float , float32x4x4_t, vld1q_f32, vst4q_f32);
|
||||
MERGE4_KERNEL_TEMPLATE(VMerge4, int64 , int64x1x4_t, vld1_s64 , vst4_s64 );
|
||||
#endif
|
||||
|
||||
template<typename T> static void
|
||||
merge_( const T** src, T* dst, int len, int cn )
|
||||
{
|
||||
@@ -115,7 +298,19 @@ merge_( const T** src, T* dst, int len, int cn )
|
||||
else if( k == 2 )
|
||||
{
|
||||
const T *src0 = src[0], *src1 = src[1];
|
||||
for( i = j = 0; i < len; i++, j += cn )
|
||||
i = j = 0;
|
||||
#if CV_NEON
|
||||
if(cn == 2)
|
||||
{
|
||||
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
|
||||
int inc_j = 2 * inc_i;
|
||||
|
||||
VMerge2<T> vmerge;
|
||||
for( ; i < len - inc_i; i += inc_i, j += inc_j)
|
||||
vmerge(src0 + i, src1 + i, dst + j);
|
||||
}
|
||||
#endif
|
||||
for( ; i < len; i++, j += cn )
|
||||
{
|
||||
dst[j] = src0[i];
|
||||
dst[j+1] = src1[i];
|
||||
@@ -124,7 +319,19 @@ merge_( const T** src, T* dst, int len, int cn )
|
||||
else if( k == 3 )
|
||||
{
|
||||
const T *src0 = src[0], *src1 = src[1], *src2 = src[2];
|
||||
for( i = j = 0; i < len; i++, j += cn )
|
||||
i = j = 0;
|
||||
#if CV_NEON
|
||||
if(cn == 3)
|
||||
{
|
||||
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
|
||||
int inc_j = 3 * inc_i;
|
||||
|
||||
VMerge3<T> vmerge;
|
||||
for( ; i < len - inc_i; i += inc_i, j += inc_j)
|
||||
vmerge(src0 + i, src1 + i, src2 + i, dst + j);
|
||||
}
|
||||
#endif
|
||||
for( ; i < len; i++, j += cn )
|
||||
{
|
||||
dst[j] = src0[i];
|
||||
dst[j+1] = src1[i];
|
||||
@@ -134,7 +341,19 @@ merge_( const T** src, T* dst, int len, int cn )
|
||||
else
|
||||
{
|
||||
const T *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3];
|
||||
for( i = j = 0; i < len; i++, j += cn )
|
||||
i = j = 0;
|
||||
#if CV_NEON
|
||||
if(cn == 4)
|
||||
{
|
||||
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
|
||||
int inc_j = 4 * inc_i;
|
||||
|
||||
VMerge4<T> vmerge;
|
||||
for( ; i < len - inc_i; i += inc_i, j += inc_j)
|
||||
vmerge(src0 + i, src1 + i, src2 + i, src3 + i, dst + j);
|
||||
}
|
||||
#endif
|
||||
for( ; i < len; i++, j += cn )
|
||||
{
|
||||
dst[j] = src0[i]; dst[j+1] = src1[i];
|
||||
dst[j+2] = src2[i]; dst[j+3] = src3[i];
|
||||
|
@@ -46,7 +46,7 @@
|
||||
// */
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
#include "opencl_kernels_core.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
@@ -42,7 +42,7 @@
|
||||
#include "precomp.hpp"
|
||||
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
|
||||
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
#include "opencl_kernels_core.hpp"
|
||||
#include <map>
|
||||
|
||||
namespace cv
|
||||
@@ -1801,11 +1801,11 @@ private:
|
||||
UMat twiddles;
|
||||
String buildOptions;
|
||||
int thread_count;
|
||||
bool status;
|
||||
int dft_size;
|
||||
bool status;
|
||||
|
||||
public:
|
||||
OCL_FftPlan(int _size): dft_size(_size), status(true)
|
||||
OCL_FftPlan(int _size) : dft_size(_size), status(true)
|
||||
{
|
||||
int min_radix;
|
||||
std::vector<int> radixes, blocks;
|
||||
@@ -2635,8 +2635,8 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
|
||||
|
||||
for( i = 0; i < nonzero_rows; i++ )
|
||||
{
|
||||
uchar* sptr = src.data + i*src.step;
|
||||
uchar* dptr0 = dst.data + i*dst.step;
|
||||
const uchar* sptr = src.ptr(i);
|
||||
uchar* dptr0 = dst.ptr(i);
|
||||
uchar* dptr = dptr0;
|
||||
|
||||
if( tmp_buf )
|
||||
@@ -2649,7 +2649,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
|
||||
|
||||
for( ; i < count; i++ )
|
||||
{
|
||||
uchar* dptr0 = dst.data + i*dst.step;
|
||||
uchar* dptr0 = dst.ptr(i);
|
||||
memset( dptr0, 0, dst_full_len );
|
||||
}
|
||||
|
||||
@@ -2661,7 +2661,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
|
||||
{
|
||||
int a = 0, b = count;
|
||||
uchar *buf0, *buf1, *dbuf0, *dbuf1;
|
||||
uchar* sptr0 = src.data;
|
||||
const uchar* sptr0 = src.data;
|
||||
uchar* dptr0 = dst.data;
|
||||
buf0 = ptr;
|
||||
ptr += len*complex_elem_size;
|
||||
@@ -2800,7 +2800,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
|
||||
int n = dst.cols;
|
||||
if( elem_size == (int)sizeof(float) )
|
||||
{
|
||||
float* p0 = (float*)dst.data;
|
||||
float* p0 = dst.ptr<float>();
|
||||
size_t dstep = dst.step/sizeof(p0[0]);
|
||||
for( i = 0; i < len; i++ )
|
||||
{
|
||||
@@ -2816,7 +2816,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
|
||||
}
|
||||
else
|
||||
{
|
||||
double* p0 = (double*)dst.data;
|
||||
double* p0 = dst.ptr<double>();
|
||||
size_t dstep = dst.step/sizeof(p0[0]);
|
||||
for( i = 0; i < len; i++ )
|
||||
{
|
||||
|
@@ -955,10 +955,10 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
|
||||
SVD::compute(src, w, u, vt);
|
||||
SVD::backSubst(w, u, vt, Mat(), _dst);
|
||||
return type == CV_32F ?
|
||||
(((float*)w.data)[0] >= FLT_EPSILON ?
|
||||
((float*)w.data)[n-1]/((float*)w.data)[0] : 0) :
|
||||
(((double*)w.data)[0] >= DBL_EPSILON ?
|
||||
((double*)w.data)[n-1]/((double*)w.data)[0] : 0);
|
||||
(w.ptr<float>()[0] >= FLT_EPSILON ?
|
||||
w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
|
||||
(w.ptr<double>()[0] >= DBL_EPSILON ?
|
||||
w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
|
||||
}
|
||||
|
||||
CV_Assert( m == n );
|
||||
@@ -975,10 +975,10 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
|
||||
transpose(vt, u);
|
||||
SVD::backSubst(w, u, vt, Mat(), _dst);
|
||||
return type == CV_32F ?
|
||||
(((float*)w.data)[0] >= FLT_EPSILON ?
|
||||
((float*)w.data)[n-1]/((float*)w.data)[0] : 0) :
|
||||
(((double*)w.data)[0] >= DBL_EPSILON ?
|
||||
((double*)w.data)[n-1]/((double*)w.data)[0] : 0);
|
||||
(w.ptr<float>()[0] >= FLT_EPSILON ?
|
||||
w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
|
||||
(w.ptr<double>()[0] >= DBL_EPSILON ?
|
||||
w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
|
||||
}
|
||||
|
||||
CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY );
|
||||
@@ -988,7 +988,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
|
||||
|
||||
if( n <= 3 )
|
||||
{
|
||||
uchar* srcdata = src.data;
|
||||
const uchar* srcdata = src.data;
|
||||
uchar* dstdata = dst.data;
|
||||
size_t srcstep = src.step;
|
||||
size_t dststep = dst.step;
|
||||
@@ -1212,8 +1212,8 @@ bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int meth
|
||||
#define bf(y) ((float*)(bdata + y*src2step))[0]
|
||||
#define bd(y) ((double*)(bdata + y*src2step))[0]
|
||||
|
||||
uchar* srcdata = src.data;
|
||||
uchar* bdata = _src2.data;
|
||||
const uchar* srcdata = src.data;
|
||||
const uchar* bdata = _src2.data;
|
||||
uchar* dstdata = dst.data;
|
||||
size_t srcstep = src.step;
|
||||
size_t src2step = _src2.step;
|
||||
@@ -1557,13 +1557,17 @@ static void _SVDcompute( InputArray _aarr, OutputArray _w,
|
||||
{
|
||||
if( !at )
|
||||
{
|
||||
transpose(temp_u, _u);
|
||||
temp_v.copyTo(_vt);
|
||||
if( _u.needed() )
|
||||
transpose(temp_u, _u);
|
||||
if( _vt.needed() )
|
||||
temp_v.copyTo(_vt);
|
||||
}
|
||||
else
|
||||
{
|
||||
transpose(temp_v, _u);
|
||||
temp_u.copyTo(_vt);
|
||||
if( _u.needed() )
|
||||
transpose(temp_v, _u);
|
||||
if( _vt.needed() )
|
||||
temp_u.copyTo(_vt);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1705,7 +1709,7 @@ cvEigenVV( CvArr* srcarr, CvArr* evectsarr, CvArr* evalsarr, double,
|
||||
eigen(src, evals, evects);
|
||||
if( evects0.data != evects.data )
|
||||
{
|
||||
uchar* p = evects0.data;
|
||||
const uchar* p = evects0.data;
|
||||
evects.convertTo(evects0, evects0.type());
|
||||
CV_Assert( p == evects0.data );
|
||||
}
|
||||
@@ -1714,7 +1718,7 @@ cvEigenVV( CvArr* srcarr, CvArr* evectsarr, CvArr* evalsarr, double,
|
||||
eigen(src, evals);
|
||||
if( evals0.data != evals.data )
|
||||
{
|
||||
uchar* p = evals0.data;
|
||||
const uchar* p = evals0.data;
|
||||
if( evals0.size() == evals.size() )
|
||||
evals.convertTo(evals0, evals0.type());
|
||||
else if( evals0.type() == evals.type() )
|
||||
|
1119
modules/core/src/lda.cpp
Normal file
1119
modules/core/src/lda.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -41,7 +41,7 @@
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
#include "opencl_kernels_core.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
@@ -41,7 +41,7 @@
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
#include "opencl_kernels_core.hpp"
|
||||
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
|
||||
|
||||
namespace cv
|
||||
@@ -3295,7 +3295,6 @@ void cv::PCABackProject(InputArray data, InputArray mean,
|
||||
pca.backProject(data, result);
|
||||
}
|
||||
|
||||
|
||||
/****************************************************************************************\
|
||||
* Earlier API *
|
||||
\****************************************************************************************/
|
||||
|
@@ -41,7 +41,7 @@
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
#include "opencl_kernels_core.hpp"
|
||||
|
||||
#include "bufferpool.impl.hpp"
|
||||
|
||||
@@ -346,7 +346,7 @@ static void finalizeHdr(Mat& m)
|
||||
if( d > 2 )
|
||||
m.rows = m.cols = -1;
|
||||
if(m.u)
|
||||
m.data = m.datastart = m.u->data;
|
||||
m.datastart = m.data = m.u->data;
|
||||
if( m.data )
|
||||
{
|
||||
m.datalimit = m.datastart + m.size[0]*m.step[0];
|
||||
@@ -510,7 +510,7 @@ Mat::Mat(int _dims, const int* _sizes, int _type, void* _data, const size_t* _st
|
||||
datalimit(0), allocator(0), u(0), size(&rows)
|
||||
{
|
||||
flags |= CV_MAT_TYPE(_type);
|
||||
data = datastart = (uchar*)_data;
|
||||
datastart = data = (uchar*)_data;
|
||||
setSize(*this, _dims, _sizes, _steps, true);
|
||||
finalizeHdr(*this);
|
||||
}
|
||||
@@ -549,7 +549,7 @@ static Mat cvMatNDToMat(const CvMatND* m, bool copyData)
|
||||
|
||||
if( !m )
|
||||
return thiz;
|
||||
thiz.data = thiz.datastart = m->data.ptr;
|
||||
thiz.datastart = thiz.data = m->data.ptr;
|
||||
thiz.flags |= CV_MAT_TYPE(m->type);
|
||||
int _sizes[CV_MAX_DIM];
|
||||
size_t _steps[CV_MAX_DIM];
|
||||
@@ -587,7 +587,7 @@ static Mat cvMatToMat(const CvMat* m, bool copyData)
|
||||
thiz.dims = 2;
|
||||
thiz.rows = m->rows;
|
||||
thiz.cols = m->cols;
|
||||
thiz.data = thiz.datastart = m->data.ptr;
|
||||
thiz.datastart = thiz.data = m->data.ptr;
|
||||
size_t esz = CV_ELEM_SIZE(m->type), minstep = thiz.cols*esz, _step = m->step;
|
||||
if( _step == 0 )
|
||||
_step = minstep;
|
||||
@@ -597,7 +597,7 @@ static Mat cvMatToMat(const CvMat* m, bool copyData)
|
||||
}
|
||||
else
|
||||
{
|
||||
thiz.data = thiz.datastart = thiz.dataend = 0;
|
||||
thiz.datastart = thiz.dataend = thiz.data = 0;
|
||||
Mat(m->rows, m->cols, m->type, m->data.ptr, m->step).copyTo(thiz);
|
||||
}
|
||||
|
||||
@@ -636,7 +636,7 @@ static Mat iplImageToMat(const IplImage* img, bool copyData)
|
||||
m.rows = img->roi->height;
|
||||
m.cols = img->roi->width;
|
||||
esz = CV_ELEM_SIZE(m.flags);
|
||||
m.data = m.datastart = (uchar*)img->imageData +
|
||||
m.datastart = m.data = (uchar*)img->imageData +
|
||||
(selectedPlane ? (img->roi->coi - 1)*m.step*img->height : 0) +
|
||||
img->roi->yOffset*m.step[0] + img->roi->xOffset*esz;
|
||||
}
|
||||
@@ -2758,15 +2758,18 @@ namespace cv {
|
||||
|
||||
static bool ocl_setIdentity( InputOutputArray _m, const Scalar& s )
|
||||
{
|
||||
int type = _m.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), kercn = cn;
|
||||
if (cn == 1)
|
||||
int type = _m.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), kercn = cn, rowsPerWI = 1;
|
||||
int sctype = CV_MAKE_TYPE(depth, cn == 3 ? 4 : cn);
|
||||
if (ocl::Device::getDefault().isIntel())
|
||||
{
|
||||
kercn = std::min(ocl::predictOptimalVectorWidth(_m), 4);
|
||||
if (kercn != 4)
|
||||
kercn = 1;
|
||||
rowsPerWI = 4;
|
||||
if (cn == 1)
|
||||
{
|
||||
kercn = std::min(ocl::predictOptimalVectorWidth(_m), 4);
|
||||
if (kercn != 4)
|
||||
kercn = 1;
|
||||
}
|
||||
}
|
||||
int sctype = CV_MAKE_TYPE(depth, cn == 3 ? 4 : cn),
|
||||
rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
|
||||
|
||||
ocl::Kernel k("setIdentity", ocl::core::set_identity_oclsrc,
|
||||
format("-D T=%s -D T1=%s -D cn=%d -D ST=%s -D kercn=%d -D rowsPerWI=%d",
|
||||
@@ -5529,14 +5532,14 @@ double norm( const SparseMat& src, int normType )
|
||||
{
|
||||
if( normType == NORM_INF )
|
||||
for( i = 0; i < N; i++, ++it )
|
||||
result = std::max(result, std::abs((double)*(const float*)it.ptr));
|
||||
result = std::max(result, std::abs((double)it.value<float>()));
|
||||
else if( normType == NORM_L1 )
|
||||
for( i = 0; i < N; i++, ++it )
|
||||
result += std::abs(*(const float*)it.ptr);
|
||||
result += std::abs(it.value<float>());
|
||||
else
|
||||
for( i = 0; i < N; i++, ++it )
|
||||
{
|
||||
double v = *(const float*)it.ptr;
|
||||
double v = it.value<float>();
|
||||
result += v*v;
|
||||
}
|
||||
}
|
||||
@@ -5544,14 +5547,14 @@ double norm( const SparseMat& src, int normType )
|
||||
{
|
||||
if( normType == NORM_INF )
|
||||
for( i = 0; i < N; i++, ++it )
|
||||
result = std::max(result, std::abs(*(const double*)it.ptr));
|
||||
result = std::max(result, std::abs(it.value<double>()));
|
||||
else if( normType == NORM_L1 )
|
||||
for( i = 0; i < N; i++, ++it )
|
||||
result += std::abs(*(const double*)it.ptr);
|
||||
result += std::abs(it.value<double>());
|
||||
else
|
||||
for( i = 0; i < N; i++, ++it )
|
||||
{
|
||||
double v = *(const double*)it.ptr;
|
||||
double v = it.value<double>();
|
||||
result += v*v;
|
||||
}
|
||||
}
|
||||
@@ -5575,7 +5578,7 @@ void minMaxLoc( const SparseMat& src, double* _minval, double* _maxval, int* _mi
|
||||
float minval = FLT_MAX, maxval = -FLT_MAX;
|
||||
for( i = 0; i < N; i++, ++it )
|
||||
{
|
||||
float v = *(const float*)it.ptr;
|
||||
float v = it.value<float>();
|
||||
if( v < minval )
|
||||
{
|
||||
minval = v;
|
||||
@@ -5597,7 +5600,7 @@ void minMaxLoc( const SparseMat& src, double* _minval, double* _maxval, int* _mi
|
||||
double minval = DBL_MAX, maxval = -DBL_MAX;
|
||||
for( i = 0; i < N; i++, ++it )
|
||||
{
|
||||
double v = *(const double*)it.ptr;
|
||||
double v = it.value<double>();
|
||||
if( v < minval )
|
||||
{
|
||||
minval = v;
|
||||
|
@@ -57,6 +57,28 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
// TODO Move to some common place
|
||||
static bool getBoolParameter(const char* name, bool defaultValue)
|
||||
{
|
||||
const char* envValue = getenv(name);
|
||||
if (envValue == NULL)
|
||||
{
|
||||
return defaultValue;
|
||||
}
|
||||
cv::String value = envValue;
|
||||
if (value == "1" || value == "True" || value == "true" || value == "TRUE")
|
||||
{
|
||||
return true;
|
||||
}
|
||||
if (value == "0" || value == "False" || value == "false" || value == "FALSE")
|
||||
{
|
||||
return false;
|
||||
}
|
||||
CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
|
||||
}
|
||||
|
||||
|
||||
// TODO Move to some common place
|
||||
static size_t getConfigurationParameterForSize(const char* name, size_t defaultValue)
|
||||
{
|
||||
@@ -1305,7 +1327,18 @@ OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
|
||||
#ifdef _DEBUG
|
||||
#define CV_OclDbgAssert CV_DbgAssert
|
||||
#else
|
||||
#define CV_OclDbgAssert(expr) (void)(expr)
|
||||
static bool isRaiseError()
|
||||
{
|
||||
static bool initialized = false;
|
||||
static bool value = false;
|
||||
if (!initialized)
|
||||
{
|
||||
value = getBoolParameter("OPENCV_OPENCL_RAISE_ERROR", false);
|
||||
initialized = true;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
#define CV_OclDbgAssert(expr) do { if (isRaiseError()) { CV_Assert(expr); } else { (void)(expr); } } while ((void)0, 0)
|
||||
#endif
|
||||
|
||||
namespace cv { namespace ocl {
|
||||
@@ -4711,4 +4744,16 @@ void* Image2D::ptr() const
|
||||
return p ? p->handle : 0;
|
||||
}
|
||||
|
||||
bool isPerformanceCheckBypassed()
|
||||
{
|
||||
static bool initialized = false;
|
||||
static bool value = false;
|
||||
if (!initialized)
|
||||
{
|
||||
value = getBoolParameter("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);
|
||||
initialized = true;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
}}
|
||||
|
@@ -424,7 +424,7 @@ void fft_radix3_B3(__local float2* smem, __global const float2* twiddles, const
|
||||
const int x3 = x2 + t/3;
|
||||
float2 a0, a1, a2, a3, a4, a5, a6, a7, a8;
|
||||
|
||||
if (x1 < t/2)
|
||||
if (x1 < t/3)
|
||||
{
|
||||
a0 = smem[x1]; a1 = smem[x1+t]; a2 = smem[x1+2*t];
|
||||
a3 = smem[x2]; a4 = smem[x2+t]; a5 = smem[x2+2*t];
|
||||
@@ -433,7 +433,7 @@ void fft_radix3_B3(__local float2* smem, __global const float2* twiddles, const
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (x1 < t/2)
|
||||
if (x1 < t/3)
|
||||
{
|
||||
butterfly3(a0, a1, a2, smem, twiddles, x1, block_size);
|
||||
butterfly3(a3, a4, a5, smem, twiddles, x2, block_size);
|
||||
|
@@ -59,7 +59,7 @@ __kernel void meanStdDev(__global const uchar * srcptr, int src_step, int src_of
|
||||
for (int grain = groups * WGS; id < total; id += grain)
|
||||
{
|
||||
#ifdef HAVE_MASK
|
||||
#ifdef HAVE_SRC_CONT
|
||||
#ifdef HAVE_MASK_CONT
|
||||
int mask_index = id;
|
||||
#else
|
||||
int mask_index = mad24(id / cols, mask_step, id % cols);
|
||||
|
@@ -39,7 +39,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "../../precomp.hpp"
|
||||
|
||||
#ifdef HAVE_CLAMDBLAS
|
||||
|
||||
|
@@ -39,7 +39,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "../../precomp.hpp"
|
||||
|
||||
#ifdef HAVE_CLAMDFFT
|
||||
|
||||
|
@@ -39,7 +39,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "../../precomp.hpp"
|
||||
|
||||
#if defined(HAVE_OPENCL) && !defined(HAVE_OPENCL_STATIC)
|
||||
|
||||
|
@@ -44,7 +44,7 @@
|
||||
#include <climits>
|
||||
#include <limits>
|
||||
|
||||
#include "opencl_kernels.hpp"
|
||||
#include "opencl_kernels_core.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@@ -568,7 +568,7 @@ cv::Scalar cv::sum( InputArray _src )
|
||||
{
|
||||
#ifdef HAVE_OPENCL
|
||||
Scalar _res;
|
||||
CV_OCL_RUN_(_src.isUMat() && _src.dims() <= 2,
|
||||
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
|
||||
ocl_sum(_src, _res, OCL_OP_SUM),
|
||||
_res)
|
||||
#endif
|
||||
@@ -719,7 +719,7 @@ int cv::countNonZero( InputArray _src )
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
int res = -1;
|
||||
CV_OCL_RUN_(_src.isUMat() && _src.dims() <= 2,
|
||||
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
|
||||
ocl_countNonZero(_src, res),
|
||||
res)
|
||||
#endif
|
||||
@@ -782,7 +782,7 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask )
|
||||
int type = src.type();
|
||||
if( !mask.empty() )
|
||||
{
|
||||
typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *);
|
||||
typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
|
||||
ippiMaskMeanFuncC1 ippFuncC1 =
|
||||
type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
|
||||
type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
|
||||
@@ -795,7 +795,7 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask )
|
||||
return Scalar(res);
|
||||
setIppErrorStatus();
|
||||
}
|
||||
typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *);
|
||||
typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
|
||||
ippiMaskMeanFuncC3 ippFuncC3 =
|
||||
type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
|
||||
type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
|
||||
@@ -918,7 +918,8 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
|
||||
{
|
||||
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
|
||||
isContinuous = _src.isContinuous();
|
||||
isContinuous = _src.isContinuous(),
|
||||
isMaskContinuous = _mask.isContinuous();
|
||||
const ocl::Device &defDev = ocl::Device::getDefault();
|
||||
int groups = defDev.maxComputeUnits();
|
||||
if (defDev.isIntel())
|
||||
@@ -943,13 +944,14 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
|
||||
|
||||
char cvt[2][40];
|
||||
String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D sqddepth=%d"
|
||||
" -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s"
|
||||
" -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s%s"
|
||||
" -D convertToDT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
|
||||
ocl::typeToStr(type), ocl::typeToStr(depth),
|
||||
ocl::typeToStr(dtype), ocl::typeToStr(ddepth), sqddepth,
|
||||
ocl::typeToStr(sqdtype), ocl::typeToStr(sqddepth),
|
||||
ocl::convertTypeStr(depth, sqddepth, cn, cvt[0]),
|
||||
cn, isContinuous ? " -D HAVE_SRC_CONT" : "",
|
||||
isMaskContinuous ? " -D HAVE_MASK_CONT" : "",
|
||||
ocl::convertTypeStr(depth, ddepth, cn, cvt[1]),
|
||||
(int)wgs, wgs2_aligned, haveMask ? " -D HAVE_MASK" : "",
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
|
||||
@@ -1025,7 +1027,7 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
|
||||
|
||||
void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
|
||||
{
|
||||
CV_OCL_RUN(_src.isUMat() && _src.dims() <= 2,
|
||||
CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
|
||||
ocl_meanStdDev(_src, _mean, _sdv, _mask))
|
||||
|
||||
Mat src = _src.getMat(), mask = _mask.getMat();
|
||||
@@ -1069,7 +1071,7 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
|
||||
int type = src.type();
|
||||
if( !mask.empty() )
|
||||
{
|
||||
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *, Ipp64f *);
|
||||
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *, Ipp64f *);
|
||||
ippiMaskMeanStdDevFuncC1 ippFuncC1 =
|
||||
type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
|
||||
type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
|
||||
@@ -1081,7 +1083,7 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
|
||||
return;
|
||||
setIppErrorStatus();
|
||||
}
|
||||
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
|
||||
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
|
||||
ippiMaskMeanStdDevFuncC3 ippFuncC3 =
|
||||
type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
|
||||
type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
|
||||
@@ -1571,7 +1573,7 @@ void cv::minMaxIdx(InputArray _src, double* minVal,
|
||||
CV_Assert( (cn == 1 && (_mask.empty() || _mask.type() == CV_8U)) ||
|
||||
(cn > 1 && _mask.empty() && !minIdx && !maxIdx) );
|
||||
|
||||
CV_OCL_RUN(_src.isUMat() && _src.dims() <= 2 && (_mask.empty() || _src.size() == _mask.size()),
|
||||
CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2 && (_mask.empty() || _src.size() == _mask.size()),
|
||||
ocl_minMaxIdx(_src, minVal, maxVal, minIdx, maxIdx, _mask))
|
||||
|
||||
Mat src = _src.getMat(), mask = _mask.getMat();
|
||||
@@ -2234,7 +2236,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
double _result = 0;
|
||||
CV_OCL_RUN_(_src.isUMat() && _src.dims() <= 2,
|
||||
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
|
||||
ocl_norm(_src, normType, _mask, _result),
|
||||
_result)
|
||||
#endif
|
||||
@@ -2594,7 +2596,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
double _result = 0;
|
||||
CV_OCL_RUN_(_src1.isUMat(),
|
||||
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src1.isUMat()),
|
||||
ocl_norm(_src1, _src2, normType, _mask, _result),
|
||||
_result)
|
||||
#endif
|
||||
|
@@ -41,7 +41,7 @@
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
#include "opencl_kernels_core.hpp"
|
||||
|
||||
///////////////////////////////// UMat implementation ///////////////////////////////
|
||||
|
||||
@@ -582,7 +582,7 @@ Mat UMat::getMat(int accessFlags) const
|
||||
hdr.flags = flags;
|
||||
hdr.u = u;
|
||||
hdr.datastart = u->data;
|
||||
hdr.data = hdr.datastart + offset;
|
||||
hdr.data = u->data + offset;
|
||||
hdr.datalimit = hdr.dataend = u->data + u->size;
|
||||
CV_XADD(&hdr.u->refcount, 1);
|
||||
return hdr;
|
||||
@@ -593,15 +593,16 @@ void* UMat::handle(int accessFlags) const
|
||||
if( !u )
|
||||
return 0;
|
||||
|
||||
if ((accessFlags & ACCESS_WRITE) != 0)
|
||||
u->markHostCopyObsolete(true);
|
||||
|
||||
// check flags: if CPU copy is newer, copy it back to GPU.
|
||||
if( u->deviceCopyObsolete() )
|
||||
{
|
||||
CV_Assert(u->refcount == 0);
|
||||
u->currAllocator->unmap(u);
|
||||
}
|
||||
|
||||
if ((accessFlags & ACCESS_WRITE) != 0)
|
||||
u->markHostCopyObsolete(true);
|
||||
|
||||
return u->handle;
|
||||
}
|
||||
|
||||
|
@@ -39,7 +39,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "../test_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
|
||||
#include <cmath>
|
||||
@@ -157,6 +157,7 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool)
|
||||
Border maskBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
randomSubMat(mask, mask_roi, roiSize, maskBorder, CV_8UC1, 0, 2);
|
||||
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
|
||||
*mask.ptr(0) = 255; // prevent test case with mask filled 0 only
|
||||
|
||||
val = cv::Scalar(rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0),
|
||||
rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0));
|
||||
@@ -1419,7 +1420,7 @@ OCL_TEST_P(UMatDot, Mat)
|
||||
OCL_OFF(const double cpuRes = src1_roi.dot(src2_roi));
|
||||
OCL_ON(const double gpuRes = usrc1_roi.dot(usrc2_roi));
|
||||
|
||||
EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
|
||||
EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-5);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1749,7 +1750,7 @@ OCL_TEST_P(ReduceAvg, Mat)
|
||||
OCL_OFF(cv::reduce(src_roi, dst_roi, dim, CV_REDUCE_AVG, dtype));
|
||||
OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, CV_REDUCE_AVG, dtype));
|
||||
|
||||
double eps = ddepth <= CV_32S ? 1 : 5e-6;
|
||||
double eps = ddepth <= CV_32S ? 1 : 6e-6;
|
||||
OCL_EXPECT_MATS_NEAR(dst, eps);
|
||||
}
|
||||
}
|
||||
|
@@ -44,7 +44,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "../test_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
@@ -105,6 +105,7 @@ PARAM_TEST_CASE(Merge, MatDepth, int, bool)
|
||||
UMAT_UPLOAD_INPUT_PARAMETER(src3);
|
||||
UMAT_UPLOAD_INPUT_PARAMETER(src4);
|
||||
|
||||
src_roi.clear(); usrc_roi.clear(); // for test_loop_times > 1
|
||||
src_roi.push_back(src1_roi), usrc_roi.push_back(usrc1_roi);
|
||||
if (nsrc >= 2)
|
||||
src_roi.push_back(src2_roi), usrc_roi.push_back(usrc2_roi);
|
||||
|
@@ -43,7 +43,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "../test_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
@@ -108,7 +108,7 @@ OCL_TEST_P(Dft, Mat)
|
||||
{
|
||||
generateTestData();
|
||||
|
||||
int nonzero_rows = hint ? src.cols - randomInt(1, src.rows-1) : 0;
|
||||
int nonzero_rows = hint ? src.rows - randomInt(1, src.rows-1) : 0;
|
||||
OCL_OFF(cv::dft(src, dst, dft_flags, nonzero_rows));
|
||||
OCL_ON(cv::dft(usrc, udst, dft_flags, nonzero_rows));
|
||||
|
||||
@@ -175,7 +175,7 @@ OCL_TEST_P(MulSpectrums, Mat)
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool()));
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(10, 10), cv::Size(36, 36), cv::Size(512, 1), cv::Size(1280, 768)),
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(45, 72), cv::Size(36, 36), cv::Size(512, 1), cv::Size(1280, 768)),
|
||||
Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R),
|
||||
Bool(), // DFT_INVERSE
|
||||
Bool(), // DFT_ROWS
|
||||
|
@@ -42,7 +42,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "../test_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
@@ -5,7 +5,7 @@
|
||||
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "../test_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
@@ -44,7 +44,7 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "../test_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
@@ -96,7 +96,7 @@ OCL_TEST_P(ConvertTo, Accuracy)
|
||||
OCL_OFF(src_roi.convertTo(dst_roi, dstType, alpha, beta));
|
||||
OCL_ON(usrc_roi.convertTo(udst_roi, dstType, alpha, beta));
|
||||
|
||||
double eps = src_depth >= CV_32F || CV_MAT_DEPTH(dstType) >= CV_32F ? 1e-4 : 1;
|
||||
double eps = CV_MAT_DEPTH(dstType) >= CV_32F ? 2e-4 : 1;
|
||||
OCL_EXPECT_MATS_NEAR(dst, eps);
|
||||
}
|
||||
}
|
||||
@@ -121,7 +121,7 @@ PARAM_TEST_CASE(CopyTo, MatDepth, Channels, bool, bool)
|
||||
use_mask = GET_PARAM(3);
|
||||
}
|
||||
|
||||
void generateTestData()
|
||||
void generateTestData(bool one_cn_mask = false)
|
||||
{
|
||||
const int type = CV_MAKE_TYPE(depth, cn);
|
||||
|
||||
@@ -132,9 +132,11 @@ PARAM_TEST_CASE(CopyTo, MatDepth, Channels, bool, bool)
|
||||
if (use_mask)
|
||||
{
|
||||
Border maskBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
int mask_cn = randomDouble(0.0, 2.0) > 1.0 ? cn : 1;
|
||||
int mask_cn = 1;
|
||||
if (!one_cn_mask && randomDouble(0.0, 2.0) > 1.0)
|
||||
mask_cn = cn;
|
||||
randomSubMat(mask, mask_roi, roiSize, maskBorder, CV_8UC(mask_cn), 0, 2);
|
||||
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
|
||||
cv::threshold(mask, mask, 0.5, 255., THRESH_BINARY);
|
||||
}
|
||||
|
||||
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
@@ -177,7 +179,7 @@ OCL_TEST_P(SetTo, Accuracy)
|
||||
{
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
generateTestData();
|
||||
generateTestData(true); // see modules/core/src/umatrix.cpp Ln:791 => CV_Assert( mask.size() == size() && mask.type() == CV_8UC1 );
|
||||
|
||||
if (use_mask)
|
||||
{
|
||||
|
@@ -649,6 +649,16 @@ static void setValue(SparseMat& M, const int* idx, double value, RNG& rng)
|
||||
CV_Error(CV_StsUnsupportedFormat, "");
|
||||
}
|
||||
|
||||
template<typename Pixel>
|
||||
struct InitializerFunctor{
|
||||
/// Initializer for cv::Mat::forEach test
|
||||
void operator()(Pixel & pixel, const int * idx) const {
|
||||
pixel.x = idx[0];
|
||||
pixel.y = idx[1];
|
||||
pixel.z = idx[2];
|
||||
}
|
||||
};
|
||||
|
||||
void Core_ArrayOpTest::run( int /* start_from */)
|
||||
{
|
||||
int errcount = 0;
|
||||
@@ -686,6 +696,45 @@ void Core_ArrayOpTest::run( int /* start_from */)
|
||||
errcount++;
|
||||
}
|
||||
}
|
||||
// test cv::Mat::forEach
|
||||
{
|
||||
const int dims[3] = { 101, 107, 7 };
|
||||
typedef cv::Point3i Pixel;
|
||||
|
||||
cv::Mat a = cv::Mat::zeros(3, dims, CV_32SC3);
|
||||
InitializerFunctor<Pixel> initializer;
|
||||
|
||||
a.forEach<Pixel>(initializer);
|
||||
|
||||
uint64 total = 0;
|
||||
bool error_reported = false;
|
||||
for (int i0 = 0; i0 < dims[0]; ++i0) {
|
||||
for (int i1 = 0; i1 < dims[1]; ++i1) {
|
||||
for (int i2 = 0; i2 < dims[2]; ++i2) {
|
||||
Pixel& pixel = a.at<Pixel>(i0, i1, i2);
|
||||
if (pixel.x != i0 || pixel.y != i1 || pixel.z != i2) {
|
||||
if (!error_reported) {
|
||||
ts->printf(cvtest::TS::LOG, "forEach is not correct.\n"
|
||||
"First error detected at (%d, %d, %d).\n", pixel.x, pixel.y, pixel.z);
|
||||
error_reported = true;
|
||||
}
|
||||
errcount++;
|
||||
}
|
||||
total += pixel.x;
|
||||
total += pixel.y;
|
||||
total += pixel.z;
|
||||
}
|
||||
}
|
||||
}
|
||||
uint64 total2 = 0;
|
||||
for (size_t i = 0; i < sizeof(dims) / sizeof(dims[0]); ++i) {
|
||||
total2 += ((dims[i] - 1) * dims[i] / 2) * dims[0] * dims[1] * dims[2] / dims[i];
|
||||
}
|
||||
if (total != total2) {
|
||||
ts->printf(cvtest::TS::LOG, "forEach is not correct because total is invalid.\n");
|
||||
errcount++;
|
||||
}
|
||||
}
|
||||
|
||||
RNG rng;
|
||||
const int MAX_DIM = 5, MAX_DIM_SZ = 10;
|
||||
|
@@ -745,6 +745,24 @@ TEST(UMat, Sync)
|
||||
EXPECT_EQ(0, cvtest::norm(um.getMat(ACCESS_READ), cv::Mat(um.size(), um.type(), 19), NORM_INF));
|
||||
}
|
||||
|
||||
TEST(UMat, CopyToIfDeviceCopyIsObsolete)
|
||||
{
|
||||
UMat um(7, 2, CV_8UC1);
|
||||
Mat m(um.size(), um.type());
|
||||
m.setTo(Scalar::all(0));
|
||||
|
||||
{
|
||||
// make obsolete device copy of UMat
|
||||
Mat temp = um.getMat(ACCESS_WRITE);
|
||||
temp.setTo(Scalar::all(10));
|
||||
}
|
||||
|
||||
m.copyTo(um);
|
||||
um.setTo(Scalar::all(17));
|
||||
|
||||
EXPECT_EQ(0, cvtest::norm(um.getMat(ACCESS_READ), Mat(um.size(), um.type(), 17), NORM_INF));
|
||||
}
|
||||
|
||||
TEST(UMat, setOpenCL)
|
||||
{
|
||||
// save the current state
|
||||
|
Reference in New Issue
Block a user