Merge remote-tracking branch 'master' into stitch-fix

* 'master' of github.com:itseez/opencv: (82 commits)
  moved part of video to contrib/{outflow, bgsegm}; moved matlab to contrib
  added some basic functionality needed by the new face module (moved from the old "contrib")
  moved to the new opencv_contrib/face module
  fixed various warnings and obvious errors reported by clang compiler and the coverity tool.
  Fixed review comment from Vadim Pisarevsky
  modified farneback sample to use T-API
  ECC patch by the author (G. Evangelidis); fixed some OCL Farneback optical flow test failures on Mac
  small fix for GaussianBlur ocl test
  fix binary package build
  small fix for ocl_resize
  fix IOS framework
  fixed test ocl_MatchTemplate for sparse matrix
  Fixed typos
  fixing error, wrong template method param.
  fixing Mac build
  some formal changes (generally adding constness)
  Fixed choice of kercn and rowsPerWI for non-Intel device.
  fixed nDiffs for CalcBackProject
  fixed tests for ocl_filter2d, ocl_matchTemplate, ocl_histogram.cpp
  Fixed issue: Mat::copyTo(UMat) if device copy is obsolete. Added test.
  ...

Conflicts:
	modules/core/include/opencv2/core/mat.inl.hpp
This commit is contained in:
mshabunin
2014-08-11 14:50:08 +04:00
346 changed files with 26116 additions and 422526 deletions

View File

@@ -1,6 +1,5 @@
set(the_description "The Core Functionality")
ocv_add_module(core PRIVATE_REQUIRED ${ZLIB_LIBRARIES} "${OPENCL_LIBRARIES}" OPTIONAL opencv_cudev)
ocv_module_include_directories(${ZLIB_INCLUDE_DIRS})
if(HAVE_WINRT_CX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /ZW")
@@ -19,11 +18,11 @@ file(GLOB lib_cuda_hdrs_detail "include/opencv2/${name}/cuda/detail/*.hpp" "incl
source_group("Cuda Headers" FILES ${lib_cuda_hdrs})
source_group("Cuda Headers\\Detail" FILES ${lib_cuda_hdrs_detail})
ocv_glob_module_sources(SOURCES "${opencv_core_BINARY_DIR}/version_string.inc"
ocv_glob_module_sources(SOURCES "${OPENCV_MODULE_opencv_core_BINARY_DIR}/version_string.inc"
HEADERS ${lib_cuda_hdrs} ${lib_cuda_hdrs_detail})
ocv_module_include_directories(${the_module} ${ZLIB_INCLUDE_DIRS})
ocv_create_module()
ocv_add_precompiled_headers(${the_module})
ocv_add_accuracy_tests()
ocv_add_perf_tests()

View File

@@ -845,7 +845,6 @@ For convenience, the following types from the OpenCV C API already have such a s
that calls the appropriate release function:
* ``CvCapture``
* :ocv:struct:`CvDTreeSplit`
* :ocv:struct:`CvFileStorage`
* ``CvHaarClassifierCascade``
* :ocv:struct:`CvMat`
@@ -2326,6 +2325,69 @@ Returns the matrix iterator and sets it to the after-last matrix element.
The methods return the matrix read-only or read-write iterators, set to the point following the last matrix element.
Mat::forEach
------------
Runs the given functor over all matrix elements.
.. ocv:function:: template<typename _Tp, typename Functor> void Mat::forEach(const Functor& operation)
.. ocv:function:: template<typename _Tp, typename Functor> void Mat::forEach(const Functor& operation) const
The method runs the operation in parallel. The operation is passed as an argument and has to be a function pointer, a function object or a lambda (C++11).
All of the operations below are equivalent: each puts 0xFF into the first channel of every matrix element. ::
Mat image(1920, 1080, CV_8UC3);
typedef cv::Point3_<uint8_t> Pixel;
// first. raw pointer access.
for (int r = 0; r < image.rows; ++r) {
Pixel* ptr = image.ptr<Pixel>(r, 0);
const Pixel* ptr_end = ptr + image.cols;
for (; ptr != ptr_end; ++ptr) {
ptr->x = 255;
}
}
// Using MatIterator. (Simple, but there is iterator overhead.)
for (Pixel &p : cv::Mat_<Pixel>(image)) {
p.x = 255;
}
// Parallel execution with function object.
struct Operator {
void operator ()(Pixel &pixel, const int * position) {
pixel.x = 255;
}
};
image.forEach<Pixel>(Operator());
// Parallel execution using C++11 lambda.
image.forEach<Pixel>([](Pixel &p, const int * position) -> void {
p.x = 255;
});
The ``position`` parameter is the index of the current element. ::
// Creating 3D matrix (255 x 255 x 255) typed uint8_t,
// and initialize all elements by the value which equals elements position.
// i.e. pixels (x,y,z) = (1,2,3) is (b,g,r) = (1,2,3).
int sizes[] = { 255, 255, 255 };
typedef cv::Point3_<uint8_t> Pixel;
Mat_<Pixel> image = Mat::zeros(3, sizes, CV_8UC3);
image.forEach([&](Pixel& pixel, const int position[]) -> void{
pixel.x = position[0];
pixel.y = position[1];
pixel.z = position[2];
});
Mat\_
-----
.. ocv:class:: Mat_

View File

@@ -690,7 +690,61 @@ public:
Mat mean; //!< mean value subtracted before the projection and added after the back projection
};
// Linear Discriminant Analysis (LDA).
// Declares construction, (de)serialization, training (compute) and
// projection/reconstruction entry points. Only the two eigen-accessors are
// defined inline; all other members are implemented elsewhere.
class CV_EXPORTS LDA
{
public:
// Initializes a LDA with num_components (default 0).
// NOTE(review): earlier wording also mentioned a "dataAsRow" option, but this
// constructor takes no such argument; how _dataAsRow is initialized must be
// confirmed in the implementation.
explicit LDA(int num_components = 0);
// Initializes and performs a Discriminant Analysis with Fisher's
// Optimization Criterion on given data in src and corresponding labels
// in labels. If 0 (or less) number of components are given, they are
// automatically determined for given data in computation.
LDA(InputArrayOfArrays src, InputArray labels, int num_components = 0);
// Serializes this object to a given filename.
void save(const String& filename) const;
// Deserializes this object from a given filename.
void load(const String& filename);
// Serializes this object to a given cv::FileStorage.
void save(FileStorage& fs) const;
// Deserializes this object from a given cv::FileStorage.
void load(const FileStorage& node);
// Destructor.
~LDA();
//! Compute the discriminants for data in src and labels.
void compute(InputArrayOfArrays src, InputArray labels);
// Projects samples into the LDA subspace.
Mat project(InputArray src);
// Reconstructs projections from the LDA subspace.
Mat reconstruct(InputArray src);
// Returns the eigenvectors of this LDA (returns the Mat header by value).
Mat eigenvectors() const { return _eigenvectors; }
// Returns the eigenvalues of this LDA (returns the Mat header by value).
Mat eigenvalues() const { return _eigenvalues; }
// Static helpers that take the basis W and the mean explicitly.
// NOTE(review): presumably the stateless counterparts of project() and
// reconstruct(); confirm against the implementation.
static Mat subspaceProject(InputArray W, InputArray mean, InputArray src);
static Mat subspaceReconstruct(InputArray W, InputArray mean, InputArray src);
protected:
// Sample-layout flag; its exact meaning is not visible from this declaration.
bool _dataAsRow;
// Requested number of components (see the constructors).
int _num_components;
// Results exposed through eigenvectors() / eigenvalues().
Mat _eigenvectors;
Mat _eigenvalues;
// Internal worker taking the same inputs as compute().
void lda(InputArrayOfArrays src, InputArray labels);
};
/*!
Singular Value Decomposition class

View File

@@ -261,8 +261,8 @@ public:
int* refcount;
//! helper fields used in locateROI and adjustROI
uchar* datastart;
uchar* dataend;
const uchar* datastart;
const uchar* dataend;
//! allocator
Allocator* allocator;
@@ -349,8 +349,8 @@ public:
uchar* data;
int* refcount;
uchar* datastart;
uchar* dataend;
const uchar* datastart;
const uchar* dataend;
AllocType alloc_type;
};

View File

@@ -395,7 +395,7 @@ struct CV_EXPORTS UMatData
struct CV_EXPORTS UMatDataAutoLock
{
UMatDataAutoLock(UMatData* u);
explicit UMatDataAutoLock(UMatData* u);
~UMatDataAutoLock();
UMatData* u;
};
@@ -403,7 +403,7 @@ struct CV_EXPORTS UMatDataAutoLock
struct CV_EXPORTS MatSize
{
MatSize(int* _p);
explicit MatSize(int* _p);
Size operator()() const;
const int& operator[](int i) const;
int& operator[](int i);
@@ -417,7 +417,7 @@ struct CV_EXPORTS MatSize
struct CV_EXPORTS MatStep
{
MatStep();
MatStep(size_t s);
explicit MatStep(size_t s);
const size_t& operator[](int i) const;
size_t& operator[](int i);
operator size_t() const;
@@ -900,6 +900,11 @@ public:
template<typename _Tp> MatConstIterator_<_Tp> begin() const;
template<typename _Tp> MatConstIterator_<_Tp> end() const;
//! template methods for operation over all matrix elements.
// the operations take care of skipping gaps in the end of rows (if any)
template<typename _Tp, typename Functor> void forEach(const Functor& operation);
template<typename _Tp, typename Functor> void forEach(const Functor& operation) const;
enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
@@ -918,9 +923,9 @@ public:
uchar* data;
//! helper fields used in locateROI and adjustROI
uchar* datastart;
uchar* dataend;
uchar* datalimit;
const uchar* datastart;
const uchar* dataend;
const uchar* datalimit;
//! custom allocator
MatAllocator* allocator;
@@ -934,6 +939,7 @@ public:
MatStep step;
protected:
template<typename _Tp, typename Functor> void forEach_impl(const Functor& operation);
};
@@ -1043,6 +1049,11 @@ public:
const_iterator begin() const;
const_iterator end() const;
//! template methods for operation over all matrix elements.
// the operations take care of skipping gaps in the end of rows (if any)
template<typename Functor> void forEach(const Functor& operation);
template<typename Functor> void forEach(const Functor& operation) const;
//! equivalent to Mat::create(_rows, _cols, DataType<_Tp>::type)
void create(int _rows, int _cols);
//! equivalent to Mat::create(_size, DataType<_Tp>::type)
@@ -1804,9 +1815,9 @@ public:
//! copy operator
MatConstIterator& operator = (const MatConstIterator& it);
//! returns the current matrix element
uchar* operator *() const;
const uchar* operator *() const;
//! returns the i-th matrix element, relative to the current
uchar* operator [](ptrdiff_t i) const;
const uchar* operator [](ptrdiff_t i) const;
//! shifts the iterator forward by the specified number of elements
MatConstIterator& operator += (ptrdiff_t ofs);
@@ -1831,9 +1842,9 @@ public:
const Mat* m;
size_t elemSize;
uchar* ptr;
uchar* sliceStart;
uchar* sliceEnd;
const uchar* ptr;
const uchar* sliceStart;
const uchar* sliceEnd;
};
@@ -1917,9 +1928,9 @@ public:
//! constructor that sets the iterator to the specified element of the matrix
MatIterator_(Mat_<_Tp>* _m, int _row, int _col=0);
//! constructor that sets the iterator to the specified element of the matrix
MatIterator_(const Mat_<_Tp>* _m, Point _pt);
MatIterator_(Mat_<_Tp>* _m, Point _pt);
//! constructor that sets the iterator to the specified element of the matrix
MatIterator_(const Mat_<_Tp>* _m, const int* _idx);
MatIterator_(Mat_<_Tp>* _m, const int* _idx);
//! copy constructor
MatIterator_(const MatIterator_& it);
//! copy operator

View File

@@ -438,7 +438,7 @@ Mat::Mat(const std::vector<_Tp>& vec, bool copyData)
if( !copyData )
{
step[0] = step[1] = sizeof(_Tp);
data = datastart = (uchar*)&vec[0];
datastart = data = (uchar*)&vec[0];
datalimit = dataend = datastart + rows * step[0];
}
else
@@ -453,7 +453,7 @@ Mat::Mat(const Vec<_Tp, n>& vec, bool copyData)
if( !copyData )
{
step[0] = step[1] = sizeof(_Tp);
data = datastart = (uchar*)vec.val;
datastart = data = (uchar*)vec.val;
datalimit = dataend = datastart + rows * step[0];
}
else
@@ -470,7 +470,7 @@ Mat::Mat(const Matx<_Tp,m,n>& M, bool copyData)
{
step[0] = cols * sizeof(_Tp);
step[1] = sizeof(_Tp);
data = datastart = (uchar*)M.val;
datastart = data = (uchar*)M.val;
datalimit = dataend = datastart + rows * step[0];
}
else
@@ -485,7 +485,7 @@ Mat::Mat(const Point_<_Tp>& pt, bool copyData)
if( !copyData )
{
step[0] = step[1] = sizeof(_Tp);
data = datastart = (uchar*)&pt.x;
datastart = data = (uchar*)&pt.x;
datalimit = dataend = datastart + rows * step[0];
}
else
@@ -504,7 +504,7 @@ Mat::Mat(const Point3_<_Tp>& pt, bool copyData)
if( !copyData )
{
step[0] = step[1] = sizeof(_Tp);
data = datastart = (uchar*)&pt.x;
datastart = data = (uchar*)&pt.x;
datalimit = dataend = datastart + rows * step[0];
}
else
@@ -642,7 +642,7 @@ inline void Mat::release()
if( u && CV_XADD(&u->refcount, -1) == 1 )
deallocate();
u = NULL;
data = datastart = dataend = datalimit = 0;
datastart = dataend = datalimit = data = 0;
for(int i = 0; i < dims; i++)
size.p[i] = 0;
}
@@ -1000,6 +1000,17 @@ MatIterator_<_Tp> Mat::end()
return it;
}
//! Runs `operation` over every element of this (mutable) matrix.
//! Simply forwards to forEach_impl<_Tp>(), which does the actual traversal.
//! @param operation functor invoked for each element, viewed as _Tp.
template<typename _Tp, typename Functor> inline
void Mat::forEach(const Functor& operation)
{
    this->forEach_impl<_Tp>(operation);
}   // no trailing ';' here: it would be an empty declaration (ill-formed in C++98, -Wpedantic otherwise)
template<typename _Tp, typename Functor> inline
void Mat::forEach(const Functor& operation) const {
// call as not const
(const_cast<Mat*>(this))->forEach<const _Tp>(operation);
};
template<typename _Tp> inline
Mat::operator std::vector<_Tp>() const
{
@@ -1045,7 +1056,7 @@ void Mat::push_back(const _Tp& elem)
}
CV_Assert(DataType<_Tp>::type == type() && cols == 1
/* && dims == 2 (cols == 1 implies dims == 2) */);
uchar* tmp = dataend + step[0];
const uchar* tmp = dataend + step[0];
if( !isSubmatrix() && isContinuous() && tmp <= datalimit )
{
*(_Tp*)(data + (size.p[0]++) * step.p[0]) = elem;
@@ -1585,6 +1596,15 @@ MatIterator_<_Tp> Mat_<_Tp>::end()
return Mat::end<_Tp>();
}
//! Typed convenience wrapper: the element type is taken from this Mat_<_Tp>,
//! so only the functor has to be supplied. Forwards to Mat::forEach.
template<typename _Tp> template<typename Functor> inline
void Mat_<_Tp>::forEach(const Functor& operation)
{
    // Functor is deduced from the argument; _Tp is fixed by the class.
    Mat::forEach<_Tp>(operation);
}
//! Const counterpart of the typed wrapper; because this member is const, the
//! call below resolves to the const overload of Mat::forEach.
template<typename _Tp> template<typename Functor> inline
void Mat_<_Tp>::forEach(const Functor& operation) const
{
    // Functor is deduced from the argument; _Tp is fixed by the class.
    Mat::forEach<_Tp>(operation);
}
///////////////////////////// SparseMat /////////////////////////////
@@ -2149,7 +2169,7 @@ MatConstIterator& MatConstIterator::operator = (const MatConstIterator& it )
}
inline
uchar* MatConstIterator::operator *() const
const uchar* MatConstIterator::operator *() const
{
return ptr;
}
@@ -2282,7 +2302,7 @@ MatConstIterator operator - (const MatConstIterator& a, ptrdiff_t ofs)
inline
uchar* MatConstIterator::operator [](ptrdiff_t i) const
const uchar* MatConstIterator::operator [](ptrdiff_t i) const
{
return *(*this + i);
}
@@ -2454,12 +2474,12 @@ MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, int _row, int _col)
{}
template<typename _Tp> inline
MatIterator_<_Tp>::MatIterator_(const Mat_<_Tp>* _m, Point _pt)
MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, Point _pt)
: MatConstIterator_<_Tp>(_m, _pt)
{}
template<typename _Tp> inline
MatIterator_<_Tp>::MatIterator_(const Mat_<_Tp>* _m, const int* _idx)
MatIterator_<_Tp>::MatIterator_(Mat_<_Tp>* _m, const int* _idx)
: MatConstIterator_<_Tp>(_m, _idx)
{}
@@ -2593,7 +2613,7 @@ inline SparseMatConstIterator& SparseMatConstIterator::operator = (const SparseM
template<typename _Tp> inline
const _Tp& SparseMatConstIterator::value() const
{
return *(_Tp*)ptr;
return *(const _Tp*)ptr;
}
inline

View File

@@ -636,6 +636,9 @@ protected:
CV_EXPORTS MatAllocator* getOpenCLAllocator();
CV_EXPORTS_W bool isPerformanceCheckBypassed();
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::isPerformanceCheckBypassed() || (condition))
}}
#endif

View File

@@ -274,6 +274,102 @@ public:
CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.);
/////////////////////////////// forEach method of cv::Mat ////////////////////////////
/**
 * Shared implementation behind Mat::forEach.
 *
 * The matrix is treated as a sequence of "lines" (runs along the last
 * dimension). The range [0, number of lines) is handed to parallel_for_, and
 * for every element of every line the functor is invoked as
 * op(element, position), where position points to the element's index
 * (one int per dimension).
 *
 * @tparam _Tp      element type used to address the matrix data.
 * @tparam Functor  callable compatible with (_Tp&, const int*).
 * @param operation functor to apply; a copy is stored in the loop body object.
 */
template<typename _Tp, typename Functor> inline
void Mat::forEach_impl(const Functor& operation) {
    if (false) {
        // Never executed: exists only to produce a readable compile error.
        // A literal 0 is used instead of NULL because reinterpret_cast from
        // nullptr (a possible definition of NULL) is ill-formed.
        operation(*reinterpret_cast<_Tp*>(0), reinterpret_cast<int*>(0));
        // If your compiler fails on this line,
        // please check that your functor signature is
        //     (_Tp&, const int*)   <- multidimensional
        //  or (_Tp&, const void*)  <- in case you don't need the current idx.
    }

    CV_Assert(this->total() / this->size[this->dims - 1] <= INT_MAX);
    const int LINES = static_cast<int>(this->total() / this->size[this->dims - 1]);

    class PixelOperationWrapper : public ParallelLoopBody
    {
    public:
        PixelOperationWrapper(Mat_<_Tp>* const frame, const Functor& _operation)
            : mat(frame), op(_operation) {}
        virtual ~PixelOperationWrapper() {}

        // ! Overloaded virtual operator
        // convert range call to row call.
        virtual void operator()(const Range &range) const {
            const int DIMS = mat->dims;
            const int COLS = mat->size[DIMS - 1];
            if (DIMS <= 2) {
                for (int row = range.start; row < range.end; ++row) {
                    this->rowCall2(row, COLS);
                }
            } else {
                // One index slot per dimension. (This buffer used to be sized
                // by COLS, which overflowed whenever COLS < DIMS.)
                std::vector<int> idx(DIMS); /// idx is modified in this->rowCall
                idx[DIMS - 2] = range.start - 1;

                for (int line_num = range.start; line_num < range.end; ++line_num) {
                    idx[DIMS - 2]++;
                    // Propagate the carry towards the leading dimension.
                    // Stops at i == 1 so that idx[i - 1] never goes below idx[0].
                    for (int i = DIMS - 2; i > 0; --i) {
                        if (idx[i] >= mat->size[i]) {
                            idx[i - 1] += idx[i] / mat->size[i];
                            idx[i] %= mat->size[i];
                            continue; // carry-over;
                        }
                        else {
                            break;
                        }
                    }
                    this->rowCall(&idx[0], COLS, DIMS);
                }
            }
        }

    private:
        Mat_<_Tp>* const mat;
        const Functor op;

        // ! Call operator for each elements in this row.
        inline void rowCall(int* const idx, const int COLS, const int DIMS) const {
            int &col = idx[DIMS - 1];
            col = 0;
            _Tp* pixel = &(mat->template at<_Tp>(idx));

            while (col < COLS) {
                op(*pixel, const_cast<const int*>(idx));
                pixel++; col++;
            }
            col = 0;
        }

        // ! Call operator for each elements in this row. 2d mat special version.
        inline void rowCall2(const int row, const int COLS) const {
            union Index{
                int body[2];
                operator const int*() const {
                    return reinterpret_cast<const int*>(this);
                }
                int& operator[](const int i) {
                    return body[i];
                }
            } idx = {{row, 0}};
            // Special union is needed to avoid
            // "error: array subscript is above array bounds [-Werror=array-bounds]"
            // when call the functor `op` such that access idx[3].

            _Tp* pixel = &(mat->template at<_Tp>(idx));
            const _Tp* const pixel_end = pixel + COLS;
            while(pixel < pixel_end) {
                op(*pixel++, static_cast<const int*>(idx));
                idx[1]++;
            }
        }

        PixelOperationWrapper& operator=(const PixelOperationWrapper &) {
            CV_Assert(false);
            // We can not remove this implementation because Visual Studio warning C4822.
            return *this;
        }
    };

    parallel_for_(cv::Range(0, LINES), PixelOperationWrapper(reinterpret_cast<Mat_<_Tp>*>(this), operation));
}
/////////////////////////// Synchronization Primitives ///////////////////////////////
class CV_EXPORTS Mutex

View File

@@ -39,7 +39,7 @@
//
//M*/
#include "perf_precomp.hpp"
#include "../perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
#ifdef HAVE_OPENCL

View File

@@ -4,7 +4,7 @@
//
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
#include "perf_precomp.hpp"
#include "../perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
#ifdef HAVE_OPENCL

View File

@@ -44,7 +44,7 @@
//
//M*/
#include "perf_precomp.hpp"
#include "../perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
#ifdef HAVE_OPENCL

View File

@@ -44,7 +44,7 @@
//
//M*/
#include "perf_precomp.hpp"
#include "../perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
#ifdef HAVE_OPENCL

View File

@@ -44,7 +44,7 @@
//
//M*/
#include "perf_precomp.hpp"
#include "../perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
#ifdef HAVE_OPENCL

View File

@@ -5,7 +5,7 @@
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
#include "perf_precomp.hpp"
#include "../perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
#ifdef HAVE_OPENCL

View File

@@ -4,7 +4,7 @@
//
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
#include "perf_precomp.hpp"
#include "../perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
#ifdef HAVE_OPENCL

View File

@@ -47,7 +47,7 @@
// */
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include "opencl_kernels_core.hpp"
namespace cv
{
@@ -1607,7 +1607,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
#ifdef HAVE_OPENCL
bool use_opencl = _dst.isUMat() && dims1 <= 2 && dims2 <= 2;
bool use_opencl = OCL_PERFORMANCE_CHECK(_dst.isUMat()) && dims1 <= 2 && dims2 <= 2;
#endif
bool src1Scalar = checkScalar(*psrc1, type2, kind1, kind2);
bool src2Scalar = checkScalar(*psrc2, type1, kind2, kind1);
@@ -2440,6 +2440,34 @@ addWeighted8u( const uchar* src1, size_t step1,
_mm_storel_epi64((__m128i*)(dst + x), u);
}
}
#elif CV_NEON
float32x4_t g = vdupq_n_f32 (gamma);
for( ; x <= size.width - 8; x += 8 )
{
uint8x8_t in1 = vld1_u8(src1+x);
uint16x8_t in1_16 = vmovl_u8(in1);
float32x4_t in1_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in1_16)));
float32x4_t in1_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in1_16)));
uint8x8_t in2 = vld1_u8(src2+x);
uint16x8_t in2_16 = vmovl_u8(in2);
float32x4_t in2_f_l = vcvtq_f32_u32(vmovl_u16(vget_low_u16(in2_16)));
float32x4_t in2_f_h = vcvtq_f32_u32(vmovl_u16(vget_high_u16(in2_16)));
float32x4_t out_f_l = vaddq_f32(vmulq_n_f32(in1_f_l, alpha), vmulq_n_f32(in2_f_l, beta));
float32x4_t out_f_h = vaddq_f32(vmulq_n_f32(in1_f_h, alpha), vmulq_n_f32(in2_f_h, beta));
out_f_l = vaddq_f32(out_f_l, g);
out_f_h = vaddq_f32(out_f_h, g);
uint16x4_t out_16_l = vqmovun_s32(vcvtq_s32_f32(out_f_l));
uint16x4_t out_16_h = vqmovun_s32(vcvtq_s32_f32(out_f_h));
uint16x8_t out_16 = vcombine_u16(out_16_l, out_16_h);
uint8x8_t out = vqmovn_u16(out_16);
vst1_u8(dst+x, out);
}
#endif
#if CV_ENABLE_UNROLLED
for( ; x <= size.width - 4; x += 4 )
@@ -2650,6 +2678,14 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
}
}
#elif CV_NEON
uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);
for( ; x <= size.width - 16; x += 16 )
{
vst1q_u8(dst+x, veorq_u8(vcgtq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
}
#endif
for( ; x < size.width; x++ ){
@@ -2674,6 +2710,13 @@ static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t ste
_mm_storeu_si128((__m128i*)(dst + x), r00);
}
}
#elif CV_NEON
uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);
for( ; x <= size.width - 16; x += 16 )
{
vst1q_u8(dst+x, veorq_u8(vceqq_u8(vld1q_u8(src1+x), vld1q_u8(src2+x)), mask));
}
#endif
for( ; x < size.width; x++ )
dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
@@ -2759,6 +2802,22 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
x += 8;
}
}
#elif CV_NEON
uint8x16_t mask = code == CMP_GT ? vdupq_n_u8(0) : vdupq_n_u8(255);
for( ; x <= size.width - 16; x += 16 )
{
int16x8_t in1 = vld1q_s16(src1 + x);
int16x8_t in2 = vld1q_s16(src2 + x);
uint8x8_t t1 = vmovn_u16(vcgtq_s16(in1, in2));
in1 = vld1q_s16(src1 + x + 8);
in2 = vld1q_s16(src2 + x + 8);
uint8x8_t t2 = vmovn_u16(vcgtq_s16(in1, in2));
vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
}
#endif
for( ; x < size.width; x++ ){
@@ -2797,6 +2856,21 @@ static void cmp16s(const short* src1, size_t step1, const short* src2, size_t st
x += 8;
}
}
#elif CV_NEON
uint8x16_t mask = code == CMP_EQ ? vdupq_n_u8(0) : vdupq_n_u8(255);
for( ; x <= size.width - 16; x += 16 )
{
int16x8_t in1 = vld1q_s16(src1 + x);
int16x8_t in2 = vld1q_s16(src2 + x);
uint8x8_t t1 = vmovn_u16(vceqq_s16(in1, in2));
in1 = vld1q_s16(src1 + x + 8);
in2 = vld1q_s16(src2 + x + 8);
uint8x8_t t2 = vmovn_u16(vceqq_s16(in1, in2));
vst1q_u8(dst+x, veorq_u8(vcombine_u8(t1, t2), mask));
}
#endif
for( ; x < size.width; x++ )
dst[x] = (uchar)(-(src1[x] == src2[x]) ^ m);
@@ -2982,7 +3056,7 @@ void cv::compare(InputArray _src1, InputArray _src2, OutputArray _dst, int op)
haveScalar = true;
}
CV_OCL_RUN(_src1.dims() <= 2 && _src2.dims() <= 2 && _dst.isUMat(),
CV_OCL_RUN(_src1.dims() <= 2 && _src2.dims() <= 2 && OCL_PERFORMANCE_CHECK(_dst.isUMat()),
ocl_compare(_src1, _src2, _dst, op, haveScalar))
int kind1 = _src1.kind(), kind2 = _src2.kind();
@@ -3085,7 +3159,7 @@ namespace cv
{
template <typename T>
struct InRange_SSE
struct InRange_SIMD
{
int operator () (const T *, const T *, const T *, uchar *, int) const
{
@@ -3096,7 +3170,7 @@ struct InRange_SSE
#if CV_SSE2
template <>
struct InRange_SSE<uchar>
struct InRange_SIMD<uchar>
{
int operator () (const uchar * src1, const uchar * src2, const uchar * src3,
uchar * dst, int len) const
@@ -3121,7 +3195,7 @@ struct InRange_SSE<uchar>
};
template <>
struct InRange_SSE<schar>
struct InRange_SIMD<schar>
{
int operator () (const schar * src1, const schar * src2, const schar * src3,
uchar * dst, int len) const
@@ -3146,7 +3220,7 @@ struct InRange_SSE<schar>
};
template <>
struct InRange_SSE<ushort>
struct InRange_SIMD<ushort>
{
int operator () (const ushort * src1, const ushort * src2, const ushort * src3,
uchar * dst, int len) const
@@ -3172,7 +3246,7 @@ struct InRange_SSE<ushort>
};
template <>
struct InRange_SSE<short>
struct InRange_SIMD<short>
{
int operator () (const short * src1, const short * src2, const short * src3,
uchar * dst, int len) const
@@ -3198,7 +3272,7 @@ struct InRange_SSE<short>
};
template <>
struct InRange_SSE<int>
struct InRange_SIMD<int>
{
int operator () (const int * src1, const int * src2, const int * src3,
uchar * dst, int len) const
@@ -3230,7 +3304,7 @@ struct InRange_SSE<int>
};
template <>
struct InRange_SSE<float>
struct InRange_SIMD<float>
{
int operator () (const float * src1, const float * src2, const float * src3,
uchar * dst, int len) const
@@ -3261,6 +3335,160 @@ struct InRange_SSE<float>
}
};
#elif CV_NEON
template <>
struct InRange_SIMD<uchar>
{
int operator () (const uchar * src1, const uchar * src2, const uchar * src3,
uchar * dst, int len) const
{
int x = 0;
for ( ; x <= len - 16; x += 16 )
{
uint8x16_t values = vld1q_u8(src1 + x);
uint8x16_t low = vld1q_u8(src2 + x);
uint8x16_t high = vld1q_u8(src3 + x);
vst1q_u8(dst + x, vandq_u8(vcgeq_u8(values, low), vcgeq_u8(high, values)));
}
return x;
}
};
template <>
struct InRange_SIMD<schar>
{
int operator () (const schar * src1, const schar * src2, const schar * src3,
uchar * dst, int len) const
{
int x = 0;
for ( ; x <= len - 16; x += 16 )
{
int8x16_t values = vld1q_s8(src1 + x);
int8x16_t low = vld1q_s8(src2 + x);
int8x16_t high = vld1q_s8(src3 + x);
vst1q_u8(dst + x, vandq_u8(vcgeq_s8(values, low), vcgeq_s8(high, values)));
}
return x;
}
};
template <>
struct InRange_SIMD<ushort>
{
int operator () (const ushort * src1, const ushort * src2, const ushort * src3,
uchar * dst, int len) const
{
int x = 0;
for ( ; x <= len - 16; x += 16 )
{
uint16x8_t values = vld1q_u16((const uint16_t*)(src1 + x));
uint16x8_t low = vld1q_u16((const uint16_t*)(src2 + x));
uint16x8_t high = vld1q_u16((const uint16_t*)(src3 + x));
uint8x8_t r1 = vmovn_u16(vandq_u16(vcgeq_u16(values, low), vcgeq_u16(high, values)));
values = vld1q_u16((const uint16_t*)(src1 + x + 8));
low = vld1q_u16((const uint16_t*)(src2 + x + 8));
high = vld1q_u16((const uint16_t*)(src3 + x + 8));
uint8x8_t r2 = vmovn_u16(vandq_u16(vcgeq_u16(values, low), vcgeq_u16(high, values)));
vst1q_u8(dst + x, vcombine_u8(r1, r2));
}
return x;
}
};
template <>
struct InRange_SIMD<short>
{
int operator () (const short * src1, const short * src2, const short * src3,
uchar * dst, int len) const
{
int x = 0;
for ( ; x <= len - 16; x += 16 )
{
int16x8_t values = vld1q_s16((const int16_t*)(src1 + x));
int16x8_t low = vld1q_s16((const int16_t*)(src2 + x));
int16x8_t high = vld1q_s16((const int16_t*)(src3 + x));
uint8x8_t r1 = vmovn_u16(vandq_u16(vcgeq_s16(values, low), vcgeq_s16(high, values)));
values = vld1q_s16((const int16_t*)(src1 + x + 8));
low = vld1q_s16((const int16_t*)(src2 + x + 8));
high = vld1q_s16((const int16_t*)(src3 + x + 8));
uint8x8_t r2 = vmovn_u16(vandq_u16(vcgeq_s16(values, low), vcgeq_s16(high, values)));
vst1q_u8(dst + x, vcombine_u8(r1, r2));
}
return x;
}
};
template <>
struct InRange_SIMD<int>
{
int operator () (const int * src1, const int * src2, const int * src3,
uchar * dst, int len) const
{
int x = 0;
for ( ; x <= len - 8; x += 8 )
{
int32x4_t values = vld1q_s32((const int32_t*)(src1 + x));
int32x4_t low = vld1q_s32((const int32_t*)(src2 + x));
int32x4_t high = vld1q_s32((const int32_t*)(src3 + x));
uint16x4_t r1 = vmovn_u32(vandq_u32(vcgeq_s32(values, low), vcgeq_s32(high, values)));
values = vld1q_s32((const int32_t*)(src1 + x + 4));
low = vld1q_s32((const int32_t*)(src2 + x + 4));
high = vld1q_s32((const int32_t*)(src3 + x + 4));
uint16x4_t r2 = vmovn_u32(vandq_u32(vcgeq_s32(values, low), vcgeq_s32(high, values)));
uint16x8_t res_16 = vcombine_u16(r1, r2);
vst1_u8(dst + x, vmovn_u16(res_16));
}
return x;
}
};
template <>
struct InRange_SIMD<float>
{
int operator () (const float * src1, const float * src2, const float * src3,
uchar * dst, int len) const
{
int x = 0;
for ( ; x <= len - 8; x += 8 )
{
float32x4_t values = vld1q_f32((const float32_t*)(src1 + x));
float32x4_t low = vld1q_f32((const float32_t*)(src2 + x));
float32x4_t high = vld1q_f32((const float32_t*)(src3 + x));
uint16x4_t r1 = vmovn_u32(vandq_u32(vcgeq_f32(values, low), vcgeq_f32(high, values)));
values = vld1q_f32((const float32_t*)(src1 + x + 4));
low = vld1q_f32((const float32_t*)(src2 + x + 4));
high = vld1q_f32((const float32_t*)(src3 + x + 4));
uint16x4_t r2 = vmovn_u32(vandq_u32(vcgeq_f32(values, low), vcgeq_f32(high, values)));
uint16x8_t res_16 = vcombine_u16(r1, r2);
vst1_u8(dst + x, vmovn_u16(res_16));
}
return x;
}
};
#endif
template <typename T>
@@ -3272,7 +3500,7 @@ static void inRange_(const T* src1, size_t step1, const T* src2, size_t step2,
step2 /= sizeof(src2[0]);
step3 /= sizeof(src3[0]);
InRange_SSE<T> vop;
InRange_SIMD<T> vop;
for( ; size.height--; src1 += step1, src2 += step2, src3 += step3, dst += step )
{
@@ -3500,7 +3728,7 @@ void cv::inRange(InputArray _src, InputArray _lowerb,
InputArray _upperb, OutputArray _dst)
{
CV_OCL_RUN(_src.dims() <= 2 && _lowerb.dims() <= 2 &&
_upperb.dims() <= 2 && _dst.isUMat(),
_upperb.dims() <= 2 && OCL_PERFORMANCE_CHECK(_dst.isUMat()),
ocl_inRange(_src, _lowerb, _upperb, _dst))
int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind();

View File

@@ -41,7 +41,7 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include "opencl_kernels_core.hpp"
namespace cv
{
@@ -50,6 +50,71 @@ namespace cv
* split & merge *
\****************************************************************************************/
#if CV_NEON
template<typename T> struct VSplit2;
template<typename T> struct VSplit3;
template<typename T> struct VSplit4;
// Generates the explicit specialization name<data_type>. Its operator() does
// one load_func(src) into a reg_type and writes r.val[0] to dst0 and r.val[1]
// to dst1 with store_func.
// NOTE(review): how many elements one call consumes depends on the
// reg_type/load_func pair chosen at each instantiation; confirm per use.
#define SPLIT2_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type>{ \
void operator()(const data_type* src, data_type* dst0, data_type* dst1){ \
reg_type r = load_func(src); \
store_func(dst0, r.val[0]); \
store_func(dst1, r.val[1]); \
} \
}
// Generates the explicit specialization name<data_type>. Its operator() does
// one load_func(src) into a reg_type and writes r.val[0..2] to dst0, dst1 and
// dst2 respectively with store_func.
// NOTE(review): how many elements one call consumes depends on the
// reg_type/load_func pair chosen at each instantiation; confirm per use.
#define SPLIT3_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type>{ \
void operator()(const data_type* src, data_type* dst0, data_type* dst1, \
data_type* dst2){ \
reg_type r = load_func(src); \
store_func(dst0, r.val[0]); \
store_func(dst1, r.val[1]); \
store_func(dst2, r.val[2]); \
} \
}
// Generates the explicit specialization name<data_type>. Its operator() does
// one load_func(src) into a reg_type and writes r.val[0..3] to dst0, dst1,
// dst2 and dst3 respectively with store_func.
// NOTE(review): how many elements one call consumes depends on the
// reg_type/load_func pair chosen at each instantiation; confirm per use.
#define SPLIT4_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type>{ \
void operator()(const data_type* src, data_type* dst0, data_type* dst1, \
data_type* dst2, data_type* dst3){ \
reg_type r = load_func(src); \
store_func(dst0, r.val[0]); \
store_func(dst1, r.val[1]); \
store_func(dst2, r.val[2]); \
store_func(dst3, r.val[3]); \
} \
}
SPLIT2_KERNEL_TEMPLATE(VSplit2, uchar , uint8x16x2_t, vld2q_u8 , vst1q_u8 );
SPLIT2_KERNEL_TEMPLATE(VSplit2, schar , int8x16x2_t, vld2q_s8 , vst1q_s8 );
SPLIT2_KERNEL_TEMPLATE(VSplit2, ushort, uint16x8x2_t, vld2q_u16, vst1q_u16);
SPLIT2_KERNEL_TEMPLATE(VSplit2, short , int16x8x2_t, vld2q_s16, vst1q_s16);
SPLIT2_KERNEL_TEMPLATE(VSplit2, int , int32x4x2_t, vld2q_s32, vst1q_s32);
SPLIT2_KERNEL_TEMPLATE(VSplit2, float , float32x4x2_t, vld2q_f32, vst1q_f32);
SPLIT2_KERNEL_TEMPLATE(VSplit2, int64 , int64x1x2_t, vld2_s64 , vst1_s64 );
SPLIT3_KERNEL_TEMPLATE(VSplit3, uchar , uint8x16x3_t, vld3q_u8 , vst1q_u8 );
SPLIT3_KERNEL_TEMPLATE(VSplit3, schar , int8x16x3_t, vld3q_s8 , vst1q_s8 );
SPLIT3_KERNEL_TEMPLATE(VSplit3, ushort, uint16x8x3_t, vld3q_u16, vst1q_u16);
SPLIT3_KERNEL_TEMPLATE(VSplit3, short , int16x8x3_t, vld3q_s16, vst1q_s16);
SPLIT3_KERNEL_TEMPLATE(VSplit3, int , int32x4x3_t, vld3q_s32, vst1q_s32);
SPLIT3_KERNEL_TEMPLATE(VSplit3, float , float32x4x3_t, vld3q_f32, vst1q_f32);
SPLIT3_KERNEL_TEMPLATE(VSplit3, int64 , int64x1x3_t, vld3_s64 , vst1_s64 );
SPLIT4_KERNEL_TEMPLATE(VSplit4, uchar , uint8x16x4_t, vld4q_u8 , vst1q_u8 );
SPLIT4_KERNEL_TEMPLATE(VSplit4, schar , int8x16x4_t, vld4q_s8 , vst1q_s8 );
SPLIT4_KERNEL_TEMPLATE(VSplit4, ushort, uint16x8x4_t, vld4q_u16, vst1q_u16);
SPLIT4_KERNEL_TEMPLATE(VSplit4, short , int16x8x4_t, vld4q_s16, vst1q_s16);
SPLIT4_KERNEL_TEMPLATE(VSplit4, int , int32x4x4_t, vld4q_s32, vst1q_s32);
SPLIT4_KERNEL_TEMPLATE(VSplit4, float , float32x4x4_t, vld4q_f32, vst1q_f32);
SPLIT4_KERNEL_TEMPLATE(VSplit4, int64 , int64x1x4_t, vld4_s64 , vst1_s64 );
#endif
template<typename T> static void
split_( const T* src, T** dst, int len, int cn )
{
@@ -58,13 +123,34 @@ split_( const T* src, T** dst, int len, int cn )
if( k == 1 )
{
T* dst0 = dst[0];
for( i = j = 0; i < len; i++, j += cn )
dst0[i] = src[j];
if(cn == 1)
{
memcpy(dst0, src, len * sizeof(T));
}
else
{
for( i = 0, j = 0 ; i < len; i++, j += cn )
dst0[i] = src[j];
}
}
else if( k == 2 )
{
T *dst0 = dst[0], *dst1 = dst[1];
for( i = j = 0; i < len; i++, j += cn )
i = j = 0;
#if CV_NEON
if(cn == 2)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 2 * inc_i;
VSplit2<T> vsplit;
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i);
}
#endif
for( ; i < len; i++, j += cn )
{
dst0[i] = src[j];
dst1[i] = src[j+1];
@@ -73,7 +159,20 @@ split_( const T* src, T** dst, int len, int cn )
else if( k == 3 )
{
T *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2];
for( i = j = 0; i < len; i++, j += cn )
i = j = 0;
#if CV_NEON
if(cn == 3)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 3 * inc_i;
VSplit3<T> vsplit;
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i, dst2 + i);
}
#endif
for( ; i < len; i++, j += cn )
{
dst0[i] = src[j];
dst1[i] = src[j+1];
@@ -83,7 +182,20 @@ split_( const T* src, T** dst, int len, int cn )
else
{
T *dst0 = dst[0], *dst1 = dst[1], *dst2 = dst[2], *dst3 = dst[3];
for( i = j = 0; i < len; i++, j += cn )
i = j = 0;
#if CV_NEON
if(cn == 4)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 4 * inc_i;
VSplit4<T> vsplit;
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vsplit(src + j, dst0 + i, dst1 + i, dst2 + i, dst3 + i);
}
#endif
for( ; i < len; i++, j += cn )
{
dst0[i] = src[j]; dst1[i] = src[j+1];
dst2[i] = src[j+2]; dst3[i] = src[j+3];
@@ -101,6 +213,77 @@ split_( const T* src, T** dst, int len, int cn )
}
}
#if CV_NEON
// Primary templates for the NEON merge kernels. They are declared but never
// defined: only the explicit specializations generated by the
// MERGEn_KERNEL_TEMPLATE macros exist, so using an element type without a
// specialization is a compile-time error (incomplete type).
template<typename T> struct VMerge2;
template<typename T> struct VMerge3;
template<typename T> struct VMerge4;
// Generates an explicit specialization `name<data_type>` whose operator()
// performs one vector step of a 2-channel merge:
//   - load_func reads one register from each planar source (src0, src1) into
//     r.val[0] and r.val[1];
//   - store_func(dst, r) writes the whole tuple to dst in one call; the
//     instantiations pass the vst2 family, which interleaves the two channels.
// The expansion ends without a semicolon, so each use site supplies its own.
#define MERGE2_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type>{ \
void operator()(const data_type* src0, const data_type* src1, \
data_type* dst){ \
reg_type r; \
r.val[0] = load_func(src0); \
r.val[1] = load_func(src1); \
store_func(dst, r); \
} \
}
// Generates an explicit specialization `name<data_type>` whose operator()
// performs one vector step of a 3-channel merge:
//   - load_func reads one register from each planar source (src0..src2) into
//     r.val[0..2];
//   - store_func(dst, r) writes the whole tuple to dst in one call; the
//     instantiations pass the vst3 family, which interleaves the three channels.
// The expansion ends without a semicolon, so each use site supplies its own.
#define MERGE3_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type>{ \
void operator()(const data_type* src0, const data_type* src1, \
const data_type* src2, data_type* dst){ \
reg_type r; \
r.val[0] = load_func(src0); \
r.val[1] = load_func(src1); \
r.val[2] = load_func(src2); \
store_func(dst, r); \
} \
}
// Generates an explicit specialization `name<data_type>` whose operator()
// performs one vector step of a 4-channel merge:
//   - load_func reads one register from each planar source (src0..src3) into
//     r.val[0..3];
//   - store_func(dst, r) writes the whole tuple to dst in one call; the
//     instantiations pass the vst4 family, which interleaves the four channels.
// The expansion ends without a semicolon, so each use site supplies its own.
#define MERGE4_KERNEL_TEMPLATE(name, data_type, reg_type, load_func, store_func) \
template<> \
struct name<data_type>{ \
void operator()(const data_type* src0, const data_type* src1, \
const data_type* src2, const data_type* src3, \
data_type* dst){ \
reg_type r; \
r.val[0] = load_func(src0); \
r.val[1] = load_func(src1); \
r.val[2] = load_func(src2); \
r.val[3] = load_func(src3); \
store_func(dst, r); \
} \
}
// Explicit VMerge2/VMerge3/VMerge4 specializations, one per supported element
// type. The 8/16/32-bit types use the q-register intrinsics (vld1q_* / vstNq_*)
// with 16/8/4-lane register tuples; int64 uses the non-q forms with a
// single-lane tuple (int64x1xN_t), i.e. one element per channel per call.
// This matches the per-call step used by merge_():
//   inc_i = (sizeof(T) == 8) ? 1 : 16/sizeof(T).

// 2-channel merge kernels
MERGE2_KERNEL_TEMPLATE(VMerge2, uchar , uint8x16x2_t, vld1q_u8 , vst2q_u8 );
MERGE2_KERNEL_TEMPLATE(VMerge2, schar , int8x16x2_t, vld1q_s8 , vst2q_s8 );
MERGE2_KERNEL_TEMPLATE(VMerge2, ushort, uint16x8x2_t, vld1q_u16, vst2q_u16);
MERGE2_KERNEL_TEMPLATE(VMerge2, short , int16x8x2_t, vld1q_s16, vst2q_s16);
MERGE2_KERNEL_TEMPLATE(VMerge2, int , int32x4x2_t, vld1q_s32, vst2q_s32);
MERGE2_KERNEL_TEMPLATE(VMerge2, float , float32x4x2_t, vld1q_f32, vst2q_f32);
MERGE2_KERNEL_TEMPLATE(VMerge2, int64 , int64x1x2_t, vld1_s64 , vst2_s64 );
// 3-channel merge kernels
MERGE3_KERNEL_TEMPLATE(VMerge3, uchar , uint8x16x3_t, vld1q_u8 , vst3q_u8 );
MERGE3_KERNEL_TEMPLATE(VMerge3, schar , int8x16x3_t, vld1q_s8 , vst3q_s8 );
MERGE3_KERNEL_TEMPLATE(VMerge3, ushort, uint16x8x3_t, vld1q_u16, vst3q_u16);
MERGE3_KERNEL_TEMPLATE(VMerge3, short , int16x8x3_t, vld1q_s16, vst3q_s16);
MERGE3_KERNEL_TEMPLATE(VMerge3, int , int32x4x3_t, vld1q_s32, vst3q_s32);
MERGE3_KERNEL_TEMPLATE(VMerge3, float , float32x4x3_t, vld1q_f32, vst3q_f32);
MERGE3_KERNEL_TEMPLATE(VMerge3, int64 , int64x1x3_t, vld1_s64 , vst3_s64 );
// 4-channel merge kernels
MERGE4_KERNEL_TEMPLATE(VMerge4, uchar , uint8x16x4_t, vld1q_u8 , vst4q_u8 );
MERGE4_KERNEL_TEMPLATE(VMerge4, schar , int8x16x4_t, vld1q_s8 , vst4q_s8 );
MERGE4_KERNEL_TEMPLATE(VMerge4, ushort, uint16x8x4_t, vld1q_u16, vst4q_u16);
MERGE4_KERNEL_TEMPLATE(VMerge4, short , int16x8x4_t, vld1q_s16, vst4q_s16);
MERGE4_KERNEL_TEMPLATE(VMerge4, int , int32x4x4_t, vld1q_s32, vst4q_s32);
MERGE4_KERNEL_TEMPLATE(VMerge4, float , float32x4x4_t, vld1q_f32, vst4q_f32);
MERGE4_KERNEL_TEMPLATE(VMerge4, int64 , int64x1x4_t, vld1_s64 , vst4_s64 );
#endif
template<typename T> static void
merge_( const T** src, T* dst, int len, int cn )
{
@@ -115,7 +298,19 @@ merge_( const T** src, T* dst, int len, int cn )
else if( k == 2 )
{
const T *src0 = src[0], *src1 = src[1];
for( i = j = 0; i < len; i++, j += cn )
i = j = 0;
#if CV_NEON
if(cn == 2)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 2 * inc_i;
VMerge2<T> vmerge;
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, dst + j);
}
#endif
for( ; i < len; i++, j += cn )
{
dst[j] = src0[i];
dst[j+1] = src1[i];
@@ -124,7 +319,19 @@ merge_( const T** src, T* dst, int len, int cn )
else if( k == 3 )
{
const T *src0 = src[0], *src1 = src[1], *src2 = src[2];
for( i = j = 0; i < len; i++, j += cn )
i = j = 0;
#if CV_NEON
if(cn == 3)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 3 * inc_i;
VMerge3<T> vmerge;
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, src2 + i, dst + j);
}
#endif
for( ; i < len; i++, j += cn )
{
dst[j] = src0[i];
dst[j+1] = src1[i];
@@ -134,7 +341,19 @@ merge_( const T** src, T* dst, int len, int cn )
else
{
const T *src0 = src[0], *src1 = src[1], *src2 = src[2], *src3 = src[3];
for( i = j = 0; i < len; i++, j += cn )
i = j = 0;
#if CV_NEON
if(cn == 4)
{
int inc_i = (sizeof(T) == 8)? 1: 16/sizeof(T);
int inc_j = 4 * inc_i;
VMerge4<T> vmerge;
for( ; i < len - inc_i; i += inc_i, j += inc_j)
vmerge(src0 + i, src1 + i, src2 + i, src3 + i, dst + j);
}
#endif
for( ; i < len; i++, j += cn )
{
dst[j] = src0[i]; dst[j+1] = src1[i];
dst[j+2] = src2[i]; dst[j+3] = src3[i];

View File

@@ -46,7 +46,7 @@
// */
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include "opencl_kernels_core.hpp"
namespace cv
{

View File

@@ -42,7 +42,7 @@
#include "precomp.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
#include "opencv2/core/opencl/runtime/opencl_core.hpp"
#include "opencl_kernels.hpp"
#include "opencl_kernels_core.hpp"
#include <map>
namespace cv
@@ -1801,11 +1801,11 @@ private:
UMat twiddles;
String buildOptions;
int thread_count;
bool status;
int dft_size;
bool status;
public:
OCL_FftPlan(int _size): dft_size(_size), status(true)
OCL_FftPlan(int _size) : dft_size(_size), status(true)
{
int min_radix;
std::vector<int> radixes, blocks;
@@ -2635,8 +2635,8 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
for( i = 0; i < nonzero_rows; i++ )
{
uchar* sptr = src.data + i*src.step;
uchar* dptr0 = dst.data + i*dst.step;
const uchar* sptr = src.ptr(i);
uchar* dptr0 = dst.ptr(i);
uchar* dptr = dptr0;
if( tmp_buf )
@@ -2649,7 +2649,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
for( ; i < count; i++ )
{
uchar* dptr0 = dst.data + i*dst.step;
uchar* dptr0 = dst.ptr(i);
memset( dptr0, 0, dst_full_len );
}
@@ -2661,7 +2661,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
{
int a = 0, b = count;
uchar *buf0, *buf1, *dbuf0, *dbuf1;
uchar* sptr0 = src.data;
const uchar* sptr0 = src.data;
uchar* dptr0 = dst.data;
buf0 = ptr;
ptr += len*complex_elem_size;
@@ -2800,7 +2800,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
int n = dst.cols;
if( elem_size == (int)sizeof(float) )
{
float* p0 = (float*)dst.data;
float* p0 = dst.ptr<float>();
size_t dstep = dst.step/sizeof(p0[0]);
for( i = 0; i < len; i++ )
{
@@ -2816,7 +2816,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
}
else
{
double* p0 = (double*)dst.data;
double* p0 = dst.ptr<double>();
size_t dstep = dst.step/sizeof(p0[0]);
for( i = 0; i < len; i++ )
{

View File

@@ -955,10 +955,10 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
SVD::compute(src, w, u, vt);
SVD::backSubst(w, u, vt, Mat(), _dst);
return type == CV_32F ?
(((float*)w.data)[0] >= FLT_EPSILON ?
((float*)w.data)[n-1]/((float*)w.data)[0] : 0) :
(((double*)w.data)[0] >= DBL_EPSILON ?
((double*)w.data)[n-1]/((double*)w.data)[0] : 0);
(w.ptr<float>()[0] >= FLT_EPSILON ?
w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
(w.ptr<double>()[0] >= DBL_EPSILON ?
w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
}
CV_Assert( m == n );
@@ -975,10 +975,10 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
transpose(vt, u);
SVD::backSubst(w, u, vt, Mat(), _dst);
return type == CV_32F ?
(((float*)w.data)[0] >= FLT_EPSILON ?
((float*)w.data)[n-1]/((float*)w.data)[0] : 0) :
(((double*)w.data)[0] >= DBL_EPSILON ?
((double*)w.data)[n-1]/((double*)w.data)[0] : 0);
(w.ptr<float>()[0] >= FLT_EPSILON ?
w.ptr<float>()[n-1]/w.ptr<float>()[0] : 0) :
(w.ptr<double>()[0] >= DBL_EPSILON ?
w.ptr<double>()[n-1]/w.ptr<double>()[0] : 0);
}
CV_Assert( method == DECOMP_LU || method == DECOMP_CHOLESKY );
@@ -988,7 +988,7 @@ double cv::invert( InputArray _src, OutputArray _dst, int method )
if( n <= 3 )
{
uchar* srcdata = src.data;
const uchar* srcdata = src.data;
uchar* dstdata = dst.data;
size_t srcstep = src.step;
size_t dststep = dst.step;
@@ -1212,8 +1212,8 @@ bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int meth
#define bf(y) ((float*)(bdata + y*src2step))[0]
#define bd(y) ((double*)(bdata + y*src2step))[0]
uchar* srcdata = src.data;
uchar* bdata = _src2.data;
const uchar* srcdata = src.data;
const uchar* bdata = _src2.data;
uchar* dstdata = dst.data;
size_t srcstep = src.step;
size_t src2step = _src2.step;
@@ -1557,13 +1557,17 @@ static void _SVDcompute( InputArray _aarr, OutputArray _w,
{
if( !at )
{
transpose(temp_u, _u);
temp_v.copyTo(_vt);
if( _u.needed() )
transpose(temp_u, _u);
if( _vt.needed() )
temp_v.copyTo(_vt);
}
else
{
transpose(temp_v, _u);
temp_u.copyTo(_vt);
if( _u.needed() )
transpose(temp_v, _u);
if( _vt.needed() )
temp_u.copyTo(_vt);
}
}
}
@@ -1705,7 +1709,7 @@ cvEigenVV( CvArr* srcarr, CvArr* evectsarr, CvArr* evalsarr, double,
eigen(src, evals, evects);
if( evects0.data != evects.data )
{
uchar* p = evects0.data;
const uchar* p = evects0.data;
evects.convertTo(evects0, evects0.type());
CV_Assert( p == evects0.data );
}
@@ -1714,7 +1718,7 @@ cvEigenVV( CvArr* srcarr, CvArr* evectsarr, CvArr* evalsarr, double,
eigen(src, evals);
if( evals0.data != evals.data )
{
uchar* p = evals0.data;
const uchar* p = evals0.data;
if( evals0.size() == evals.size() )
evals.convertTo(evals0, evals0.type());
else if( evals0.type() == evals.type() )

1119
modules/core/src/lda.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -41,7 +41,7 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include "opencl_kernels_core.hpp"
namespace cv
{

View File

@@ -41,7 +41,7 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include "opencl_kernels_core.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
namespace cv
@@ -3295,7 +3295,6 @@ void cv::PCABackProject(InputArray data, InputArray mean,
pca.backProject(data, result);
}
/****************************************************************************************\
* Earlier API *
\****************************************************************************************/

View File

@@ -41,7 +41,7 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include "opencl_kernels_core.hpp"
#include "bufferpool.impl.hpp"
@@ -346,7 +346,7 @@ static void finalizeHdr(Mat& m)
if( d > 2 )
m.rows = m.cols = -1;
if(m.u)
m.data = m.datastart = m.u->data;
m.datastart = m.data = m.u->data;
if( m.data )
{
m.datalimit = m.datastart + m.size[0]*m.step[0];
@@ -510,7 +510,7 @@ Mat::Mat(int _dims, const int* _sizes, int _type, void* _data, const size_t* _st
datalimit(0), allocator(0), u(0), size(&rows)
{
flags |= CV_MAT_TYPE(_type);
data = datastart = (uchar*)_data;
datastart = data = (uchar*)_data;
setSize(*this, _dims, _sizes, _steps, true);
finalizeHdr(*this);
}
@@ -549,7 +549,7 @@ static Mat cvMatNDToMat(const CvMatND* m, bool copyData)
if( !m )
return thiz;
thiz.data = thiz.datastart = m->data.ptr;
thiz.datastart = thiz.data = m->data.ptr;
thiz.flags |= CV_MAT_TYPE(m->type);
int _sizes[CV_MAX_DIM];
size_t _steps[CV_MAX_DIM];
@@ -587,7 +587,7 @@ static Mat cvMatToMat(const CvMat* m, bool copyData)
thiz.dims = 2;
thiz.rows = m->rows;
thiz.cols = m->cols;
thiz.data = thiz.datastart = m->data.ptr;
thiz.datastart = thiz.data = m->data.ptr;
size_t esz = CV_ELEM_SIZE(m->type), minstep = thiz.cols*esz, _step = m->step;
if( _step == 0 )
_step = minstep;
@@ -597,7 +597,7 @@ static Mat cvMatToMat(const CvMat* m, bool copyData)
}
else
{
thiz.data = thiz.datastart = thiz.dataend = 0;
thiz.datastart = thiz.dataend = thiz.data = 0;
Mat(m->rows, m->cols, m->type, m->data.ptr, m->step).copyTo(thiz);
}
@@ -636,7 +636,7 @@ static Mat iplImageToMat(const IplImage* img, bool copyData)
m.rows = img->roi->height;
m.cols = img->roi->width;
esz = CV_ELEM_SIZE(m.flags);
m.data = m.datastart = (uchar*)img->imageData +
m.datastart = m.data = (uchar*)img->imageData +
(selectedPlane ? (img->roi->coi - 1)*m.step*img->height : 0) +
img->roi->yOffset*m.step[0] + img->roi->xOffset*esz;
}
@@ -2758,15 +2758,18 @@ namespace cv {
static bool ocl_setIdentity( InputOutputArray _m, const Scalar& s )
{
int type = _m.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), kercn = cn;
if (cn == 1)
int type = _m.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), kercn = cn, rowsPerWI = 1;
int sctype = CV_MAKE_TYPE(depth, cn == 3 ? 4 : cn);
if (ocl::Device::getDefault().isIntel())
{
kercn = std::min(ocl::predictOptimalVectorWidth(_m), 4);
if (kercn != 4)
kercn = 1;
rowsPerWI = 4;
if (cn == 1)
{
kercn = std::min(ocl::predictOptimalVectorWidth(_m), 4);
if (kercn != 4)
kercn = 1;
}
}
int sctype = CV_MAKE_TYPE(depth, cn == 3 ? 4 : cn),
rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
ocl::Kernel k("setIdentity", ocl::core::set_identity_oclsrc,
format("-D T=%s -D T1=%s -D cn=%d -D ST=%s -D kercn=%d -D rowsPerWI=%d",
@@ -5529,14 +5532,14 @@ double norm( const SparseMat& src, int normType )
{
if( normType == NORM_INF )
for( i = 0; i < N; i++, ++it )
result = std::max(result, std::abs((double)*(const float*)it.ptr));
result = std::max(result, std::abs((double)it.value<float>()));
else if( normType == NORM_L1 )
for( i = 0; i < N; i++, ++it )
result += std::abs(*(const float*)it.ptr);
result += std::abs(it.value<float>());
else
for( i = 0; i < N; i++, ++it )
{
double v = *(const float*)it.ptr;
double v = it.value<float>();
result += v*v;
}
}
@@ -5544,14 +5547,14 @@ double norm( const SparseMat& src, int normType )
{
if( normType == NORM_INF )
for( i = 0; i < N; i++, ++it )
result = std::max(result, std::abs(*(const double*)it.ptr));
result = std::max(result, std::abs(it.value<double>()));
else if( normType == NORM_L1 )
for( i = 0; i < N; i++, ++it )
result += std::abs(*(const double*)it.ptr);
result += std::abs(it.value<double>());
else
for( i = 0; i < N; i++, ++it )
{
double v = *(const double*)it.ptr;
double v = it.value<double>();
result += v*v;
}
}
@@ -5575,7 +5578,7 @@ void minMaxLoc( const SparseMat& src, double* _minval, double* _maxval, int* _mi
float minval = FLT_MAX, maxval = -FLT_MAX;
for( i = 0; i < N; i++, ++it )
{
float v = *(const float*)it.ptr;
float v = it.value<float>();
if( v < minval )
{
minval = v;
@@ -5597,7 +5600,7 @@ void minMaxLoc( const SparseMat& src, double* _minval, double* _maxval, int* _mi
double minval = DBL_MAX, maxval = -DBL_MAX;
for( i = 0; i < N; i++, ++it )
{
double v = *(const double*)it.ptr;
double v = it.value<double>();
if( v < minval )
{
minval = v;

View File

@@ -57,6 +57,28 @@
# endif
#endif
// TODO Move to some common place
static bool getBoolParameter(const char* name, bool defaultValue)
{
const char* envValue = getenv(name);
if (envValue == NULL)
{
return defaultValue;
}
cv::String value = envValue;
if (value == "1" || value == "True" || value == "true" || value == "TRUE")
{
return true;
}
if (value == "0" || value == "False" || value == "false" || value == "FALSE")
{
return false;
}
CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str()));
}
// TODO Move to some common place
static size_t getConfigurationParameterForSize(const char* name, size_t defaultValue)
{
@@ -1305,7 +1327,18 @@ OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
#ifdef _DEBUG
#define CV_OclDbgAssert CV_DbgAssert
#else
#define CV_OclDbgAssert(expr) (void)(expr)
// Tells whether failed OpenCL debug checks should raise an error in this
// build. The answer comes from the OPENCV_OPENCL_RAISE_ERROR environment
// variable (default: false); it is read on the first call and cached in
// function-local statics for all later calls.
// NOTE(review): the cache is a plain pair of statics with no locking -
// confirm that concurrent first calls are acceptable here.
static bool isRaiseError()
{
    static bool value = false;
    static bool initialized = false;

    if (initialized)
        return value;

    value = getBoolParameter("OPENCV_OPENCL_RAISE_ERROR", false);
    initialized = true;
    return value;
}
// Variant of CV_OclDbgAssert used when _DEBUG is not defined: `expr` is
// evaluated exactly once in either branch, and it is only checked with
// CV_Assert when isRaiseError() returns true; otherwise its result is
// discarded. The `while ((void)0, 0)` tail keeps the macro a single
// statement (presumably spelled this way to avoid constant-condition
// warnings - TODO confirm).
#define CV_OclDbgAssert(expr) do { if (isRaiseError()) { CV_Assert(expr); } else { (void)(expr); } } while ((void)0, 0)
#endif
namespace cv { namespace ocl {
@@ -4711,4 +4744,16 @@ void* Image2D::ptr() const
return p ? p->handle : 0;
}
// Reports whether the OPENCV_OPENCL_PERF_CHECK_BYPASS environment variable
// is switched on (default: false). The variable is read on the first call
// and the result is cached in function-local statics for all later calls.
// NOTE(review): the cache is a plain pair of statics with no locking -
// confirm that concurrent first calls are acceptable here.
bool isPerformanceCheckBypassed()
{
    static bool value = false;
    static bool initialized = false;

    if (initialized)
        return value;

    value = getBoolParameter("OPENCV_OPENCL_PERF_CHECK_BYPASS", false);
    initialized = true;
    return value;
}
}}

View File

@@ -424,7 +424,7 @@ void fft_radix3_B3(__local float2* smem, __global const float2* twiddles, const
const int x3 = x2 + t/3;
float2 a0, a1, a2, a3, a4, a5, a6, a7, a8;
if (x1 < t/2)
if (x1 < t/3)
{
a0 = smem[x1]; a1 = smem[x1+t]; a2 = smem[x1+2*t];
a3 = smem[x2]; a4 = smem[x2+t]; a5 = smem[x2+2*t];
@@ -433,7 +433,7 @@ void fft_radix3_B3(__local float2* smem, __global const float2* twiddles, const
barrier(CLK_LOCAL_MEM_FENCE);
if (x1 < t/2)
if (x1 < t/3)
{
butterfly3(a0, a1, a2, smem, twiddles, x1, block_size);
butterfly3(a3, a4, a5, smem, twiddles, x2, block_size);

View File

@@ -59,7 +59,7 @@ __kernel void meanStdDev(__global const uchar * srcptr, int src_step, int src_of
for (int grain = groups * WGS; id < total; id += grain)
{
#ifdef HAVE_MASK
#ifdef HAVE_SRC_CONT
#ifdef HAVE_MASK_CONT
int mask_index = id;
#else
int mask_index = mad24(id / cols, mask_step, id % cols);

View File

@@ -39,7 +39,7 @@
//
//M*/
#include "precomp.hpp"
#include "../../precomp.hpp"
#ifdef HAVE_CLAMDBLAS

View File

@@ -39,7 +39,7 @@
//
//M*/
#include "precomp.hpp"
#include "../../precomp.hpp"
#ifdef HAVE_CLAMDFFT

View File

@@ -39,7 +39,7 @@
//
//M*/
#include "precomp.hpp"
#include "../../precomp.hpp"
#if defined(HAVE_OPENCL) && !defined(HAVE_OPENCL_STATIC)

View File

@@ -44,7 +44,7 @@
#include <climits>
#include <limits>
#include "opencl_kernels.hpp"
#include "opencl_kernels_core.hpp"
namespace cv
{
@@ -568,7 +568,7 @@ cv::Scalar cv::sum( InputArray _src )
{
#ifdef HAVE_OPENCL
Scalar _res;
CV_OCL_RUN_(_src.isUMat() && _src.dims() <= 2,
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
ocl_sum(_src, _res, OCL_OP_SUM),
_res)
#endif
@@ -719,7 +719,7 @@ int cv::countNonZero( InputArray _src )
#ifdef HAVE_OPENCL
int res = -1;
CV_OCL_RUN_(_src.isUMat() && _src.dims() <= 2,
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
ocl_countNonZero(_src, res),
res)
#endif
@@ -782,7 +782,7 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask )
int type = src.type();
if( !mask.empty() )
{
typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *);
typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
ippiMaskMeanFuncC1 ippFuncC1 =
type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
@@ -795,7 +795,7 @@ cv::Scalar cv::mean( InputArray _src, InputArray _mask )
return Scalar(res);
setIppErrorStatus();
}
typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *);
typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
ippiMaskMeanFuncC3 ippFuncC3 =
type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
@@ -918,7 +918,8 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
isContinuous = _src.isContinuous();
isContinuous = _src.isContinuous(),
isMaskContinuous = _mask.isContinuous();
const ocl::Device &defDev = ocl::Device::getDefault();
int groups = defDev.maxComputeUnits();
if (defDev.isIntel())
@@ -943,13 +944,14 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
char cvt[2][40];
String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D sqddepth=%d"
" -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s"
" -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s%s"
" -D convertToDT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
ocl::typeToStr(type), ocl::typeToStr(depth),
ocl::typeToStr(dtype), ocl::typeToStr(ddepth), sqddepth,
ocl::typeToStr(sqdtype), ocl::typeToStr(sqddepth),
ocl::convertTypeStr(depth, sqddepth, cn, cvt[0]),
cn, isContinuous ? " -D HAVE_SRC_CONT" : "",
isMaskContinuous ? " -D HAVE_MASK_CONT" : "",
ocl::convertTypeStr(depth, ddepth, cn, cvt[1]),
(int)wgs, wgs2_aligned, haveMask ? " -D HAVE_MASK" : "",
doubleSupport ? " -D DOUBLE_SUPPORT" : "");
@@ -1025,7 +1027,7 @@ static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv
void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
{
CV_OCL_RUN(_src.isUMat() && _src.dims() <= 2,
CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
ocl_meanStdDev(_src, _mean, _sdv, _mask))
Mat src = _src.getMat(), mask = _mask.getMat();
@@ -1069,7 +1071,7 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
int type = src.type();
if( !mask.empty() )
{
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *, Ipp64f *);
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *, Ipp64f *);
ippiMaskMeanStdDevFuncC1 ippFuncC1 =
type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
@@ -1081,7 +1083,7 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input
return;
setIppErrorStatus();
}
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
ippiMaskMeanStdDevFuncC3 ippFuncC3 =
type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
@@ -1571,7 +1573,7 @@ void cv::minMaxIdx(InputArray _src, double* minVal,
CV_Assert( (cn == 1 && (_mask.empty() || _mask.type() == CV_8U)) ||
(cn > 1 && _mask.empty() && !minIdx && !maxIdx) );
CV_OCL_RUN(_src.isUMat() && _src.dims() <= 2 && (_mask.empty() || _src.size() == _mask.size()),
CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2 && (_mask.empty() || _src.size() == _mask.size()),
ocl_minMaxIdx(_src, minVal, maxVal, minIdx, maxIdx, _mask))
Mat src = _src.getMat(), mask = _mask.getMat();
@@ -2234,7 +2236,7 @@ double cv::norm( InputArray _src, int normType, InputArray _mask )
#ifdef HAVE_OPENCL
double _result = 0;
CV_OCL_RUN_(_src.isUMat() && _src.dims() <= 2,
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
ocl_norm(_src, normType, _mask, _result),
_result)
#endif
@@ -2594,7 +2596,7 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m
#ifdef HAVE_OPENCL
double _result = 0;
CV_OCL_RUN_(_src1.isUMat(),
CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src1.isUMat()),
ocl_norm(_src1, _src2, normType, _mask, _result),
_result)
#endif

View File

@@ -41,7 +41,7 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include "opencl_kernels_core.hpp"
///////////////////////////////// UMat implementation ///////////////////////////////
@@ -582,7 +582,7 @@ Mat UMat::getMat(int accessFlags) const
hdr.flags = flags;
hdr.u = u;
hdr.datastart = u->data;
hdr.data = hdr.datastart + offset;
hdr.data = u->data + offset;
hdr.datalimit = hdr.dataend = u->data + u->size;
CV_XADD(&hdr.u->refcount, 1);
return hdr;
@@ -593,15 +593,16 @@ void* UMat::handle(int accessFlags) const
if( !u )
return 0;
if ((accessFlags & ACCESS_WRITE) != 0)
u->markHostCopyObsolete(true);
// check flags: if CPU copy is newer, copy it back to GPU.
if( u->deviceCopyObsolete() )
{
CV_Assert(u->refcount == 0);
u->currAllocator->unmap(u);
}
if ((accessFlags & ACCESS_WRITE) != 0)
u->markHostCopyObsolete(true);
return u->handle;
}

View File

@@ -39,7 +39,7 @@
//
//M*/
#include "test_precomp.hpp"
#include "../test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#include <cmath>
@@ -157,6 +157,7 @@ PARAM_TEST_CASE(ArithmTestBase, MatDepth, Channels, bool)
Border maskBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(mask, mask_roi, roiSize, maskBorder, CV_8UC1, 0, 2);
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
*mask.ptr(0) = 255; // prevent test case with mask filled 0 only
val = cv::Scalar(rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0),
rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0));
@@ -1419,7 +1420,7 @@ OCL_TEST_P(UMatDot, Mat)
OCL_OFF(const double cpuRes = src1_roi.dot(src2_roi));
OCL_ON(const double gpuRes = usrc1_roi.dot(usrc2_roi));
EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6);
EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-5);
}
}
@@ -1749,7 +1750,7 @@ OCL_TEST_P(ReduceAvg, Mat)
OCL_OFF(cv::reduce(src_roi, dst_roi, dim, CV_REDUCE_AVG, dtype));
OCL_ON(cv::reduce(usrc_roi, udst_roi, dim, CV_REDUCE_AVG, dtype));
double eps = ddepth <= CV_32S ? 1 : 5e-6;
double eps = ddepth <= CV_32S ? 1 : 6e-6;
OCL_EXPECT_MATS_NEAR(dst, eps);
}
}

View File

@@ -44,7 +44,7 @@
//
//M*/
#include "test_precomp.hpp"
#include "../test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
@@ -105,6 +105,7 @@ PARAM_TEST_CASE(Merge, MatDepth, int, bool)
UMAT_UPLOAD_INPUT_PARAMETER(src3);
UMAT_UPLOAD_INPUT_PARAMETER(src4);
src_roi.clear(); usrc_roi.clear(); // for test_loop_times > 1
src_roi.push_back(src1_roi), usrc_roi.push_back(usrc1_roi);
if (nsrc >= 2)
src_roi.push_back(src2_roi), usrc_roi.push_back(usrc2_roi);

View File

@@ -43,7 +43,7 @@
//
//M*/
#include "test_precomp.hpp"
#include "../test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
@@ -108,7 +108,7 @@ OCL_TEST_P(Dft, Mat)
{
generateTestData();
int nonzero_rows = hint ? src.cols - randomInt(1, src.rows-1) : 0;
int nonzero_rows = hint ? src.rows - randomInt(1, src.rows-1) : 0;
OCL_OFF(cv::dft(src, dst, dft_flags, nonzero_rows));
OCL_ON(cv::dft(usrc, udst, dft_flags, nonzero_rows));
@@ -175,7 +175,7 @@ OCL_TEST_P(MulSpectrums, Mat)
OCL_INSTANTIATE_TEST_CASE_P(OCL_ImgProc, MulSpectrums, testing::Combine(Bool(), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(10, 10), cv::Size(36, 36), cv::Size(512, 1), cv::Size(1280, 768)),
OCL_INSTANTIATE_TEST_CASE_P(Core, Dft, Combine(Values(cv::Size(45, 72), cv::Size(36, 36), cv::Size(512, 1), cv::Size(1280, 768)),
Values((OCL_FFT_TYPE) R2C, (OCL_FFT_TYPE) C2C, (OCL_FFT_TYPE) R2R, (OCL_FFT_TYPE) C2R),
Bool(), // DFT_INVERSE
Bool(), // DFT_ROWS

View File

@@ -42,7 +42,7 @@
//
//M*/
#include "test_precomp.hpp"
#include "../test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL

View File

@@ -5,7 +5,7 @@
// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
#include "test_precomp.hpp"
#include "../test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL

View File

@@ -44,7 +44,7 @@
//
//M*/
#include "test_precomp.hpp"
#include "../test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
@@ -96,7 +96,7 @@ OCL_TEST_P(ConvertTo, Accuracy)
OCL_OFF(src_roi.convertTo(dst_roi, dstType, alpha, beta));
OCL_ON(usrc_roi.convertTo(udst_roi, dstType, alpha, beta));
double eps = src_depth >= CV_32F || CV_MAT_DEPTH(dstType) >= CV_32F ? 1e-4 : 1;
double eps = CV_MAT_DEPTH(dstType) >= CV_32F ? 2e-4 : 1;
OCL_EXPECT_MATS_NEAR(dst, eps);
}
}
@@ -121,7 +121,7 @@ PARAM_TEST_CASE(CopyTo, MatDepth, Channels, bool, bool)
use_mask = GET_PARAM(3);
}
void generateTestData()
void generateTestData(bool one_cn_mask = false)
{
const int type = CV_MAKE_TYPE(depth, cn);
@@ -132,9 +132,11 @@ PARAM_TEST_CASE(CopyTo, MatDepth, Channels, bool, bool)
if (use_mask)
{
Border maskBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
int mask_cn = randomDouble(0.0, 2.0) > 1.0 ? cn : 1;
int mask_cn = 1;
if (!one_cn_mask && randomDouble(0.0, 2.0) > 1.0)
mask_cn = cn;
randomSubMat(mask, mask_roi, roiSize, maskBorder, CV_8UC(mask_cn), 0, 2);
cv::threshold(mask, mask, 0.5, 255., CV_8UC1);
cv::threshold(mask, mask, 0.5, 255., THRESH_BINARY);
}
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
@@ -177,7 +179,7 @@ OCL_TEST_P(SetTo, Accuracy)
{
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
generateTestData(true); // see modules/core/src/umatrix.cpp Ln:791 => CV_Assert( mask.size() == size() && mask.type() == CV_8UC1 );
if (use_mask)
{

View File

@@ -649,6 +649,16 @@ static void setValue(SparseMat& M, const int* idx, double value, RNG& rng)
CV_Error(CV_StsUnsupportedFormat, "");
}
/// Functor used by the cv::Mat::forEach test: stamps every element with its
/// own position so the result can be verified afterwards.
/// Pixel must expose assignable members x, y and z.
template<typename Pixel>
struct InitializerFunctor
{
    /// @param pixel element to overwrite
    /// @param idx   position of the element; the first three entries are
    ///              copied into pixel.x, pixel.y and pixel.z in that order
    void operator()(Pixel & pixel, const int * idx) const
    {
        pixel.x = idx[0];
        pixel.y = idx[1];
        pixel.z = idx[2];
    }
};
void Core_ArrayOpTest::run( int /* start_from */)
{
int errcount = 0;
@@ -686,6 +696,45 @@ void Core_ArrayOpTest::run( int /* start_from */)
errcount++;
}
}
// test cv::Mat::forEach
{
    // Fill a 3-D, 3-channel int matrix so that every element stores its own
    // index, then verify it element by element and through a checksum.
    const int dims[3] = { 101, 107, 7 };
    typedef cv::Point3i Pixel;

    cv::Mat a = cv::Mat::zeros(3, dims, CV_32SC3);
    InitializerFunctor<Pixel> initializer;

    a.forEach<Pixel>(initializer);

    uint64 total = 0;
    bool error_reported = false;
    for (int i0 = 0; i0 < dims[0]; ++i0) {
        for (int i1 = 0; i1 < dims[1]; ++i1) {
            for (int i2 = 0; i2 < dims[2]; ++i2) {
                Pixel& pixel = a.at<Pixel>(i0, i1, i2);
                if (pixel.x != i0 || pixel.y != i1 || pixel.z != i2) {
                    if (!error_reported) {
                        // Log the failing location AND the value found there; printing
                        // only the stored value would misreport where the error is.
                        ts->printf(cvtest::TS::LOG, "forEach is not correct.\n"
                            "First error detected at (%d, %d, %d): stored value is (%d, %d, %d).\n",
                            i0, i1, i2, pixel.x, pixel.y, pixel.z);
                        error_reported = true;
                    }
                    errcount++;
                }
                total += pixel.x;
                total += pixel.y;
                total += pixel.z;
            }
        }
    }

    // Expected checksum: along axis i each index value v in [0, dims[i])
    // occurs once per combination of the other two axes, i.e.
    // (dims[0]*dims[1]*dims[2] / dims[i]) times, and the values sum to
    // (dims[i]-1)*dims[i]/2.
    uint64 total2 = 0;
    for (size_t i = 0; i < sizeof(dims) / sizeof(dims[0]); ++i) {
        total2 += ((dims[i] - 1) * dims[i] / 2) * dims[0] * dims[1] * dims[2] / dims[i];
    }
    if (total != total2) {
        ts->printf(cvtest::TS::LOG, "forEach is not correct because total is invalid.\n");
        errcount++;
    }
}
RNG rng;
const int MAX_DIM = 5, MAX_DIM_SZ = 10;

View File

@@ -745,6 +745,24 @@ TEST(UMat, Sync)
EXPECT_EQ(0, cvtest::norm(um.getMat(ACCESS_READ), cv::Mat(um.size(), um.type(), 19), NORM_INF));
}
// Regression test: Mat::copyTo(UMat) followed by UMat::setTo must give the
// right result even when the UMat's device copy was made obsolete beforehand.
TEST(UMat, CopyToIfDeviceCopyIsObsolete)
{
    UMat deviceMat(7, 2, CV_8UC1);

    Mat hostMat(deviceMat.size(), deviceMat.type());
    hostMat.setTo(Scalar::all(0));

    {
        // make obsolete device copy of UMat: write through a host view and
        // release the view when this scope ends
        Mat hostView = deviceMat.getMat(ACCESS_WRITE);
        hostView.setTo(Scalar::all(10));
    }

    hostMat.copyTo(deviceMat);
    deviceMat.setTo(Scalar::all(17));

    // every element must now hold the value written by the final setTo
    EXPECT_EQ(0, cvtest::norm(deviceMat.getMat(ACCESS_READ), Mat(deviceMat.size(), deviceMat.type(), 17), NORM_INF));
}
TEST(UMat, setOpenCL)
{
// save the current state