Implementation of "cv::Mat::forEach"
I propose a forEach method for cv::Mat and cv::Mat_. It is a solution to the overhead of MatIterator_<_Tp>. I ran a test in which a micro-operation runs over every pixel of a cv::Mat_<cv::Point3_<uint8_t>>. This implementation is 40% faster than a simple pointer loop and 80% faster than the iterator. With OpenMP, it is 70% faster than the simple pointer loop and 95% faster than the iterator (Core i7 920). Above all, the code is more readable. My test code is here: https://gist.github.com/kazuki-ma/8285876 Thanks.
This commit is contained in:
parent
cafcfc4d0f
commit
fa292c3d8d
@ -2326,6 +2326,69 @@ Returns the matrix iterator and sets it to the after-last matrix element.
|
|||||||
|
|
||||||
The methods return the matrix read-only or read-write iterators, set to the point following the last matrix element.
|
The methods return the matrix read-only or read-write iterators, set to the point following the last matrix element.
|
||||||
|
|
||||||
|
|
||||||
|
Mat::forEach
|
||||||
|
------------
|
||||||
|
Runs the functor passed as the argument over all matrix elements.
|
||||||
|
|
||||||
|
.. ocv:function:: template<typename _Tp, typename Functor> void Mat::forEach(const Functor& operation)
|
||||||
|
|
||||||
|
.. ocv:function:: template<typename _Tp, typename Functor> void Mat::forEach(const Functor& operation) const
|
||||||
|
|
||||||
|
The method runs the operation in parallel. The operation is passed as an argument and has to be a function pointer, a function object or a lambda (C++11).
|
||||||
|
|
||||||
|
All of the operations below are equivalent: each puts 0xFF into the first channel of every matrix element. ::
|
||||||
|
|
||||||
|
Mat image(1920, 1080, CV_8UC3);
|
||||||
|
typedef cv::Point3_<uint8_t> Pixel;
|
||||||
|
|
||||||
|
// first. raw pointer access.
|
||||||
|
for (int r = 0; r < image.rows; ++r) {
|
||||||
|
Pixel* ptr = image.ptr<Pixel>(r, 0);
|
||||||
|
const Pixel* ptr_end = ptr + image.cols;
|
||||||
|
for (; ptr != ptr_end; ++ptr) {
|
||||||
|
ptr->x = 255;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Using MatIterator. (Simple, but the iterator adds overhead.)
|
||||||
|
for (Pixel &p : cv::Mat_<Pixel>(image)) {
|
||||||
|
p.x = 255;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Parallel execution with function object.
|
||||||
|
struct Operator {
|
||||||
|
void operator ()(Pixel &pixel, const int * position) const {
|
||||||
|
pixel.x = 255;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
image.forEach<Pixel>(Operator());
|
||||||
|
|
||||||
|
|
||||||
|
// Parallel execution using C++11 lambda.
|
||||||
|
image.forEach<Pixel>([](Pixel &p, const int * position) -> void {
|
||||||
|
p.x = 255;
|
||||||
|
});
|
||||||
|
|
||||||
|
The position parameter is the index of the current pixel. ::
|
||||||
|
|
||||||
|
// Create a 3D matrix (255 x 255 x 255) of uint8_t elements,
|
||||||
|
// and initialize every element with the value of its own position,
|
||||||
|
// i.e. the pixel at (x,y,z) = (1,2,3) gets (b,g,r) = (1,2,3).
|
||||||
|
|
||||||
|
int sizes[] = { 255, 255, 255 };
|
||||||
|
typedef cv::Point3_<uint8_t> Pixel;
|
||||||
|
|
||||||
|
Mat_<Pixel> image = Mat::zeros(3, sizes, CV_8UC3);
|
||||||
|
|
||||||
|
image.forEach([&](Pixel& pixel, const int position[]) -> void{
|
||||||
|
pixel.x = position[0];
|
||||||
|
pixel.y = position[1];
|
||||||
|
pixel.z = position[2];
|
||||||
|
});
|
||||||
|
|
||||||
Mat\_
|
Mat\_
|
||||||
-----
|
-----
|
||||||
.. ocv:class:: Mat_
|
.. ocv:class:: Mat_
|
||||||
|
@ -897,6 +897,11 @@ public:
|
|||||||
template<typename _Tp> MatConstIterator_<_Tp> begin() const;
|
template<typename _Tp> MatConstIterator_<_Tp> begin() const;
|
||||||
template<typename _Tp> MatConstIterator_<_Tp> end() const;
|
template<typename _Tp> MatConstIterator_<_Tp> end() const;
|
||||||
|
|
||||||
|
//! template methods for operation over all matrix elements.
|
||||||
|
// the operations take care of skipping gaps in the end of rows (if any)
|
||||||
|
template<typename _Tp, typename Functor> void forEach(const Functor& operation);
|
||||||
|
template<typename _Tp, typename Functor> void forEach(const Functor& operation) const;
|
||||||
|
|
||||||
enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
|
enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
|
||||||
enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
|
enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
|
||||||
|
|
||||||
@ -931,6 +936,7 @@ public:
|
|||||||
MatStep step;
|
MatStep step;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
template<typename _Tp, typename Functor> void forEach_impl(const Functor& operation);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -1040,6 +1046,11 @@ public:
|
|||||||
const_iterator begin() const;
|
const_iterator begin() const;
|
||||||
const_iterator end() const;
|
const_iterator end() const;
|
||||||
|
|
||||||
|
//! template methods for operation over all matrix elements.
|
||||||
|
// the operations take care of skipping gaps in the end of rows (if any)
|
||||||
|
template<typename Functor> void forEach(const Functor& operation);
|
||||||
|
template<typename Functor> void forEach(const Functor& operation) const;
|
||||||
|
|
||||||
//! equivalent to Mat::create(_rows, _cols, DataType<_Tp>::type)
|
//! equivalent to Mat::create(_rows, _cols, DataType<_Tp>::type)
|
||||||
void create(int _rows, int _cols);
|
void create(int _rows, int _cols);
|
||||||
//! equivalent to Mat::create(_size, DataType<_Tp>::type)
|
//! equivalent to Mat::create(_size, DataType<_Tp>::type)
|
||||||
|
@ -999,6 +999,17 @@ MatIterator_<_Tp> Mat::end()
|
|||||||
return it;
|
return it;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename _Tp, typename Functor> inline
|
||||||
|
void Mat::forEach(const Functor& operation) {
|
||||||
|
this->forEach_impl<_Tp>(operation);
|
||||||
|
};
|
||||||
|
|
||||||
|
template<typename _Tp, typename Functor> inline
|
||||||
|
void Mat::forEach(const Functor& operation) const {
|
||||||
|
// call as not const
|
||||||
|
(const_cast<Mat*>(this))->forEach<const _Tp>(operation);
|
||||||
|
};
|
||||||
|
|
||||||
template<typename _Tp> inline
|
template<typename _Tp> inline
|
||||||
Mat::operator std::vector<_Tp>() const
|
Mat::operator std::vector<_Tp>() const
|
||||||
{
|
{
|
||||||
@ -1584,6 +1595,15 @@ MatIterator_<_Tp> Mat_<_Tp>::end()
|
|||||||
return Mat::end<_Tp>();
|
return Mat::end<_Tp>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename _Tp> template<typename Functor> inline
|
||||||
|
void Mat_<_Tp>::forEach(const Functor& operation) {
|
||||||
|
Mat::forEach<_Tp, Functor>(operation);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename _Tp> template<typename Functor> inline
|
||||||
|
void Mat_<_Tp>::forEach(const Functor& operation) const {
|
||||||
|
Mat::forEach<_Tp, Functor>(operation);
|
||||||
|
}
|
||||||
|
|
||||||
///////////////////////////// SparseMat /////////////////////////////
|
///////////////////////////// SparseMat /////////////////////////////
|
||||||
|
|
||||||
|
@ -274,6 +274,102 @@ public:
|
|||||||
|
|
||||||
CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.);
|
CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.);
|
||||||
|
|
||||||
|
/////////////////////////////// forEach method of cv::Mat ////////////////////////////
|
||||||
|
template<typename _Tp, typename Functor> inline
|
||||||
|
void Mat::forEach_impl(const Functor& operation) {
|
||||||
|
if (false) {
|
||||||
|
operation(*reinterpret_cast<_Tp*>(0), reinterpret_cast<int*>(NULL));
|
||||||
|
// If your compiler fail in this line.
|
||||||
|
// Please check that your functor signature is
|
||||||
|
// (_Tp&, const int*) <- multidimential
|
||||||
|
// or (_Tp&, void*) <- in case of you don't need current idx.
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_Assert(this->total() / this->size[this->dims - 1] <= INT_MAX);
|
||||||
|
const int LINES = static_cast<int>(this->total() / this->size[this->dims - 1]);
|
||||||
|
|
||||||
|
class PixelOperationWrapper :public ParallelLoopBody
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
PixelOperationWrapper(Mat_<_Tp>* const frame, const Functor& _operation)
|
||||||
|
: mat(frame), op(_operation) {};
|
||||||
|
virtual ~PixelOperationWrapper(){};
|
||||||
|
// ! Overloaded virtual operator
|
||||||
|
// convert range call to row call.
|
||||||
|
virtual void operator()(const Range &range) const {
|
||||||
|
const int DIMS = mat->dims;
|
||||||
|
const int COLS = mat->size[DIMS - 1];
|
||||||
|
if (DIMS <= 2) {
|
||||||
|
for (int row = range.start; row < range.end; ++row) {
|
||||||
|
this->rowCall2(row, COLS);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
std::vector<int> idx(COLS); /// idx is modified in this->rowCall
|
||||||
|
idx[DIMS - 2] = range.start - 1;
|
||||||
|
|
||||||
|
for (int line_num = range.start; line_num < range.end; ++line_num) {
|
||||||
|
idx[DIMS - 2]++;
|
||||||
|
for (int i = DIMS - 2; i >= 0; --i) {
|
||||||
|
if (idx[i] >= mat->size[i]) {
|
||||||
|
idx[i - 1] += idx[i] / mat->size[i];
|
||||||
|
idx[i] %= mat->size[i];
|
||||||
|
continue; // carry-over;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this->rowCall(&idx[0], COLS, DIMS);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
private:
|
||||||
|
Mat_<_Tp>* const mat;
|
||||||
|
const Functor op;
|
||||||
|
// ! Call operator for each elements in this row.
|
||||||
|
inline void rowCall(int* const idx, const int COLS, const int DIMS) const {
|
||||||
|
int &col = idx[DIMS - 1];
|
||||||
|
col = 0;
|
||||||
|
_Tp* pixel = &(mat->template at<_Tp>(idx));
|
||||||
|
|
||||||
|
while (col < COLS) {
|
||||||
|
op(*pixel, const_cast<const int*>(idx));
|
||||||
|
pixel++; col++;
|
||||||
|
}
|
||||||
|
col = 0;
|
||||||
|
}
|
||||||
|
// ! Call operator for each elements in this row. 2d mat special version.
|
||||||
|
inline void rowCall2(const int row, const int COLS) const {
|
||||||
|
union Index{
|
||||||
|
int body[2];
|
||||||
|
operator const int*() const {
|
||||||
|
return reinterpret_cast<const int*>(this);
|
||||||
|
}
|
||||||
|
int& operator[](const int i) {
|
||||||
|
return body[i];
|
||||||
|
}
|
||||||
|
} idx = {{row, 0}};
|
||||||
|
// Special union is needed to avoid
|
||||||
|
// "error: array subscript is above array bounds [-Werror=array-bounds]"
|
||||||
|
// when call the functor `op` such that access idx[3].
|
||||||
|
|
||||||
|
_Tp* pixel = &(mat->template at<_Tp>(idx));
|
||||||
|
const _Tp* const pixel_end = pixel + COLS;
|
||||||
|
while(pixel < pixel_end) {
|
||||||
|
op(*pixel++, static_cast<const int*>(idx));
|
||||||
|
idx[1]++;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
PixelOperationWrapper& operator=(const PixelOperationWrapper &) {
|
||||||
|
CV_Assert(false);
|
||||||
|
// We can not remove this implementation because Visual Studio warning C4822.
|
||||||
|
return *this;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
parallel_for_(cv::Range(0, LINES), PixelOperationWrapper(reinterpret_cast<Mat_<_Tp>*>(this), operation));
|
||||||
|
};
|
||||||
|
|
||||||
/////////////////////////// Synchronization Primitives ///////////////////////////////
|
/////////////////////////// Synchronization Primitives ///////////////////////////////
|
||||||
|
|
||||||
class CV_EXPORTS Mutex
|
class CV_EXPORTS Mutex
|
||||||
|
@ -649,6 +649,16 @@ static void setValue(SparseMat& M, const int* idx, double value, RNG& rng)
|
|||||||
CV_Error(CV_StsUnsupportedFormat, "");
|
CV_Error(CV_StsUnsupportedFormat, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template<typename Pixel>
struct InitializerFunctor{
    /// Functor for the cv::Mat::forEach test: stores the first three entries
    /// of the element index into the pixel's x, y and z members.
    void operator()(Pixel & pixel, const int * idx) const {
        const int * coord = idx;
        pixel.x = *coord++;
        pixel.y = *coord++;
        pixel.z = *coord;
    }
};
|
||||||
|
|
||||||
void Core_ArrayOpTest::run( int /* start_from */)
|
void Core_ArrayOpTest::run( int /* start_from */)
|
||||||
{
|
{
|
||||||
int errcount = 0;
|
int errcount = 0;
|
||||||
@ -686,6 +696,45 @@ void Core_ArrayOpTest::run( int /* start_from */)
|
|||||||
errcount++;
|
errcount++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// test cv::Mat::forEach
|
||||||
|
{
|
||||||
|
const int dims[3] = { 101, 107, 7 };
|
||||||
|
typedef cv::Point3i Pixel;
|
||||||
|
|
||||||
|
cv::Mat a = cv::Mat::zeros(3, dims, CV_32SC3);
|
||||||
|
InitializerFunctor<Pixel> initializer;
|
||||||
|
|
||||||
|
a.forEach<Pixel>(initializer);
|
||||||
|
|
||||||
|
uint64 total = 0;
|
||||||
|
bool error_reported = false;
|
||||||
|
for (int i0 = 0; i0 < dims[0]; ++i0) {
|
||||||
|
for (int i1 = 0; i1 < dims[1]; ++i1) {
|
||||||
|
for (int i2 = 0; i2 < dims[2]; ++i2) {
|
||||||
|
Pixel& pixel = a.at<Pixel>(i0, i1, i2);
|
||||||
|
if (pixel.x != i0 || pixel.y != i1 || pixel.z != i2) {
|
||||||
|
if (!error_reported) {
|
||||||
|
ts->printf(cvtest::TS::LOG, "forEach is not correct.\n"
|
||||||
|
"First error detected at (%d, %d, %d).\n", pixel.x, pixel.y, pixel.z);
|
||||||
|
error_reported = true;
|
||||||
|
}
|
||||||
|
errcount++;
|
||||||
|
}
|
||||||
|
total += pixel.x;
|
||||||
|
total += pixel.y;
|
||||||
|
total += pixel.z;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint64 total2 = 0;
|
||||||
|
for (size_t i = 0; i < sizeof(dims) / sizeof(dims[0]); ++i) {
|
||||||
|
total2 += ((dims[i] - 1) * dims[i] / 2) * dims[0] * dims[1] * dims[2] / dims[i];
|
||||||
|
}
|
||||||
|
if (total != total2) {
|
||||||
|
ts->printf(cvtest::TS::LOG, "forEach is not correct because total is invalid.\n");
|
||||||
|
errcount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
RNG rng;
|
RNG rng;
|
||||||
const int MAX_DIM = 5, MAX_DIM_SZ = 10;
|
const int MAX_DIM = 5, MAX_DIM_SZ = 10;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user