Implementation of "cv::Mat::forEach"

I propose a forEach method for cv::Mat and cv::Mat_.
It is a solution to the overhead of MatIterator_<_Tp>.

I ran a test in which a micro-operation is applied to every pixel of a cv::Mat_<cv::Point3_<uint8_t>>.
This implementation is 40% faster than a simple pointer loop and 80% faster than the iterator.
With OpenMP, it is 70% faster than the simple pointer loop and 95% faster than the iterator (Core i7 920).

Above all, the code is more readable.

My test code is here.
    https://gist.github.com/kazuki-ma/8285876

Thanks.
This commit is contained in:
Kazuki Matsuda
2014-01-06 01:42:45 +09:00
parent cafcfc4d0f
commit fa292c3d8d
5 changed files with 239 additions and 0 deletions

View File

@@ -897,6 +897,11 @@ public:
template<typename _Tp> MatConstIterator_<_Tp> begin() const;
template<typename _Tp> MatConstIterator_<_Tp> end() const;
//! Template methods that apply an operation to every element of the matrix.
// The operations take care of skipping gaps at the end of rows (if any).
// The functor is invoked as operation(element, idx), where element is a _Tp&
// and idx is a const int* pointing at the index of that element.
// The const overload forwards with `const _Tp`, so the functor receives a const element.
template<typename _Tp, typename Functor> void forEach(const Functor& operation);
template<typename _Tp, typename Functor> void forEach(const Functor& operation) const;
enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG };
enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 };
@@ -931,6 +936,7 @@ public:
MatStep step;
protected:
//! Shared worker behind the public forEach overloads: visits every element as _Tp
// and calls operation(element, idx) on it.
template<typename _Tp, typename Functor> void forEach_impl(const Functor& operation);
};
@@ -1040,6 +1046,11 @@ public:
const_iterator begin() const;
const_iterator end() const;
//! Template methods that apply an operation to every element of the matrix.
// The operations take care of skipping gaps at the end of rows (if any).
// Both overloads forward to Mat::forEach with the element type fixed to _Tp.
template<typename Functor> void forEach(const Functor& operation);
template<typename Functor> void forEach(const Functor& operation) const;
//! equivalent to Mat::create(_rows, _cols, DataType<_Tp>::type)
void create(int _rows, int _cols);
//! equivalent to Mat::create(_size, DataType<_Tp>::type)

View File

@@ -999,6 +999,17 @@ MatIterator_<_Tp> Mat::end()
return it;
}
template<typename _Tp, typename Functor> inline
void Mat::forEach(const Functor& operation) {
this->forEach_impl<_Tp>(operation);
};
template<typename _Tp, typename Functor> inline
void Mat::forEach(const Functor& operation) const {
// call as not const
(const_cast<Mat*>(this))->forEach<const _Tp>(operation);
};
template<typename _Tp> inline
Mat::operator std::vector<_Tp>() const
{
@@ -1584,6 +1595,15 @@ MatIterator_<_Tp> Mat_<_Tp>::end()
return Mat::end<_Tp>();
}
//! Applies `operation` to every element; the element type is the matrix's own _Tp.
// Thin forwarder to the generic Mat::forEach.
template<typename _Tp> template<typename Functor>
inline void Mat_<_Tp>::forEach(const Functor& operation)
{
    Mat::forEach<_Tp>(operation);
}
//! Const overload: applies `operation` to every element of a const matrix.
// Thin forwarder to the const overload of the generic Mat::forEach.
template<typename _Tp> template<typename Functor>
inline void Mat_<_Tp>::forEach(const Functor& operation) const
{
    Mat::forEach<_Tp>(operation);
}
///////////////////////////// SparseMat /////////////////////////////

View File

@@ -274,6 +274,102 @@ public:
CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.);
/////////////////////////////// forEach method of cv::Mat ////////////////////////////
/**
 * Applies `operation` to every element of this matrix, viewing the elements as `_Tp`.
 *
 * The matrix is processed as LINES lines of size[dims - 1] elements each (one line
 * per run along the last dimension). The line range [0, LINES) is handed to
 * parallel_for_, and every line is walked with a raw element pointer.
 *
 * @param operation functor invoked as operation(element, idx), where `element` is a
 *                  `_Tp&` and `idx` is a `const int*` to the index of that element
 *                  ({row, col} when dims <= 2, one entry per dimension otherwise).
 *                  A copy of the functor is stored in the loop body.
 */
template<typename _Tp, typename Functor> inline
void Mat::forEach_impl(const Functor& operation) {
    if (false) {
        // Never executed: exists only so that a functor with a wrong signature is
        // rejected right here with a readable compiler error.
        operation(*reinterpret_cast<_Tp*>(0), reinterpret_cast<int*>(NULL));
        // If your compiler fails on this line,
        // please check that your functor signature is
        //     (_Tp&, const int*)   <- multidimensional index of the element
        //  or (_Tp&, void*)        <- in case you don't need the current idx.
    }

    // Nothing to visit. Returning here also keeps the divisions below from dividing
    // by a last dimension of size zero.
    if (this->total() == 0) {
        return;
    }

    CV_Assert(this->total() / this->size[this->dims - 1] <= INT_MAX);
    const int LINES = static_cast<int>(this->total() / this->size[this->dims - 1]);

    class PixelOperationWrapper : public ParallelLoopBody
    {
    public:
        PixelOperationWrapper(Mat_<_Tp>* const frame, const Functor& _operation)
            : mat(frame), op(_operation) {}
        virtual ~PixelOperationWrapper() {}

        // Overloaded virtual operator: converts a range call into per-line calls.
        virtual void operator()(const Range& range) const {
            const int DIMS = mat->dims;
            const int COLS = mat->size[DIMS - 1];
            if (DIMS <= 2) {
                for (int row = range.start; row < range.end; ++row) {
                    this->rowCall2(row, COLS);
                }
            } else {
                // One index entry per dimension. rowCall() writes idx[DIMS - 1] and the
                // carry loop below touches idx[0 .. DIMS - 2], so the buffer has to hold
                // DIMS entries no matter how short the last dimension is.
                std::vector<int> idx(DIMS);
                // Start one line early; the first increment below lands on range.start.
                idx[DIMS - 2] = range.start - 1;

                for (int line_num = range.start; line_num < range.end; ++line_num) {
                    idx[DIMS - 2]++;
                    // Propagate overflow from the second-to-last dimension upwards.
                    for (int i = DIMS - 2; i >= 0; --i) {
                        if (idx[i] >= mat->size[i]) {
                            idx[i - 1] += idx[i] / mat->size[i];
                            idx[i] %= mat->size[i];
                            continue; // carry-over;
                        }
                        else {
                            break;
                        }
                    }
                    this->rowCall(&idx[0], COLS, DIMS);
                }
            }
        }

    private:
        Mat_<_Tp>* const mat;
        const Functor op;

        // Calls the functor for each element of the line addressed by idx.
        // idx[DIMS - 1] is used as the running column and is reset to 0 on exit.
        inline void rowCall(int* const idx, const int COLS, const int DIMS) const {
            int& col = idx[DIMS - 1];
            col = 0;
            _Tp* pixel = &(mat->template at<_Tp>(idx));

            while (col < COLS) {
                op(*pixel, const_cast<const int*>(idx));
                pixel++; col++;
            }
            col = 0;
        }

        // Calls the functor for each element of one row. 2d mat special version.
        inline void rowCall2(const int row, const int COLS) const {
            union Index {
                int body[2];
                operator const int*() const {
                    return reinterpret_cast<const int*>(this);
                }
                int& operator[](const int i) {
                    return body[i];
                }
            } idx = {{row, 0}};
            // Special union is needed to avoid
            // "error: array subscript is above array bounds [-Werror=array-bounds]"
            // when the functor `op` is one that accesses idx[3].

            _Tp* pixel = &(mat->template at<_Tp>(idx));
            const _Tp* const pixel_end = pixel + COLS;
            while (pixel < pixel_end) {
                op(*pixel++, static_cast<const int*>(idx));
                idx[1]++;
            }
        }

        PixelOperationWrapper& operator=(const PixelOperationWrapper&) {
            CV_Assert(false);
            // We can not remove this implementation because Visual Studio warning C4822.
            return *this;
        }
    };

    parallel_for_(cv::Range(0, LINES), PixelOperationWrapper(reinterpret_cast<Mat_<_Tp>*>(this), operation));
}
/////////////////////////// Synchronization Primitives ///////////////////////////////
class CV_EXPORTS Mutex