Replaced most usages of parallel_for with parallel_for_.
This should allow many algorithms to take advantage of more parallelization technologies.
This commit is contained in:
@@ -2755,7 +2755,7 @@ const int ITUR_BT_601_CGV = -385875;
|
||||
const int ITUR_BT_601_CBV = -74448;
|
||||
|
||||
template<int bIdx, int uIdx>
|
||||
struct YUV420sp2RGB888Invoker
|
||||
struct YUV420sp2RGB888Invoker : ParallelLoopBody
|
||||
{
|
||||
Mat* dst;
|
||||
const uchar* my1, *muv;
|
||||
@@ -2764,10 +2764,10 @@ struct YUV420sp2RGB888Invoker
|
||||
YUV420sp2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
|
||||
: dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
int rangeBegin = range.begin() * 2;
|
||||
int rangeEnd = range.end() * 2;
|
||||
int rangeBegin = range.start * 2;
|
||||
int rangeEnd = range.end * 2;
|
||||
|
||||
//R = 1.164(Y - 16) + 1.596(V - 128)
|
||||
//G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
|
||||
@@ -2824,7 +2824,7 @@ struct YUV420sp2RGB888Invoker
|
||||
};
|
||||
|
||||
template<int bIdx, int uIdx>
|
||||
struct YUV420sp2RGBA8888Invoker
|
||||
struct YUV420sp2RGBA8888Invoker : ParallelLoopBody
|
||||
{
|
||||
Mat* dst;
|
||||
const uchar* my1, *muv;
|
||||
@@ -2833,10 +2833,10 @@ struct YUV420sp2RGBA8888Invoker
|
||||
YUV420sp2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _uv)
|
||||
: dst(_dst), my1(_y1), muv(_uv), width(_dst->cols), stride(_stride) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
int rangeBegin = range.begin() * 2;
|
||||
int rangeEnd = range.end() * 2;
|
||||
int rangeBegin = range.start * 2;
|
||||
int rangeEnd = range.end * 2;
|
||||
|
||||
//R = 1.164(Y - 16) + 1.596(V - 128)
|
||||
//G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128)
|
||||
@@ -2897,7 +2897,7 @@ struct YUV420sp2RGBA8888Invoker
|
||||
};
|
||||
|
||||
template<int bIdx>
|
||||
struct YUV420p2RGB888Invoker
|
||||
struct YUV420p2RGB888Invoker : ParallelLoopBody
|
||||
{
|
||||
Mat* dst;
|
||||
const uchar* my1, *mu, *mv;
|
||||
@@ -2907,19 +2907,19 @@ struct YUV420p2RGB888Invoker
|
||||
YUV420p2RGB888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
|
||||
: dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
const int rangeBegin = range.begin() * 2;
|
||||
const int rangeEnd = range.end() * 2;
|
||||
const int rangeBegin = range.start * 2;
|
||||
const int rangeEnd = range.end * 2;
|
||||
|
||||
size_t uvsteps[2] = {width/2, stride - width/2};
|
||||
int usIdx = ustepIdx, vsIdx = vstepIdx;
|
||||
|
||||
const uchar* y1 = my1 + rangeBegin * stride;
|
||||
const uchar* u1 = mu + (range.begin() / 2) * stride;
|
||||
const uchar* v1 = mv + (range.begin() / 2) * stride;
|
||||
const uchar* u1 = mu + (range.start / 2) * stride;
|
||||
const uchar* v1 = mv + (range.start / 2) * stride;
|
||||
|
||||
if(range.begin() % 2 == 1)
|
||||
if(range.start % 2 == 1)
|
||||
{
|
||||
u1 += uvsteps[(usIdx++) & 1];
|
||||
v1 += uvsteps[(vsIdx++) & 1];
|
||||
@@ -2965,7 +2965,7 @@ struct YUV420p2RGB888Invoker
|
||||
};
|
||||
|
||||
template<int bIdx>
|
||||
struct YUV420p2RGBA8888Invoker
|
||||
struct YUV420p2RGBA8888Invoker : ParallelLoopBody
|
||||
{
|
||||
Mat* dst;
|
||||
const uchar* my1, *mu, *mv;
|
||||
@@ -2975,19 +2975,19 @@ struct YUV420p2RGBA8888Invoker
|
||||
YUV420p2RGBA8888Invoker(Mat* _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int _ustepIdx, int _vstepIdx)
|
||||
: dst(_dst), my1(_y1), mu(_u), mv(_v), width(_dst->cols), stride(_stride), ustepIdx(_ustepIdx), vstepIdx(_vstepIdx) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
int rangeBegin = range.begin() * 2;
|
||||
int rangeEnd = range.end() * 2;
|
||||
int rangeBegin = range.start * 2;
|
||||
int rangeEnd = range.end * 2;
|
||||
|
||||
size_t uvsteps[2] = {width/2, stride - width/2};
|
||||
int usIdx = ustepIdx, vsIdx = vstepIdx;
|
||||
|
||||
const uchar* y1 = my1 + rangeBegin * stride;
|
||||
const uchar* u1 = mu + (range.begin() / 2) * stride;
|
||||
const uchar* v1 = mv + (range.begin() / 2) * stride;
|
||||
const uchar* u1 = mu + (range.start / 2) * stride;
|
||||
const uchar* v1 = mv + (range.start / 2) * stride;
|
||||
|
||||
if(range.begin() % 2 == 1)
|
||||
if(range.start % 2 == 1)
|
||||
{
|
||||
u1 += uvsteps[(usIdx++) & 1];
|
||||
v1 += uvsteps[(vsIdx++) & 1];
|
||||
@@ -3042,48 +3042,40 @@ template<int bIdx, int uIdx>
|
||||
inline void cvtYUV420sp2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
|
||||
{
|
||||
YUV420sp2RGB888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
|
||||
#ifdef HAVE_TBB
|
||||
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
|
||||
parallel_for(BlockedRange(0, _dst.rows/2), converter);
|
||||
parallel_for_(Range(0, _dst.rows/2), converter);
|
||||
else
|
||||
#endif
|
||||
converter(BlockedRange(0, _dst.rows/2));
|
||||
converter(Range(0, _dst.rows/2));
|
||||
}
|
||||
|
||||
template<int bIdx, int uIdx>
|
||||
inline void cvtYUV420sp2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _uv)
|
||||
{
|
||||
YUV420sp2RGBA8888Invoker<bIdx, uIdx> converter(&_dst, _stride, _y1, _uv);
|
||||
#ifdef HAVE_TBB
|
||||
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
|
||||
parallel_for(BlockedRange(0, _dst.rows/2), converter);
|
||||
parallel_for_(Range(0, _dst.rows/2), converter);
|
||||
else
|
||||
#endif
|
||||
converter(BlockedRange(0, _dst.rows/2));
|
||||
converter(Range(0, _dst.rows/2));
|
||||
}
|
||||
|
||||
template<int bIdx>
|
||||
inline void cvtYUV420p2RGB(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
|
||||
{
|
||||
YUV420p2RGB888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
|
||||
#ifdef HAVE_TBB
|
||||
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
|
||||
parallel_for(BlockedRange(0, _dst.rows/2), converter);
|
||||
parallel_for_(Range(0, _dst.rows/2), converter);
|
||||
else
|
||||
#endif
|
||||
converter(BlockedRange(0, _dst.rows/2));
|
||||
converter(Range(0, _dst.rows/2));
|
||||
}
|
||||
|
||||
template<int bIdx>
|
||||
inline void cvtYUV420p2RGBA(Mat& _dst, int _stride, const uchar* _y1, const uchar* _u, const uchar* _v, int ustepIdx, int vstepIdx)
|
||||
{
|
||||
YUV420p2RGBA8888Invoker<bIdx> converter(&_dst, _stride, _y1, _u, _v, ustepIdx, vstepIdx);
|
||||
#ifdef HAVE_TBB
|
||||
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV420_CONVERSION)
|
||||
parallel_for(BlockedRange(0, _dst.rows/2), converter);
|
||||
parallel_for_(Range(0, _dst.rows/2), converter);
|
||||
else
|
||||
#endif
|
||||
converter(BlockedRange(0, _dst.rows/2));
|
||||
converter(Range(0, _dst.rows/2));
|
||||
}
|
||||
|
||||
///////////////////////////////////// RGB -> YUV420p /////////////////////////////////////
|
||||
@@ -3167,7 +3159,7 @@ static void cvtRGBtoYUV420p(const Mat& src, Mat& dst)
|
||||
///////////////////////////////////// YUV422 -> RGB /////////////////////////////////////
|
||||
|
||||
template<int bIdx, int uIdx, int yIdx>
|
||||
struct YUV422toRGB888Invoker
|
||||
struct YUV422toRGB888Invoker : ParallelLoopBody
|
||||
{
|
||||
Mat* dst;
|
||||
const uchar* src;
|
||||
@@ -3176,10 +3168,10 @@ struct YUV422toRGB888Invoker
|
||||
YUV422toRGB888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
|
||||
: dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
int rangeBegin = range.begin();
|
||||
int rangeEnd = range.end();
|
||||
int rangeBegin = range.start;
|
||||
int rangeEnd = range.end;
|
||||
|
||||
const int uidx = 1 - yIdx + uIdx * 2;
|
||||
const int vidx = (2 + uidx) % 4;
|
||||
@@ -3213,7 +3205,7 @@ struct YUV422toRGB888Invoker
|
||||
};
|
||||
|
||||
template<int bIdx, int uIdx, int yIdx>
|
||||
struct YUV422toRGBA8888Invoker
|
||||
struct YUV422toRGBA8888Invoker : ParallelLoopBody
|
||||
{
|
||||
Mat* dst;
|
||||
const uchar* src;
|
||||
@@ -3222,10 +3214,10 @@ struct YUV422toRGBA8888Invoker
|
||||
YUV422toRGBA8888Invoker(Mat* _dst, int _stride, const uchar* _yuv)
|
||||
: dst(_dst), src(_yuv), width(_dst->cols), stride(_stride) {}
|
||||
|
||||
void operator()(const BlockedRange& range) const
|
||||
void operator()(const Range& range) const
|
||||
{
|
||||
int rangeBegin = range.begin();
|
||||
int rangeEnd = range.end();
|
||||
int rangeBegin = range.start;
|
||||
int rangeEnd = range.end;
|
||||
|
||||
const int uidx = 1 - yIdx + uIdx * 2;
|
||||
const int vidx = (2 + uidx) % 4;
|
||||
@@ -3266,24 +3258,20 @@ template<int bIdx, int uIdx, int yIdx>
|
||||
inline void cvtYUV422toRGB(Mat& _dst, int _stride, const uchar* _yuv)
|
||||
{
|
||||
YUV422toRGB888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
|
||||
#ifdef HAVE_TBB
|
||||
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
|
||||
parallel_for(BlockedRange(0, _dst.rows), converter);
|
||||
parallel_for_(Range(0, _dst.rows), converter);
|
||||
else
|
||||
#endif
|
||||
converter(BlockedRange(0, _dst.rows));
|
||||
converter(Range(0, _dst.rows));
|
||||
}
|
||||
|
||||
template<int bIdx, int uIdx, int yIdx>
|
||||
inline void cvtYUV422toRGBA(Mat& _dst, int _stride, const uchar* _yuv)
|
||||
{
|
||||
YUV422toRGBA8888Invoker<bIdx, uIdx, yIdx> converter(&_dst, _stride, _yuv);
|
||||
#ifdef HAVE_TBB
|
||||
if (_dst.total() >= MIN_SIZE_FOR_PARALLEL_YUV422_CONVERSION)
|
||||
parallel_for(BlockedRange(0, _dst.rows), converter);
|
||||
parallel_for_(Range(0, _dst.rows), converter);
|
||||
else
|
||||
#endif
|
||||
converter(BlockedRange(0, _dst.rows));
|
||||
converter(Range(0, _dst.rows));
|
||||
}
|
||||
|
||||
/////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////
|
||||
|
@@ -443,7 +443,7 @@ icvGetDistanceTransformMask( int maskType, float *metrics )
|
||||
namespace cv
|
||||
{
|
||||
|
||||
struct DTColumnInvoker
|
||||
struct DTColumnInvoker : ParallelLoopBody
|
||||
{
|
||||
DTColumnInvoker( const CvMat* _src, CvMat* _dst, const int* _sat_tab, const float* _sqr_tab)
|
||||
{
|
||||
@@ -453,9 +453,9 @@ struct DTColumnInvoker
|
||||
sqr_tab = _sqr_tab;
|
||||
}
|
||||
|
||||
void operator()( const BlockedRange& range ) const
|
||||
void operator()( const Range& range ) const
|
||||
{
|
||||
int i, i1 = range.begin(), i2 = range.end();
|
||||
int i, i1 = range.start, i2 = range.end;
|
||||
int m = src->rows;
|
||||
size_t sstep = src->step, dstep = dst->step/sizeof(float);
|
||||
AutoBuffer<int> _d(m);
|
||||
@@ -490,7 +490,7 @@ struct DTColumnInvoker
|
||||
};
|
||||
|
||||
|
||||
struct DTRowInvoker
|
||||
struct DTRowInvoker : ParallelLoopBody
|
||||
{
|
||||
DTRowInvoker( CvMat* _dst, const float* _sqr_tab, const float* _inv_tab )
|
||||
{
|
||||
@@ -499,10 +499,10 @@ struct DTRowInvoker
|
||||
inv_tab = _inv_tab;
|
||||
}
|
||||
|
||||
void operator()( const BlockedRange& range ) const
|
||||
void operator()( const Range& range ) const
|
||||
{
|
||||
const float inf = 1e15f;
|
||||
int i, i1 = range.begin(), i2 = range.end();
|
||||
int i, i1 = range.start, i2 = range.end;
|
||||
int n = dst->cols;
|
||||
AutoBuffer<uchar> _buf((n+2)*2*sizeof(float) + (n+2)*sizeof(int));
|
||||
float* f = (float*)(uchar*)_buf;
|
||||
@@ -586,7 +586,7 @@ icvTrueDistTrans( const CvMat* src, CvMat* dst )
|
||||
for( ; i <= m*3; i++ )
|
||||
sat_tab[i] = i - shift;
|
||||
|
||||
cv::parallel_for(cv::BlockedRange(0, n), cv::DTColumnInvoker(src, dst, sat_tab, sqr_tab));
|
||||
cv::parallel_for_(cv::Range(0, n), cv::DTColumnInvoker(src, dst, sat_tab, sqr_tab));
|
||||
|
||||
// stage 2: compute modified distance transform for each row
|
||||
float* inv_tab = sqr_tab + n;
|
||||
@@ -598,7 +598,7 @@ icvTrueDistTrans( const CvMat* src, CvMat* dst )
|
||||
sqr_tab[i] = (float)(i*i);
|
||||
}
|
||||
|
||||
cv::parallel_for(cv::BlockedRange(0, m), cv::DTRowInvoker(dst, sqr_tab, inv_tab));
|
||||
cv::parallel_for_(cv::Range(0, m), cv::DTRowInvoker(dst, sqr_tab, inv_tab));
|
||||
}
|
||||
|
||||
|
||||
|
@@ -2986,29 +2986,23 @@ cvCalcProbDensity( const CvHistogram* hist, const CvHistogram* hist_mask,
|
||||
}
|
||||
}
|
||||
|
||||
class EqualizeHistCalcHist_Invoker
|
||||
class EqualizeHistCalcHist_Invoker : public cv::ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
enum {HIST_SZ = 256};
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
typedef tbb::mutex* MutextPtr;
|
||||
#else
|
||||
typedef void* MutextPtr;
|
||||
#endif
|
||||
|
||||
EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, MutextPtr histogramLock)
|
||||
EqualizeHistCalcHist_Invoker(cv::Mat& src, int* histogram, cv::Mutex* histogramLock)
|
||||
: src_(src), globalHistogram_(histogram), histogramLock_(histogramLock)
|
||||
{ }
|
||||
|
||||
void operator()( const cv::BlockedRange& rowRange ) const
|
||||
void operator()( const cv::Range& rowRange ) const
|
||||
{
|
||||
int localHistogram[HIST_SZ] = {0, };
|
||||
|
||||
const size_t sstep = src_.step;
|
||||
|
||||
int width = src_.cols;
|
||||
int height = rowRange.end() - rowRange.begin();
|
||||
int height = rowRange.end - rowRange.start;
|
||||
|
||||
if (src_.isContinuous())
|
||||
{
|
||||
@@ -3016,7 +3010,7 @@ public:
|
||||
height = 1;
|
||||
}
|
||||
|
||||
for (const uchar* ptr = src_.ptr<uchar>(rowRange.begin()); height--; ptr += sstep)
|
||||
for (const uchar* ptr = src_.ptr<uchar>(rowRange.start); height--; ptr += sstep)
|
||||
{
|
||||
int x = 0;
|
||||
for (; x <= width - 4; x += 4)
|
||||
@@ -3031,9 +3025,7 @@ public:
|
||||
localHistogram[ptr[x]]++;
|
||||
}
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
tbb::mutex::scoped_lock lock(*histogramLock_);
|
||||
#endif
|
||||
cv::AutoLock lock(*histogramLock_);
|
||||
|
||||
for( int i = 0; i < HIST_SZ; i++ )
|
||||
globalHistogram_[i] += localHistogram[i];
|
||||
@@ -3041,12 +3033,7 @@ public:
|
||||
|
||||
static bool isWorthParallel( const cv::Mat& src )
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
return ( src.total() >= 640*480 );
|
||||
#else
|
||||
(void)src;
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -3054,10 +3041,10 @@ private:
|
||||
|
||||
cv::Mat& src_;
|
||||
int* globalHistogram_;
|
||||
MutextPtr histogramLock_;
|
||||
cv::Mutex* histogramLock_;
|
||||
};
|
||||
|
||||
class EqualizeHistLut_Invoker
|
||||
class EqualizeHistLut_Invoker : public cv::ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
EqualizeHistLut_Invoker( cv::Mat& src, cv::Mat& dst, int* lut )
|
||||
@@ -3066,13 +3053,13 @@ public:
|
||||
lut_(lut)
|
||||
{ }
|
||||
|
||||
void operator()( const cv::BlockedRange& rowRange ) const
|
||||
void operator()( const cv::Range& rowRange ) const
|
||||
{
|
||||
const size_t sstep = src_.step;
|
||||
const size_t dstep = dst_.step;
|
||||
|
||||
int width = src_.cols;
|
||||
int height = rowRange.end() - rowRange.begin();
|
||||
int height = rowRange.end - rowRange.start;
|
||||
int* lut = lut_;
|
||||
|
||||
if (src_.isContinuous() && dst_.isContinuous())
|
||||
@@ -3081,8 +3068,8 @@ public:
|
||||
height = 1;
|
||||
}
|
||||
|
||||
const uchar* sptr = src_.ptr<uchar>(rowRange.begin());
|
||||
uchar* dptr = dst_.ptr<uchar>(rowRange.begin());
|
||||
const uchar* sptr = src_.ptr<uchar>(rowRange.start);
|
||||
uchar* dptr = dst_.ptr<uchar>(rowRange.start);
|
||||
|
||||
for (; height--; sptr += sstep, dptr += dstep)
|
||||
{
|
||||
@@ -3111,12 +3098,7 @@ public:
|
||||
|
||||
static bool isWorthParallel( const cv::Mat& src )
|
||||
{
|
||||
#ifdef HAVE_TBB
|
||||
return ( src.total() >= 640*480 );
|
||||
#else
|
||||
(void)src;
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
@@ -3143,23 +3125,18 @@ void cv::equalizeHist( InputArray _src, OutputArray _dst )
|
||||
if(src.empty())
|
||||
return;
|
||||
|
||||
#ifdef HAVE_TBB
|
||||
tbb::mutex histogramLockInstance;
|
||||
EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = &histogramLockInstance;
|
||||
#else
|
||||
EqualizeHistCalcHist_Invoker::MutextPtr histogramLock = 0;
|
||||
#endif
|
||||
Mutex histogramLockInstance;
|
||||
|
||||
const int hist_sz = EqualizeHistCalcHist_Invoker::HIST_SZ;
|
||||
int hist[hist_sz] = {0,};
|
||||
int lut[hist_sz];
|
||||
|
||||
EqualizeHistCalcHist_Invoker calcBody(src, hist, histogramLock);
|
||||
EqualizeHistCalcHist_Invoker calcBody(src, hist, &histogramLockInstance);
|
||||
EqualizeHistLut_Invoker lutBody(src, dst, lut);
|
||||
cv::BlockedRange heightRange(0, src.rows);
|
||||
cv::Range heightRange(0, src.rows);
|
||||
|
||||
if(EqualizeHistCalcHist_Invoker::isWorthParallel(src))
|
||||
parallel_for(heightRange, calcBody);
|
||||
parallel_for_(heightRange, calcBody);
|
||||
else
|
||||
calcBody(heightRange);
|
||||
|
||||
@@ -3183,7 +3160,7 @@ void cv::equalizeHist( InputArray _src, OutputArray _dst )
|
||||
}
|
||||
|
||||
if(EqualizeHistLut_Invoker::isWorthParallel(src))
|
||||
parallel_for(heightRange, lutBody);
|
||||
parallel_for_(heightRange, lutBody);
|
||||
else
|
||||
lutBody(heightRange);
|
||||
}
|
||||
|
@@ -1081,7 +1081,7 @@ cv::Mat cv::getStructuringElement(int shape, Size ksize, Point anchor)
|
||||
namespace cv
|
||||
{
|
||||
|
||||
class MorphologyRunner
|
||||
class MorphologyRunner : public ParallelLoopBody
|
||||
{
|
||||
public:
|
||||
MorphologyRunner(Mat _src, Mat _dst, int _nStripes, int _iterations,
|
||||
@@ -1102,14 +1102,14 @@ public:
|
||||
columnBorderType = _columnBorderType;
|
||||
}
|
||||
|
||||
void operator () ( const BlockedRange& range ) const
|
||||
void operator () ( const Range& range ) const
|
||||
{
|
||||
int row0 = min(cvRound(range.begin() * src.rows / nStripes), src.rows);
|
||||
int row1 = min(cvRound(range.end() * src.rows / nStripes), src.rows);
|
||||
int row0 = min(cvRound(range.start * src.rows / nStripes), src.rows);
|
||||
int row1 = min(cvRound(range.end * src.rows / nStripes), src.rows);
|
||||
|
||||
/*if(0)
|
||||
printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n",
|
||||
src.rows, src.cols, range.begin(), range.end(), row0, row1);*/
|
||||
src.rows, src.cols, range.start, range.end, row0, row1);*/
|
||||
|
||||
Mat srcStripe = src.rowRange(row0, row1);
|
||||
Mat dstStripe = dst.rowRange(row0, row1);
|
||||
@@ -1173,15 +1173,15 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
|
||||
}
|
||||
|
||||
int nStripes = 1;
|
||||
#if defined HAVE_TBB && defined HAVE_TEGRA_OPTIMIZATION
|
||||
#if defined HAVE_TEGRA_OPTIMIZATION
|
||||
if (src.data != dst.data && iterations == 1 && //NOTE: threads are not used for inplace processing
|
||||
(borderType & BORDER_ISOLATED) == 0 && //TODO: check border types
|
||||
src.rows >= 64 ) //NOTE: just heuristics
|
||||
nStripes = 4;
|
||||
#endif
|
||||
|
||||
parallel_for(BlockedRange(0, nStripes),
|
||||
MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue));
|
||||
parallel_for_(Range(0, nStripes),
|
||||
MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue));
|
||||
|
||||
//Ptr<FilterEngine> f = createMorphologyFilter(op, src.type(),
|
||||
// kernel, anchor, borderType, borderType, borderValue );
|
||||
|
Reference in New Issue
Block a user