Added a recommended number of stripes (the optional `nstripes` argument) to parallel_for_, and modified some of the functions to use larger stripes for better performance.
This commit is contained in:
parent
821de96346
commit
354a5f2686
@ -4614,11 +4614,11 @@ protected:
|
|||||||
class CV_EXPORTS ParallelLoopBody
|
class CV_EXPORTS ParallelLoopBody
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
virtual void operator() (const Range& range) const = 0;
|
|
||||||
virtual ~ParallelLoopBody();
|
virtual ~ParallelLoopBody();
|
||||||
|
virtual void operator() (const Range& range) const = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body);
|
CV_EXPORTS void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes=-1.);
|
||||||
|
|
||||||
/////////////////////////// Synchronization Primitives ///////////////////////////////
|
/////////////////////////// Synchronization Primitives ///////////////////////////////
|
||||||
|
|
||||||
|
@ -80,87 +80,114 @@
|
|||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
{
|
{
|
||||||
ParallelLoopBody::~ParallelLoopBody() { }
|
class ParallelLoopBodyWrapper
|
||||||
|
|
||||||
#ifdef HAVE_TBB
|
|
||||||
class TbbProxyLoopBody
|
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
TbbProxyLoopBody(const ParallelLoopBody& _body) :
|
ParallelLoopBodyWrapper(const ParallelLoopBody& _body, const Range& _r, double _nstripes)
|
||||||
body(&_body)
|
{
|
||||||
{ }
|
body = &_body;
|
||||||
|
wholeRange = _r;
|
||||||
|
double len = wholeRange.end - wholeRange.start;
|
||||||
|
nstripes = cvRound(_nstripes < 0 ? len : MIN(MAX(_nstripes, 1.), len));
|
||||||
|
}
|
||||||
|
void operator()(const Range& sr) const
|
||||||
|
{
|
||||||
|
Range r;
|
||||||
|
r.start = (int)(wholeRange.start +
|
||||||
|
((size_t)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
|
||||||
|
r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
|
||||||
|
((size_t)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
|
||||||
|
(*body)(r);
|
||||||
|
}
|
||||||
|
Range stripeRange() const { return Range(0, nstripes); }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
const ParallelLoopBody* body;
|
||||||
|
Range wholeRange;
|
||||||
|
int nstripes;
|
||||||
|
};
|
||||||
|
|
||||||
|
ParallelLoopBody::~ParallelLoopBody() {}
|
||||||
|
|
||||||
|
#if defined HAVE_TBB
|
||||||
|
class ProxyLoopBody : public ParallelLoopBodyWrapper
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
ProxyLoopBody(const ParallelLoopBody& _body, const Range& _r, double _nstripes)
|
||||||
|
: ParallelLoopBodyWrapper(_body, _r, _nstripes)
|
||||||
|
{}
|
||||||
|
|
||||||
void operator ()(const tbb::blocked_range<int>& range) const
|
void operator ()(const tbb::blocked_range<int>& range) const
|
||||||
{
|
{
|
||||||
body->operator()(Range(range.begin(), range.end()));
|
(*this)(Range(range.begin(), range.end()));
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
|
||||||
const ParallelLoopBody* body;
|
|
||||||
};
|
};
|
||||||
#endif // end HAVE_TBB
|
#elif defined HAVE_GCD
|
||||||
|
|
||||||
#ifdef HAVE_GCD
|
typedef ParallelLoopBodyWrapper ProxyLoopBody;
|
||||||
static
|
static
|
||||||
void block_function(void* context, size_t index)
|
void block_function(void* context, size_t index)
|
||||||
{
|
{
|
||||||
ParallelLoopBody* ptr_body = static_cast<ParallelLoopBody*>(context);
|
ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
|
||||||
ptr_body->operator()(Range(index, index + 1));
|
(*ptr_body)(Range(index, index + 1));
|
||||||
}
|
}
|
||||||
#endif // HAVE_GCD
|
#elif defined HAVE_CONCURRENCY
|
||||||
|
class ProxyLoopBody : public ParallelLoopBodyWrapper
|
||||||
void parallel_for_(const Range& range, const ParallelLoopBody& body)
|
|
||||||
{
|
{
|
||||||
#ifdef HAVE_TBB
|
public:
|
||||||
|
ProxyLoopBody(const ParallelLoopBody& _body, const Range& _r, double _nstripes)
|
||||||
|
: ParallelLoopBodyWrapper(_body, _r, _nstripes)
|
||||||
|
{}
|
||||||
|
|
||||||
|
void operator ()(int i) const
|
||||||
|
{
|
||||||
|
(*this)(Range(i, i + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
typedef ParallelLoopBodyWrapper ProxyLoopBody;
|
||||||
|
#endif
|
||||||
|
|
||||||
tbb::parallel_for(tbb::blocked_range<int>(range.start, range.end), TbbProxyLoopBody(body));
|
void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes)
|
||||||
|
{
|
||||||
|
ProxyLoopBody pbody(body, range, nstripes);
|
||||||
|
Range stripeRange = pbody.stripeRange();
|
||||||
|
|
||||||
|
#if defined HAVE_TBB
|
||||||
|
|
||||||
|
tbb::parallel_for(tbb::blocked_range<int>(stripeRange.start, stripeRange.end), pbody);
|
||||||
|
|
||||||
#elif defined HAVE_CONCURRENCY
|
#elif defined HAVE_CONCURRENCY
|
||||||
|
|
||||||
class ConcurrencyProxyLoopBody
|
Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
|
||||||
{
|
|
||||||
public:
|
|
||||||
ConcurrencyProxyLoopBody(const ParallelLoopBody& body) : _body(body) {}
|
|
||||||
|
|
||||||
void operator ()(int i) const
|
|
||||||
{
|
|
||||||
_body(Range(i, i + 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
const ParallelLoopBody& _body;
|
|
||||||
ConcurrencyProxyLoopBody& operator=(const ConcurrencyProxyLoopBody&) {return *this;}
|
|
||||||
} proxy(body);
|
|
||||||
|
|
||||||
Concurrency::parallel_for(range.start, range.end, proxy);
|
|
||||||
|
|
||||||
#elif defined HAVE_OPENMP
|
#elif defined HAVE_OPENMP
|
||||||
|
|
||||||
#pragma omp parallel for schedule(dynamic)
|
#pragma omp parallel for schedule(dynamic)
|
||||||
for (int i = range.start; i < range.end; ++i)
|
for (int i = stripeRange.start; i < stripeRange.end; ++i)
|
||||||
body(Range(i, i + 1));
|
pbody(Range(i, i + 1));
|
||||||
|
|
||||||
#elif defined HAVE_GCD
|
#elif defined HAVE_GCD
|
||||||
|
|
||||||
dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
|
dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
|
||||||
dispatch_apply_f(range.end - range.start, concurrent_queue, &const_cast<ParallelLoopBody&>(body), block_function);
|
dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function);
|
||||||
|
|
||||||
#elif defined HAVE_CSTRIPES
|
#elif defined HAVE_CSTRIPES
|
||||||
|
|
||||||
parallel()
|
parallel()
|
||||||
{
|
{
|
||||||
int offset = range.start;
|
int offset = stripeRange.start;
|
||||||
int len = range.end - offset;
|
int len = stripeRange.end - offset;
|
||||||
Range r(offset + CPX_RANGE_START(len), offset + CPX_RANGE_END(len));
|
Range r(offset + CPX_RANGE_START(len), offset + CPX_RANGE_END(len));
|
||||||
body(r);
|
pbody(r);
|
||||||
barrier();
|
barrier();
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
body(range);
|
pbody(stripeRange);
|
||||||
|
|
||||||
#endif // end HAVE_TBB
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace cv
|
} // namespace cv
|
||||||
|
@ -187,7 +187,7 @@ private:
|
|||||||
template <typename Cvt>
|
template <typename Cvt>
|
||||||
void CvtColorLoop(const Mat& src, Mat& dst, const Cvt& cvt)
|
void CvtColorLoop(const Mat& src, Mat& dst, const Cvt& cvt)
|
||||||
{
|
{
|
||||||
parallel_for_(Range(0, src.rows), CvtColorLoop_Invoker<Cvt>(src, dst, cvt));
|
parallel_for_(Range(0, src.rows), CvtColorLoop_Invoker<Cvt>(src, dst, cvt), src.total()/(double)(1<<16) );
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
|
////////////////// Various 3/4-channel to 3/4-channel RGB transformations /////////////////
|
||||||
|
@ -357,7 +357,7 @@ resizeNN( const Mat& src, Mat& dst, double fx, double fy )
|
|||||||
|
|
||||||
Range range(0, dsize.height);
|
Range range(0, dsize.height);
|
||||||
resizeNNInvoker invoker(src, dst, x_ofs, pix_size4, ify);
|
resizeNNInvoker invoker(src, dst, x_ofs, pix_size4, ify);
|
||||||
parallel_for_(range, invoker);
|
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1222,7 +1222,7 @@ static void resizeGeneric_( const Mat& src, Mat& dst,
|
|||||||
Range range(0, dsize.height);
|
Range range(0, dsize.height);
|
||||||
resizeGeneric_Invoker<HResize, VResize> invoker(src, dst, xofs, yofs, (const AT*)_alpha, beta,
|
resizeGeneric_Invoker<HResize, VResize> invoker(src, dst, xofs, yofs, (const AT*)_alpha, beta,
|
||||||
ssize, dsize, ksize, xmin, xmax);
|
ssize, dsize, ksize, xmin, xmax);
|
||||||
parallel_for_(range, invoker);
|
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename WT>
|
template <typename T, typename WT>
|
||||||
@ -1381,7 +1381,7 @@ static void resizeAreaFast_( const Mat& src, Mat& dst, const int* ofs, const int
|
|||||||
Range range(0, dst.rows);
|
Range range(0, dst.rows);
|
||||||
resizeAreaFast_Invoker<T, WT, VecOp> invoker(src, dst, scale_x,
|
resizeAreaFast_Invoker<T, WT, VecOp> invoker(src, dst, scale_x,
|
||||||
scale_y, ofs, xofs);
|
scale_y, ofs, xofs);
|
||||||
parallel_for_(range, invoker);
|
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct DecimateAlpha
|
struct DecimateAlpha
|
||||||
@ -2680,14 +2680,14 @@ typedef void (*RemapFunc)(const Mat& _src, Mat& _dst, const Mat& _xy,
|
|||||||
const Mat& _fxy, const void* _wtab,
|
const Mat& _fxy, const void* _wtab,
|
||||||
int borderType, const Scalar& _borderValue);
|
int borderType, const Scalar& _borderValue);
|
||||||
|
|
||||||
class remapInvoker :
|
class RemapInvoker :
|
||||||
public ParallelLoopBody
|
public ParallelLoopBody
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
remapInvoker(const Mat& _src, Mat _dst, const Mat& _map1, const Mat& _map2, const Mat *_m1,
|
RemapInvoker(const Mat& _src, Mat& _dst, const Mat *_m1,
|
||||||
const Mat *_m2, int _interpolation, int _borderType, const Scalar &_borderValue,
|
const Mat *_m2, int _interpolation, int _borderType, const Scalar &_borderValue,
|
||||||
int _planar_input, RemapNNFunc _nnfunc, RemapFunc _ifunc, const void *_ctab) :
|
int _planar_input, RemapNNFunc _nnfunc, RemapFunc _ifunc, const void *_ctab) :
|
||||||
ParallelLoopBody(), src(_src), dst(_dst), map1(_map1), map2(_map2), m1(_m1), m2(_m2),
|
ParallelLoopBody(), src(&_src), dst(&_dst), m1(_m1), m2(_m2),
|
||||||
interpolation(_interpolation), borderType(_borderType), borderValue(_borderValue),
|
interpolation(_interpolation), borderType(_borderType), borderValue(_borderValue),
|
||||||
planar_input(_planar_input), nnfunc(_nnfunc), ifunc(_ifunc), ctab(_ctab)
|
planar_input(_planar_input), nnfunc(_nnfunc), ifunc(_ifunc), ctab(_ctab)
|
||||||
{
|
{
|
||||||
@ -2697,9 +2697,9 @@ public:
|
|||||||
{
|
{
|
||||||
int x, y, x1, y1;
|
int x, y, x1, y1;
|
||||||
const int buf_size = 1 << 14;
|
const int buf_size = 1 << 14;
|
||||||
int brows0 = std::min(128, dst.rows), map_depth = map1.depth();
|
int brows0 = std::min(128, dst->rows), map_depth = m1->depth();
|
||||||
int bcols0 = std::min(buf_size/brows0, dst.cols);
|
int bcols0 = std::min(buf_size/brows0, dst->cols);
|
||||||
brows0 = std::min(buf_size/bcols0, dst.rows);
|
brows0 = std::min(buf_size/bcols0, dst->rows);
|
||||||
#if CV_SSE2
|
#if CV_SSE2
|
||||||
bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
|
bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
|
||||||
#endif
|
#endif
|
||||||
@ -2710,17 +2710,17 @@ public:
|
|||||||
|
|
||||||
for( y = range.start; y < range.end; y += brows0 )
|
for( y = range.start; y < range.end; y += brows0 )
|
||||||
{
|
{
|
||||||
for( x = 0; x < dst.cols; x += bcols0 )
|
for( x = 0; x < dst->cols; x += bcols0 )
|
||||||
{
|
{
|
||||||
int brows = std::min(brows0, range.end - y);
|
int brows = std::min(brows0, range.end - y);
|
||||||
int bcols = std::min(bcols0, dst.cols - x);
|
int bcols = std::min(bcols0, dst->cols - x);
|
||||||
Mat dpart(dst, Rect(x, y, bcols, brows));
|
Mat dpart(*dst, Rect(x, y, bcols, brows));
|
||||||
Mat bufxy(_bufxy, Rect(0, 0, bcols, brows));
|
Mat bufxy(_bufxy, Rect(0, 0, bcols, brows));
|
||||||
|
|
||||||
if( nnfunc )
|
if( nnfunc )
|
||||||
{
|
{
|
||||||
if( map1.type() == CV_16SC2 && !map2.data ) // the data is already in the right format
|
if( m1->type() == CV_16SC2 && !m2->data ) // the data is already in the right format
|
||||||
bufxy = map1(Rect(x, y, bcols, brows));
|
bufxy = (*m1)(Rect(x, y, bcols, brows));
|
||||||
else if( map_depth != CV_32F )
|
else if( map_depth != CV_32F )
|
||||||
{
|
{
|
||||||
for( y1 = 0; y1 < brows; y1++ )
|
for( y1 = 0; y1 < brows; y1++ )
|
||||||
@ -2738,14 +2738,14 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if( !planar_input )
|
else if( !planar_input )
|
||||||
map1(Rect(x, y, bcols, brows)).convertTo(bufxy, bufxy.depth());
|
(*m1)(Rect(x, y, bcols, brows)).convertTo(bufxy, bufxy.depth());
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for( y1 = 0; y1 < brows; y1++ )
|
for( y1 = 0; y1 < brows; y1++ )
|
||||||
{
|
{
|
||||||
short* XY = (short*)(bufxy.data + bufxy.step*y1);
|
short* XY = (short*)(bufxy.data + bufxy.step*y1);
|
||||||
const float* sX = (const float*)(map1.data + map1.step*(y+y1)) + x;
|
const float* sX = (const float*)(m1->data + m1->step*(y+y1)) + x;
|
||||||
const float* sY = (const float*)(map2.data + map2.step*(y+y1)) + x;
|
const float* sY = (const float*)(m2->data + m2->step*(y+y1)) + x;
|
||||||
x1 = 0;
|
x1 = 0;
|
||||||
|
|
||||||
#if CV_SSE2
|
#if CV_SSE2
|
||||||
@ -2778,7 +2778,7 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nnfunc( src, dpart, bufxy, borderType, borderValue );
|
nnfunc( *src, dpart, bufxy, borderType, borderValue );
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2788,16 +2788,15 @@ public:
|
|||||||
short* XY = (short*)(bufxy.data + bufxy.step*y1);
|
short* XY = (short*)(bufxy.data + bufxy.step*y1);
|
||||||
ushort* A = (ushort*)(bufa.data + bufa.step*y1);
|
ushort* A = (ushort*)(bufa.data + bufa.step*y1);
|
||||||
|
|
||||||
if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) ||
|
if( m1->type() == CV_16SC2 && (m2->type() == CV_16UC1 || m2->type() == CV_16SC1) )
|
||||||
(map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.type() == CV_16SC1)) )
|
|
||||||
{
|
{
|
||||||
bufxy = m1->operator()(Rect(x, y, bcols, brows));
|
bufxy = (*m1)(Rect(x, y, bcols, brows));
|
||||||
bufa = m2->operator()(Rect(x, y, bcols, brows));
|
bufa = (*m2)(Rect(x, y, bcols, brows));
|
||||||
}
|
}
|
||||||
else if( planar_input )
|
else if( planar_input )
|
||||||
{
|
{
|
||||||
const float* sX = (const float*)(map1.data + map1.step*(y+y1)) + x;
|
const float* sX = (const float*)(m1->data + m1->step*(y+y1)) + x;
|
||||||
const float* sY = (const float*)(map2.data + map2.step*(y+y1)) + x;
|
const float* sY = (const float*)(m2->data + m2->step*(y+y1)) + x;
|
||||||
|
|
||||||
x1 = 0;
|
x1 = 0;
|
||||||
#if CV_SSE2
|
#if CV_SSE2
|
||||||
@ -2850,7 +2849,7 @@ public:
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
const float* sXY = (const float*)(map1.data + map1.step*(y+y1)) + x*2;
|
const float* sXY = (const float*)(m1->data + m1->step*(y+y1)) + x*2;
|
||||||
|
|
||||||
for( x1 = 0; x1 < bcols; x1++ )
|
for( x1 = 0; x1 < bcols; x1++ )
|
||||||
{
|
{
|
||||||
@ -2863,15 +2862,14 @@ public:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ifunc(src, dpart, bufxy, bufa, ctab, borderType, borderValue);
|
ifunc(*src, dpart, bufxy, bufa, ctab, borderType, borderValue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Mat src;
|
const Mat* src;
|
||||||
Mat dst;
|
Mat* dst;
|
||||||
Mat map1, map2;
|
|
||||||
const Mat *m1, *m2;
|
const Mat *m1, *m2;
|
||||||
int interpolation, borderType;
|
int interpolation, borderType;
|
||||||
Scalar borderValue;
|
Scalar borderValue;
|
||||||
@ -2961,8 +2959,8 @@ void cv::remap( InputArray _src, OutputArray _dst,
|
|||||||
|
|
||||||
const Mat *m1 = &map1, *m2 = &map2;
|
const Mat *m1 = &map1, *m2 = &map2;
|
||||||
|
|
||||||
if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) ||
|
if( (map1.type() == CV_16SC2 && (map2.type() == CV_16UC1 || map2.type() == CV_16SC1 || !map2.data)) ||
|
||||||
(map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.type() == CV_16SC1)) )
|
(map2.type() == CV_16SC2 && (map1.type() == CV_16UC1 || map1.type() == CV_16SC1 || !map1.data)) )
|
||||||
{
|
{
|
||||||
if( map1.type() != CV_16SC2 )
|
if( map1.type() != CV_16SC2 )
|
||||||
std::swap(m1, m2);
|
std::swap(m1, m2);
|
||||||
@ -2974,11 +2972,10 @@ void cv::remap( InputArray _src, OutputArray _dst,
|
|||||||
planar_input = map1.channels() == 1;
|
planar_input = map1.channels() == 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
Range range(0, dst.rows);
|
RemapInvoker invoker(src, dst, m1, m2, interpolation,
|
||||||
remapInvoker invoker(src, dst, map1, map2, m1, m2, interpolation,
|
|
||||||
borderType, borderValue, planar_input, nnfunc, ifunc,
|
borderType, borderValue, planar_input, nnfunc, ifunc,
|
||||||
ctab);
|
ctab);
|
||||||
parallel_for_(range, invoker);
|
parallel_for_(Range(0, dst.rows), invoker, dst.total()/(double)(1<<16));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -3300,7 +3297,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
|
|||||||
Range range(0, dst.rows);
|
Range range(0, dst.rows);
|
||||||
warpAffineInvoker invoker(src, dst, interpolation, borderType,
|
warpAffineInvoker invoker(src, dst, interpolation, borderType,
|
||||||
borderValue, adelta, bdelta, M);
|
borderValue, adelta, bdelta, M);
|
||||||
parallel_for_(range, invoker);
|
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -3430,7 +3427,7 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
|
|||||||
|
|
||||||
Range range(0, dst.rows);
|
Range range(0, dst.rows);
|
||||||
warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
|
warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
|
||||||
parallel_for_(range, invoker);
|
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1919,7 +1919,7 @@ bilateralFilter_8u( const Mat& src, Mat& dst, int d,
|
|||||||
}
|
}
|
||||||
|
|
||||||
BilateralFilter_8u_Invoker body(dst, temp, radius, maxk, space_ofs, space_weight, color_weight);
|
BilateralFilter_8u_Invoker body(dst, temp, radius, maxk, space_ofs, space_weight, color_weight);
|
||||||
parallel_for_(Range(0, size.height), body);
|
parallel_for_(Range(0, size.height), body, dst.total()/(double)(1<<16));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2189,7 +2189,7 @@ bilateralFilter_32f( const Mat& src, Mat& dst, int d,
|
|||||||
// parallel_for usage
|
// parallel_for usage
|
||||||
|
|
||||||
BilateralFilter_32f_Invoker body(cn, radius, maxk, space_ofs, temp, dst, scale_index, space_weight, expLUT);
|
BilateralFilter_32f_Invoker body(cn, radius, maxk, space_ofs, temp, dst, scale_index, space_weight, expLUT);
|
||||||
parallel_for_(Range(0, size.height), body);
|
parallel_for_(Range(0, size.height), body, dst.total()/(double)(1<<16));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -664,13 +664,11 @@ getThreshVal_Otsu_8u( const Mat& _src )
|
|||||||
class ThresholdRunner : public ParallelLoopBody
|
class ThresholdRunner : public ParallelLoopBody
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ThresholdRunner(Mat _src, Mat _dst, int _nStripes, double _thresh, double _maxval, int _thresholdType)
|
ThresholdRunner(Mat _src, Mat _dst, double _thresh, double _maxval, int _thresholdType)
|
||||||
{
|
{
|
||||||
src = _src;
|
src = _src;
|
||||||
dst = _dst;
|
dst = _dst;
|
||||||
|
|
||||||
nStripes = _nStripes;
|
|
||||||
|
|
||||||
thresh = _thresh;
|
thresh = _thresh;
|
||||||
maxval = _maxval;
|
maxval = _maxval;
|
||||||
thresholdType = _thresholdType;
|
thresholdType = _thresholdType;
|
||||||
@ -678,13 +676,8 @@ public:
|
|||||||
|
|
||||||
void operator () ( const Range& range ) const
|
void operator () ( const Range& range ) const
|
||||||
{
|
{
|
||||||
int row0 = std::min(cvRound(range.start * src.rows / nStripes), src.rows);
|
int row0 = range.start;
|
||||||
int row1 = range.end >= nStripes ? src.rows :
|
int row1 = range.end;
|
||||||
std::min(cvRound(range.end * src.rows / nStripes), src.rows);
|
|
||||||
|
|
||||||
/*if(0)
|
|
||||||
printf("Size = (%d, %d), range[%d,%d), row0 = %d, row1 = %d\n",
|
|
||||||
src.rows, src.cols, range.begin(), range.end(), row0, row1);*/
|
|
||||||
|
|
||||||
Mat srcStripe = src.rowRange(row0, row1);
|
Mat srcStripe = src.rowRange(row0, row1);
|
||||||
Mat dstStripe = dst.rowRange(row0, row1);
|
Mat dstStripe = dst.rowRange(row0, row1);
|
||||||
@ -789,10 +782,9 @@ double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double m
|
|||||||
else
|
else
|
||||||
CV_Error( CV_StsUnsupportedFormat, "" );
|
CV_Error( CV_StsUnsupportedFormat, "" );
|
||||||
|
|
||||||
size_t nStripes = (src.total() + (1<<15)) >> 16;
|
parallel_for_(Range(0, dst.rows),
|
||||||
nStripes = MAX(MIN(nStripes, (size_t)4), (size_t)1);
|
ThresholdRunner(src, dst, thresh, maxval, type),
|
||||||
parallel_for_(Range(0, (int)nStripes),
|
dst.total()/(double)(1<<16));
|
||||||
ThresholdRunner(src, dst, nStripes, thresh, maxval, type));
|
|
||||||
return thresh;
|
return thresh;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -298,7 +298,7 @@ namespace
|
|||||||
|
|
||||||
void cv::BackgroundSubtractorGMG::operator ()(InputArray _frame, OutputArray _fgmask, double newLearningRate)
|
void cv::BackgroundSubtractorGMG::operator ()(InputArray _frame, OutputArray _fgmask, double newLearningRate)
|
||||||
{
|
{
|
||||||
cv::Mat frame = _frame.getMat();
|
Mat frame = _frame.getMat();
|
||||||
|
|
||||||
CV_Assert(frame.depth() == CV_8U || frame.depth() == CV_16U || frame.depth() == CV_32F);
|
CV_Assert(frame.depth() == CV_8U || frame.depth() == CV_16U || frame.depth() == CV_32F);
|
||||||
CV_Assert(frame.channels() == 1 || frame.channels() == 3 || frame.channels() == 4);
|
CV_Assert(frame.channels() == 1 || frame.channels() == 3 || frame.channels() == 4);
|
||||||
@ -313,16 +313,16 @@ void cv::BackgroundSubtractorGMG::operator ()(InputArray _frame, OutputArray _fg
|
|||||||
initialize(frame.size(), 0.0, frame.depth() == CV_8U ? 255.0 : frame.depth() == CV_16U ? std::numeric_limits<ushort>::max() : 1.0);
|
initialize(frame.size(), 0.0, frame.depth() == CV_8U ? 255.0 : frame.depth() == CV_16U ? std::numeric_limits<ushort>::max() : 1.0);
|
||||||
|
|
||||||
_fgmask.create(frameSize_, CV_8UC1);
|
_fgmask.create(frameSize_, CV_8UC1);
|
||||||
cv::Mat fgmask = _fgmask.getMat();
|
Mat fgmask = _fgmask.getMat();
|
||||||
|
|
||||||
GMG_LoopBody body(frame, fgmask, nfeatures_, colors_, weights_,
|
GMG_LoopBody body(frame, fgmask, nfeatures_, colors_, weights_,
|
||||||
maxFeatures, learningRate, numInitializationFrames, quantizationLevels, backgroundPrior, decisionThreshold,
|
maxFeatures, learningRate, numInitializationFrames, quantizationLevels, backgroundPrior, decisionThreshold,
|
||||||
maxVal_, minVal_, frameNum_, updateBackgroundModel);
|
maxVal_, minVal_, frameNum_, updateBackgroundModel);
|
||||||
cv::parallel_for_(cv::Range(0, frame.rows), body);
|
parallel_for_(Range(0, frame.rows), body, frame.total()/(double)(1<<16));
|
||||||
|
|
||||||
if (smoothingRadius > 0)
|
if (smoothingRadius > 0)
|
||||||
{
|
{
|
||||||
cv::medianBlur(fgmask, buf_, smoothingRadius);
|
medianBlur(fgmask, buf_, smoothingRadius);
|
||||||
cv::swap(fgmask, buf_);
|
cv::swap(fgmask, buf_);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user