Reimplement thread management functions:
* Refactor auto-detection of parallel frameworks * Implement cv::getNumThreads, cv::setNumThreads and cv::getThreadNum for all supported frameworks * From now on, cv::setNumThreads(0) can be used to turn off parallelisation
This commit is contained in:
parent
b54f59de90
commit
460644b8a4
@ -42,139 +42,185 @@
|
|||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
|
||||||
#if !defined HAVE_TBB && !defined HAVE_OPENMP && !defined HAVE_GCD && !defined HAVE_CONCURRENCY && !defined HAVE_CSTRIPES
|
#ifdef _OPENMP
|
||||||
#ifdef __APPLE__
|
#define HAVE_OPENMP
|
||||||
#define HAVE_GCD
|
|
||||||
#elif defined _MSC_VER && _MSC_VER >= 1600
|
|
||||||
#define HAVE_CONCURRENCY
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_CONCURRENCY
|
#ifdef __APPLE__
|
||||||
#include <ppl.h>
|
#define HAVE_GCD
|
||||||
#elif defined HAVE_OPENMP
|
#endif
|
||||||
#include <omp.h>
|
|
||||||
#elif defined HAVE_GCD
|
#if defined _MSC_VER && _MSC_VER >= 1600
|
||||||
#include <dispatch/dispatch.h>
|
#define HAVE_CONCURRENCY
|
||||||
#elif defined HAVE_TBB
|
#endif
|
||||||
|
|
||||||
|
/* IMPORTANT: always use the same order of defines
|
||||||
|
1. HAVE_TBB - 3rdparty library, should be explicitly enabled
|
||||||
|
2. HAVE_CSTRIPES - 3rdparty library, should be explicitly enabled
|
||||||
|
3. HAVE_OPENMP - integrated into the compiler, should be explicitly enabled
|
||||||
|
4. HAVE_GCD - system wide, used automatically (APPLE only)
|
||||||
|
5. HAVE_CONCURRENCY - part of runtime, used automatically (Windows only - MSVS 10, MSVS 11)
|
||||||
|
*/
|
||||||
|
|
||||||
|
#if defined HAVE_TBB
|
||||||
#include "tbb/tbb_stddef.h"
|
#include "tbb/tbb_stddef.h"
|
||||||
#if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
|
#if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
|
||||||
#include "tbb/tbb.h"
|
#include "tbb/tbb.h"
|
||||||
#include "tbb/task.h"
|
#include "tbb/task.h"
|
||||||
|
#if TBB_INTERFACE_VERSION >= 6100
|
||||||
|
#include "tbb/task_arena.h"
|
||||||
|
#endif
|
||||||
#undef min
|
#undef min
|
||||||
#undef max
|
#undef max
|
||||||
#else
|
#else
|
||||||
#undef HAVE_TBB
|
#undef HAVE_TBB
|
||||||
#endif // end TBB version
|
#endif // end TBB version
|
||||||
#elif defined HAVE_CSTRIPES
|
|
||||||
#include "C=.h"
|
|
||||||
#undef shared
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
#ifndef HAVE_TBB
|
||||||
HAVE_TBB - using TBB
|
#if defined HAVE_CSTRIPES
|
||||||
HAVE_GCD - using GCD
|
#include "C=.h"
|
||||||
HAVE_OPENMP - using OpenMP
|
#undef shared
|
||||||
HAVE_CONCURRENCY - using visual studio 2010 concurrency
|
#elif defined HAVE_OPENMP
|
||||||
*/
|
#include <omp.h>
|
||||||
|
#elif defined HAVE_GCD
|
||||||
|
#include <dispatch/dispatch.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
#elif defined HAVE_CONCURRENCY
|
||||||
|
#include <ppl.h>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined HAVE_TBB || defined HAVE_CSTRIPES || defined HAVE_OPENMP || defined HAVE_GCD || defined HAVE_CONCURRENCY
|
||||||
|
#define HAVE_PARALLEL_FRAMEWORK
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
{
|
{
|
||||||
|
ParallelLoopBody::~ParallelLoopBody() {}
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
#ifdef HAVE_PARALLEL_FRAMEWORK
|
||||||
class ParallelLoopBodyWrapper
|
class ParallelLoopBodyWrapper
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ParallelLoopBodyWrapper(const ParallelLoopBody& _body, const Range& _r, double _nstripes)
|
ParallelLoopBodyWrapper(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
|
||||||
{
|
{
|
||||||
body = &_body;
|
body = &_body;
|
||||||
wholeRange = _r;
|
wholeRange = _r;
|
||||||
double len = wholeRange.end - wholeRange.start;
|
double len = wholeRange.end - wholeRange.start;
|
||||||
nstripes = cvRound(_nstripes < 0 ? len : MIN(MAX(_nstripes, 1.), len));
|
nstripes = cvRound(_nstripes <= 0 ? len : MIN(MAX(_nstripes, 1.), len));
|
||||||
}
|
}
|
||||||
void operator()(const Range& sr) const
|
void operator()(const cv::Range& sr) const
|
||||||
{
|
{
|
||||||
Range r;
|
cv::Range r;
|
||||||
r.start = (int)(wholeRange.start +
|
r.start = (int)(wholeRange.start +
|
||||||
((size_t)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
|
((size_t)sr.start*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
|
||||||
r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
|
r.end = sr.end >= nstripes ? wholeRange.end : (int)(wholeRange.start +
|
||||||
((size_t)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
|
((size_t)sr.end*(wholeRange.end - wholeRange.start) + nstripes/2)/nstripes);
|
||||||
(*body)(r);
|
(*body)(r);
|
||||||
}
|
}
|
||||||
Range stripeRange() const { return Range(0, nstripes); }
|
cv::Range stripeRange() const { return cv::Range(0, nstripes); }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
const ParallelLoopBody* body;
|
const cv::ParallelLoopBody* body;
|
||||||
Range wholeRange;
|
cv::Range wholeRange;
|
||||||
int nstripes;
|
int nstripes;
|
||||||
};
|
};
|
||||||
|
|
||||||
ParallelLoopBody::~ParallelLoopBody() {}
|
|
||||||
|
|
||||||
#if defined HAVE_TBB
|
#if defined HAVE_TBB
|
||||||
class ProxyLoopBody : public ParallelLoopBodyWrapper
|
class ProxyLoopBody : public ParallelLoopBodyWrapper
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ProxyLoopBody(const ParallelLoopBody& _body, const Range& _r, double _nstripes)
|
ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
|
||||||
: ParallelLoopBodyWrapper(_body, _r, _nstripes)
|
: ParallelLoopBodyWrapper(_body, _r, _nstripes)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
void operator ()(const tbb::blocked_range<int>& range) const
|
void operator ()(const tbb::blocked_range<int>& range) const
|
||||||
{
|
{
|
||||||
this->ParallelLoopBodyWrapper::operator()(Range(range.begin(), range.end()));
|
this->ParallelLoopBodyWrapper::operator()(cv::Range(range.begin(), range.end()));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
#elif defined HAVE_GCD
|
#elif defined HAVE_CSTRIPES || defined HAVE_OPENMP
|
||||||
|
|
||||||
typedef ParallelLoopBodyWrapper ProxyLoopBody;
|
typedef ParallelLoopBodyWrapper ProxyLoopBody;
|
||||||
static
|
#elif defined HAVE_GCD
|
||||||
void block_function(void* context, size_t index)
|
typedef ParallelLoopBodyWrapper ProxyLoopBody;
|
||||||
|
static void block_function(void* context, size_t index)
|
||||||
{
|
{
|
||||||
ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
|
ProxyLoopBody* ptr_body = static_cast<ProxyLoopBody*>(context);
|
||||||
(*ptr_body)(Range(index, index + 1));
|
(*ptr_body)(cv::Range(index, index + 1));
|
||||||
}
|
}
|
||||||
#elif defined HAVE_CONCURRENCY
|
#elif defined HAVE_CONCURRENCY
|
||||||
class ProxyLoopBody : public ParallelLoopBodyWrapper
|
class ProxyLoopBody : public ParallelLoopBodyWrapper
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ProxyLoopBody(const ParallelLoopBody& _body, const Range& _r, double _nstripes)
|
ProxyLoopBody(const cv::ParallelLoopBody& _body, const cv::Range& _r, double _nstripes)
|
||||||
: ParallelLoopBodyWrapper(_body, _r, _nstripes)
|
: ParallelLoopBodyWrapper(_body, _r, _nstripes)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
void operator ()(int i) const
|
void operator ()(int i) const
|
||||||
{
|
{
|
||||||
this->ParallelLoopBodyWrapper::operator()(Range(i, i + 1));
|
this->ParallelLoopBodyWrapper::operator()(cv::Range(i, i + 1));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
#else
|
#else
|
||||||
typedef ParallelLoopBodyWrapper ProxyLoopBody;
|
typedef ParallelLoopBodyWrapper ProxyLoopBody;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void parallel_for_(const Range& range, const ParallelLoopBody& body, double nstripes)
|
static int numThreads = -1;
|
||||||
|
|
||||||
|
#if defined HAVE_TBB
|
||||||
|
static tbb::task_scheduler_init tbbScheduler(tbb::task_scheduler_init::deferred);
|
||||||
|
#elif defined HAVE_CSTRIPES
|
||||||
|
// nothing for C=
|
||||||
|
#elif defined HAVE_OPENMP
|
||||||
|
static int numThreadsMax = omp_get_max_threads();
|
||||||
|
#elif defined HAVE_GCD
|
||||||
|
// nothing for GCD
|
||||||
|
#elif defined HAVE_CONCURRENCY
|
||||||
|
class SchedPtr
|
||||||
|
{
|
||||||
|
Concurrency::Scheduler* sched_;
|
||||||
|
public:
|
||||||
|
Concurrency::Scheduler* operator->() { return sched_; }
|
||||||
|
operator Concurrency::Scheduler*() { return sched_; }
|
||||||
|
|
||||||
|
void operator=(Concurrency::Scheduler* sched)
|
||||||
|
{
|
||||||
|
if (sched_) sched_->Release();
|
||||||
|
sched_ = sched;
|
||||||
|
}
|
||||||
|
|
||||||
|
SchedPtr() : sched_(0) {}
|
||||||
|
~SchedPtr() { *this = 0; }
|
||||||
|
};
|
||||||
|
static SchedPtr pplScheduler;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // HAVE_PARALLEL_FRAMEWORK
|
||||||
|
|
||||||
|
} //namespace
|
||||||
|
|
||||||
|
/* ================================ parallel_for_ ================================ */
|
||||||
|
|
||||||
|
void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes)
|
||||||
|
{
|
||||||
|
#ifdef HAVE_PARALLEL_FRAMEWORK
|
||||||
|
|
||||||
|
if(numThreads != 0)
|
||||||
{
|
{
|
||||||
ProxyLoopBody pbody(body, range, nstripes);
|
ProxyLoopBody pbody(body, range, nstripes);
|
||||||
Range stripeRange = pbody.stripeRange();
|
cv::Range stripeRange = pbody.stripeRange();
|
||||||
|
|
||||||
#if defined HAVE_TBB
|
#if defined HAVE_TBB
|
||||||
|
|
||||||
tbb::parallel_for(tbb::blocked_range<int>(stripeRange.start, stripeRange.end), pbody);
|
tbb::parallel_for(tbb::blocked_range<int>(stripeRange.start, stripeRange.end), pbody);
|
||||||
|
|
||||||
#elif defined HAVE_CONCURRENCY
|
|
||||||
|
|
||||||
Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
|
|
||||||
|
|
||||||
#elif defined HAVE_OPENMP
|
|
||||||
|
|
||||||
#pragma omp parallel for schedule(dynamic)
|
|
||||||
for (int i = stripeRange.start; i < stripeRange.end; ++i)
|
|
||||||
pbody(Range(i, i + 1));
|
|
||||||
|
|
||||||
#elif defined HAVE_GCD
|
|
||||||
|
|
||||||
dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
|
|
||||||
dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function);
|
|
||||||
|
|
||||||
#elif defined HAVE_CSTRIPES
|
#elif defined HAVE_CSTRIPES
|
||||||
|
|
||||||
parallel()
|
parallel(MAX(0, numThreads))
|
||||||
{
|
{
|
||||||
int offset = stripeRange.start;
|
int offset = stripeRange.start;
|
||||||
int len = stripeRange.end - offset;
|
int len = stripeRange.end - offset;
|
||||||
@ -183,58 +229,151 @@ namespace cv
|
|||||||
barrier();
|
barrier();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined HAVE_OPENMP
|
||||||
|
|
||||||
|
#pragma omp parallel for schedule(dynamic)
|
||||||
|
for (int i = stripeRange.start; i < stripeRange.end; ++i)
|
||||||
|
pbody(Range(i, i + 1));
|
||||||
|
|
||||||
|
#elif defined HAVE_GCD
|
||||||
|
|
||||||
|
dispatch_queue_t concurrent_queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
|
||||||
|
dispatch_apply_f(stripeRange.end - stripeRange.start, concurrent_queue, &pbody, block_function);
|
||||||
|
|
||||||
|
#elif defined HAVE_CONCURRENCY
|
||||||
|
|
||||||
|
if(!pplScheduler || pplScheduler->Id() == Concurrency::CurrentScheduler::Id())
|
||||||
|
{
|
||||||
|
Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pplScheduler->Attach();
|
||||||
|
Concurrency::parallel_for(stripeRange.start, stripeRange.end, pbody);
|
||||||
|
Concurrency::CurrentScheduler::Detach();
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
pbody(stripeRange);
|
#error You have hacked and compiling with unsupported parallel framework
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
|
||||||
} // namespace cv
|
#endif // HAVE_PARALLEL_FRAMEWORK
|
||||||
|
{
|
||||||
|
(void)nstripes;
|
||||||
static int numThreads = 0;
|
body(range);
|
||||||
static int numProcs = 0;
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int cv::getNumThreads(void)
|
int cv::getNumThreads(void)
|
||||||
{
|
{
|
||||||
if( !numProcs )
|
#ifdef HAVE_PARALLEL_FRAMEWORK
|
||||||
setNumThreads(0);
|
|
||||||
return numThreads;
|
if(numThreads == 0)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined HAVE_TBB
|
||||||
|
|
||||||
|
return tbbScheduler.is_active()
|
||||||
|
? numThreads
|
||||||
|
: tbb::task_scheduler_init::default_num_threads();
|
||||||
|
|
||||||
|
#elif defined HAVE_CSTRIPES
|
||||||
|
|
||||||
|
return cv::getNumberOfCPUs();
|
||||||
|
|
||||||
|
#elif defined HAVE_OPENMP
|
||||||
|
|
||||||
|
return omp_get_max_threads();
|
||||||
|
|
||||||
|
#elif defined HAVE_GCD
|
||||||
|
|
||||||
|
return 512; // the GCD thread pool limit
|
||||||
|
|
||||||
|
#elif defined HAVE_CONCURRENCY
|
||||||
|
|
||||||
|
return 1 + (pplScheduler == 0
|
||||||
|
? Concurrency::CurrentScheduler::Get()->GetNumberOfVirtualProcessors()
|
||||||
|
: pplScheduler->GetNumberOfVirtualProcessors());
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::setNumThreads( int
|
void cv::setNumThreads( int threads )
|
||||||
#ifdef _OPENMP
|
|
||||||
threads
|
|
||||||
#endif
|
|
||||||
)
|
|
||||||
{
|
{
|
||||||
if( !numProcs )
|
#ifdef HAVE_PARALLEL_FRAMEWORK
|
||||||
{
|
numThreads = threads;
|
||||||
#ifdef _OPENMP
|
|
||||||
numProcs = omp_get_num_procs();
|
|
||||||
#else
|
|
||||||
numProcs = 1;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_TBB
|
||||||
|
|
||||||
|
if(tbbScheduler.is_active()) tbbScheduler.terminate();
|
||||||
|
if(threads > 0) tbbScheduler.initialize(threads);
|
||||||
|
|
||||||
|
#elif defined HAVE_CSTRIPES
|
||||||
|
|
||||||
|
return; // nothing needed
|
||||||
|
|
||||||
|
#elif defined HAVE_OPENMP
|
||||||
|
|
||||||
|
if(omp_in_parallel())
|
||||||
|
return; // can't change number of openmp threads inside a parallel region
|
||||||
|
|
||||||
|
omp_set_num_threads(threads > 0 ? threads : numThreadsMax);
|
||||||
|
|
||||||
|
#elif defined HAVE_GCD
|
||||||
|
|
||||||
|
// unsupported
|
||||||
|
// there is only private dispatch_queue_set_width() and only for desktop
|
||||||
|
|
||||||
|
#elif defined HAVE_CONCURRENCY
|
||||||
|
|
||||||
|
if (threads <= 0)
|
||||||
|
{
|
||||||
|
pplScheduler = 0;
|
||||||
|
}
|
||||||
|
else if (threads == 1)
|
||||||
|
{
|
||||||
|
// Concurrency always uses >=2 threads, so we just disable it if 1 thread is requested
|
||||||
|
numThreads = 0;
|
||||||
|
}
|
||||||
|
else if (pplScheduler == 0 || 1 + pplScheduler->GetNumberOfVirtualProcessors() != (unsigned int)threads)
|
||||||
|
{
|
||||||
|
pplScheduler = Concurrency::Scheduler::Create(Concurrency::SchedulerPolicy(2,
|
||||||
|
Concurrency::PolicyElementKey::MinConcurrency, threads-1,
|
||||||
|
Concurrency::PolicyElementKey::MaxConcurrency, threads-1));
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef _OPENMP
|
|
||||||
if( threads <= 0 )
|
|
||||||
threads = numProcs;
|
|
||||||
else
|
|
||||||
threads = MIN( threads, numProcs );
|
|
||||||
|
|
||||||
numThreads = threads;
|
|
||||||
#else
|
|
||||||
numThreads = 1;
|
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int cv::getThreadNum(void)
|
int cv::getThreadNum(void)
|
||||||
{
|
{
|
||||||
#ifdef _OPENMP
|
#if defined HAVE_TBB
|
||||||
|
#if TBB_INTERFACE_VERSION >= 6100 && defined TBB_PREVIEW_TASK_ARENA && TBB_PREVIEW_TASK_ARENA
|
||||||
|
return tbb::task_arena::current_slot();
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
#elif defined HAVE_CSTRIPES
|
||||||
|
return pix();
|
||||||
|
#elif defined HAVE_OPENMP
|
||||||
return omp_get_thread_num();
|
return omp_get_thread_num();
|
||||||
|
#elif defined HAVE_GCD
|
||||||
|
return statc_cast<int>(pthread_self()); // no zero-based indexing
|
||||||
|
#elif defined HAVE_CONCURRENCY
|
||||||
|
return std::max(0, (int)Concurrency::Context::VirtualProcessorId()); // zero for master thread, unique number for others but not necessary 1,2,3,...
|
||||||
#else
|
#else
|
||||||
return 0;
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user