Merge pull request #2998 from ernest-galbrun:cuda_concurrency

This commit is contained in:
Vadim Pisarevsky 2014-07-28 15:59:43 +00:00
commit 0c749fd7e5
3 changed files with 45 additions and 10 deletions

View File

@@ -207,7 +207,6 @@ namespace
MemoryStack* MemoryPool::getFreeMemStack() MemoryStack* MemoryPool::getFreeMemStack()
{ {
AutoLock lock(mtx_); AutoLock lock(mtx_);
if (!initialized_) if (!initialized_)
initilizeImpl(); initilizeImpl();
@@ -256,22 +255,31 @@ namespace
namespace namespace
{ {
Mutex mtx_;
bool memory_pool_manager_initialized;
class MemoryPoolManager class MemoryPoolManager
{ {
public: public:
MemoryPoolManager(); MemoryPoolManager();
~MemoryPoolManager(); ~MemoryPoolManager();
void Init();
MemoryPool* getPool(int deviceId); MemoryPool* getPool(int deviceId);
private: private:
std::vector<MemoryPool> pools_; std::vector<MemoryPool> pools_;
}; } manager;
//MemoryPoolManager ;
MemoryPoolManager::MemoryPoolManager() MemoryPoolManager::MemoryPoolManager()
{ {
int deviceCount = getCudaEnabledDeviceCount(); }
void MemoryPoolManager::Init()
{
int deviceCount = getCudaEnabledDeviceCount();
if (deviceCount > 0) if (deviceCount > 0)
pools_.resize(deviceCount); pools_.resize(deviceCount);
} }
@@ -280,7 +288,7 @@ namespace
{ {
for (size_t i = 0; i < pools_.size(); ++i) for (size_t i = 0; i < pools_.size(); ++i)
{ {
cudaSetDevice(i); cudaSetDevice(static_cast<int>(i));
pools_[i].release(); pools_[i].release();
} }
} }
@@ -293,7 +301,14 @@ namespace
MemoryPool* memPool(int deviceId) MemoryPool* memPool(int deviceId)
{ {
static MemoryPoolManager manager; {
AutoLock lock(mtx_);
if (!memory_pool_manager_initialized)
{
memory_pool_manager_initialized = true;
manager.Init();
}
}
return manager.getPool(deviceId); return manager.getPool(deviceId);
} }
} }
@@ -311,8 +326,10 @@ cv::cuda::StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream),
if (enableMemoryPool) if (enableMemoryPool)
{ {
const int deviceId = getDevice(); const int deviceId = getDevice();
memStack_ = memPool(deviceId)->getFreeMemStack(); {
AutoLock lock(mtx_);
memStack_ = memPool(deviceId)->getFreeMemStack();
}
DeviceInfo devInfo(deviceId); DeviceInfo devInfo(deviceId);
alignment_ = devInfo.textureAlignment(); alignment_ = devInfo.textureAlignment();
} }

View File

@@ -190,10 +190,22 @@ void cv::cuda::Stream::enqueueHostCallback(StreamCallback callback, void* userDa
#endif #endif
} }
namespace
{
bool default_stream_is_initialized;
Mutex mtx;
Ptr<Stream> default_stream;
}
Stream& cv::cuda::Stream::Null() Stream& cv::cuda::Stream::Null()
{ {
static Stream s(Ptr<Impl>(new Impl(0))); AutoLock lock(mtx);
return s; if (!default_stream_is_initialized)
{
default_stream = Ptr<Stream>(new Stream(Ptr<Impl>(new Impl(0))));
default_stream_is_initialized = true;
}
return *default_stream;
} }
cv::cuda::Stream::operator bool_type() const cv::cuda::Stream::operator bool_type() const

View File

@@ -93,6 +93,8 @@ using namespace ::cv::cuda::device::surf;
namespace namespace
{ {
Mutex mtx;
int calcSize(int octave, int layer) int calcSize(int octave, int layer)
{ {
/* Wavelet size at first layer of first octave. */ /* Wavelet size at first layer of first octave. */
@@ -166,7 +168,6 @@ namespace
{ {
const int layer_rows = img_rows >> octave; const int layer_rows = img_rows >> octave;
const int layer_cols = img_cols >> octave; const int layer_cols = img_cols >> octave;
loadOctaveConstants(octave, layer_rows, layer_cols); loadOctaveConstants(octave, layer_rows, layer_cols);
icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, img_rows, img_cols, octave, surf_.nOctaveLayers); icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, img_rows, img_cols, octave, surf_.nOctaveLayers);
@@ -354,6 +355,7 @@ void cv::cuda::SURF_CUDA::downloadDescriptors(const GpuMat& descriptorsGPU, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints) void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints)
{ {
AutoLock lock(mtx);
if (!img.empty()) if (!img.empty())
{ {
SURF_CUDA_Invoker surf(*this, img, mask); SURF_CUDA_Invoker surf(*this, img, mask);
@@ -365,6 +367,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors, void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuMat& keypoints, GpuMat& descriptors,
bool useProvidedKeypoints) bool useProvidedKeypoints)
{ {
AutoLock lock(mtx);
if (!img.empty()) if (!img.empty())
{ {
SURF_CUDA_Invoker surf(*this, img, mask); SURF_CUDA_Invoker surf(*this, img, mask);
@@ -382,6 +385,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, GpuM
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints) void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints)
{ {
AutoLock lock(mtx);
GpuMat keypointsGPU; GpuMat keypointsGPU;
(*this)(img, mask, keypointsGPU); (*this)(img, mask, keypointsGPU);
@@ -392,6 +396,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
GpuMat& descriptors, bool useProvidedKeypoints) GpuMat& descriptors, bool useProvidedKeypoints)
{ {
AutoLock lock(mtx);
GpuMat keypointsGPU; GpuMat keypointsGPU;
if (useProvidedKeypoints) if (useProvidedKeypoints)
@@ -405,6 +410,7 @@ void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std:
void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints, void cv::cuda::SURF_CUDA::operator()(const GpuMat& img, const GpuMat& mask, std::vector<KeyPoint>& keypoints,
std::vector<float>& descriptors, bool useProvidedKeypoints) std::vector<float>& descriptors, bool useProvidedKeypoints)
{ {
AutoLock lock(mtx);
GpuMat descriptorsGPU; GpuMat descriptorsGPU;
(*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints); (*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints);