diff --git a/modules/core/include/opencv2/core/bufferpool.hpp b/modules/core/include/opencv2/core/bufferpool.hpp new file mode 100644 index 000000000..c2de95a9f --- /dev/null +++ b/modules/core/include/opencv2/core/bufferpool.hpp @@ -0,0 +1,26 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved. + +#ifndef __OPENCV_CORE_BUFFER_POOL_HPP__ +#define __OPENCV_CORE_BUFFER_POOL_HPP__ + +namespace cv +{ + +class BufferPoolController // interface for inspecting and limiting the buffers that an allocator keeps in reserve for reuse +{ +protected: + ~BufferPoolController() { } // protected and non-virtual: outside code cannot delete a controller through this interface +public: + virtual size_t getReservedSize() const = 0; // size currently held in reserve; DummyBufferPoolController reports (size_t)-1 + virtual size_t getMaxReservedSize() const = 0; // current limit for the reserved size; DummyBufferPoolController reports (size_t)-1 + virtual void setMaxReservedSize(size_t size) = 0; // changes the limit; the OpenCL buffer pool treats 0 as "pooling disabled" + virtual void freeAllReservedBuffers() = 0; // releases every buffer that is currently held in reserve +}; + +} + +#endif // __OPENCV_CORE_BUFFER_POOL_HPP__ diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index d9f06cb8e..8099520ec 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -51,6 +51,7 @@ #include "opencv2/core/matx.hpp" #include "opencv2/core/types.hpp" +#include "opencv2/core/bufferpool.hpp" namespace cv { @@ -299,6 +300,9 @@ public: virtual void copy(UMatData* srcdata, UMatData* dstdata, int dims, const size_t sz[], const size_t srcofs[], const size_t srcstep[], const size_t dstofs[], const size_t dststep[], bool sync) const; + + // default implementation returns DummyBufferPoolController (it reports (size_t)-1 sizes and ignores requests) + virtual BufferPoolController* getBufferPoolController() const; }; @@ -363,7 +367,7 @@ struct CV_EXPORTS UMatData int refcount; uchar* data; uchar* origdata; - size_t size; + size_t size, capacity; int flags; void* handle; diff --git a/modules/core/include/opencv2/core/ocl.hpp b/modules/core/include/opencv2/core/ocl.hpp index 3a28a3fdc..0df3b8148 100644 --- 
a/modules/core/include/opencv2/core/ocl.hpp +++ b/modules/core/include/opencv2/core/ocl.hpp @@ -596,6 +596,9 @@ protected: Impl* p; }; + +CV_EXPORTS MatAllocator* getOpenCLAllocator(); + }} #endif diff --git a/modules/core/perf/opencl/perf_bufferpool.cpp b/modules/core/perf/opencl/perf_bufferpool.cpp new file mode 100644 index 000000000..abb075751 --- /dev/null +++ b/modules/core/perf/opencl/perf_bufferpool.cpp @@ -0,0 +1,136 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved. + +#include "perf_precomp.hpp" +#include "opencv2/ts/ocl_perf.hpp" + +#ifdef HAVE_OPENCL + +namespace cvtest { +namespace ocl { + +struct BufferPoolState // scope guard: puts the buffer pool into the state under test, restores the saved limit on destruction +{ + BufferPoolController* controller_; + size_t oldMaxReservedSize_; + + BufferPoolState(BufferPoolController* c, bool enable) + : controller_(c) + { + if (!cv::ocl::useOpenCL()) + { + throw ::perf::TestBase::PerfSkipTestException(); + } + oldMaxReservedSize_ = c->getMaxReservedSize(); + if (oldMaxReservedSize_ == (size_t)-1) // the dummy controller reports (size_t)-1: there is no pool to measure + { + throw ::perf::TestBase::PerfSkipTestException(); + } + if (!enable) + { + c->setMaxReservedSize(0); + } + else + { + // The pool is disabled by default (limit 0); without a non-zero limit the "enabled" run would measure a disabled pool. + // Buffers larger than limit/8 are never pooled, so 256MB keeps the ~8MB buffers of these tests poolable. + if (oldMaxReservedSize_ == 0) + c->setMaxReservedSize((size_t)256 * 1024 * 1024); + c->freeAllReservedBuffers(); + } + } + + ~BufferPoolState() + { + controller_->setMaxReservedSize(oldMaxReservedSize_); + } +}; + +typedef TestBaseWithParam<bool> BufferPoolFixture; + +OCL_PERF_TEST_P(BufferPoolFixture, BufferPool_UMatCreation100, Bool()) +{ + BufferPoolState s(cv::ocl::getOpenCLAllocator()->getBufferPoolController(), GetParam()); + + Size sz(1920, 1080); + + OCL_TEST_CYCLE() + { + for (int i = 0; i < 100; i++) + { + UMat u(sz, CV_8UC1); + } + } + + SANITY_CHECK_NOTHING() +} + +OCL_PERF_TEST_P(BufferPoolFixture, BufferPool_UMatCountNonZero100, Bool()) +{ + BufferPoolState s(cv::ocl::getOpenCLAllocator()->getBufferPoolController(), GetParam()); + + Size sz(1920, 
1080); + + OCL_TEST_CYCLE() + { + for (int i = 0; i < 100; i++) + { + UMat u(sz, CV_8UC1); + countNonZero(u); + } + } + + SANITY_CHECK_NOTHING() +} + +OCL_PERF_TEST_P(BufferPoolFixture, BufferPool_UMatCanny10, Bool()) +{ + BufferPoolState s(cv::ocl::getOpenCLAllocator()->getBufferPoolController(), GetParam()); + + Size sz(1920, 1080); + + int aperture = 3; + bool useL2 = false; + double thresh_low = 100; + double thresh_high = 120; + + OCL_TEST_CYCLE() + { + for (int i = 0; i < 10; i++) + { + UMat src(sz, CV_8UC1); + UMat dst; + Canny(src, dst, thresh_low, thresh_high, aperture, useL2); + dst.getMat(ACCESS_READ); // complete async operations + } + } + + SANITY_CHECK_NOTHING() +} + +OCL_PERF_TEST_P(BufferPoolFixture, BufferPool_UMatIntegral10, Bool()) +{ + BufferPoolState s(cv::ocl::getOpenCLAllocator()->getBufferPoolController(), GetParam()); + + Size sz(1920, 1080); + + OCL_TEST_CYCLE() + { + for (int i = 0; i < 10; i++) + { + UMat src(sz, CV_32FC1); + UMat dst; + integral(src, dst); + dst.getMat(ACCESS_READ); // complete async operations + } + } + + SANITY_CHECK_NOTHING() +} + +} } // namespace cvtest::ocl + +#endif // HAVE_OPENCL diff --git a/modules/core/src/bufferpool.impl.hpp b/modules/core/src/bufferpool.impl.hpp new file mode 100644 index 000000000..18a90e069 --- /dev/null +++ b/modules/core/src/bufferpool.impl.hpp @@ -0,0 +1,28 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +// +// Copyright (C) 2014, Advanced Micro Devices, Inc., all rights reserved. 
+ +#ifndef __OPENCV_CORE_BUFFER_POOL_IMPL_HPP__ +#define __OPENCV_CORE_BUFFER_POOL_IMPL_HPP__ + +#include "opencv2/core/bufferpool.hpp" + +namespace cv { + +class DummyBufferPoolController : public BufferPoolController // no-op controller: reports (size_t)-1 for both sizes and ignores every request +{ +public: + DummyBufferPoolController() { } + virtual ~DummyBufferPoolController() { } + + virtual size_t getReservedSize() const { return (size_t)-1; } + virtual size_t getMaxReservedSize() const { return (size_t)-1; } + virtual void setMaxReservedSize(size_t size) { (void)size; } + virtual void freeAllReservedBuffers() { } +}; + +} // namespace + +#endif // __OPENCV_CORE_BUFFER_POOL_IMPL_HPP__ diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 87e4fd57d..8a11d093e 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -43,6 +43,8 @@ #include "precomp.hpp" #include "opencl_kernels.hpp" +#include "bufferpool.impl.hpp" + /****************************************************************************************\ * [scaled] Identity matrix initialization * \****************************************************************************************/ @@ -157,6 +159,12 @@ void MatAllocator::copy(UMatData* usrc, UMatData* udst, int dims, const size_t s memcpy(ptrs[1], ptrs[0], planesz); } +BufferPoolController* MatAllocator::getBufferPoolController() const +{ + static DummyBufferPoolController dummy; + return &dummy; +} + class StdMatAllocator : public MatAllocator { public: diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp index d8254cbcb..6f19b56eb 100644 --- a/modules/core/src/ocl.cpp +++ b/modules/core/src/ocl.cpp @@ -40,11 +40,60 @@ //M*/ #include "precomp.hpp" +#include <list> #include #include #include // std::cerr +#include "opencv2/core/bufferpool.hpp" +#ifndef LOG_BUFFER_POOL +# if 0 +# define LOG_BUFFER_POOL printf +# else +# define LOG_BUFFER_POOL(...) 
+# endif +#endif + +// TODO Move to some common place +// Reads a byte size from the environment variable `name`: decimal digits, optionally followed by KB/Kb/kb or MB/Mb/mb. +// Returns defaultValue when the variable is not set; raises StsBadArg for an unknown suffix or a value that does not fit into size_t. +static size_t getConfigurationParameterForSize(const char* name, size_t defaultValue) +{ + const char* envValue = getenv(name); + if (envValue == NULL) + { + return defaultValue; + } + cv::String value = envValue; + const size_t maxValue = (size_t)-1; + size_t v = 0; // accumulated in size_t: the former int arithmetic (atoi, v * 1024 * 1024) overflowed from 2048MB upwards + bool fits = true; + size_t pos = 0; + for (; pos < value.size(); pos++) + { + const unsigned char c = (unsigned char)value[pos]; // isdigit() is undefined for negative char values + if (!isdigit(c)) + break; + const size_t digit = (size_t)(c - '0'); + if (v > (maxValue - digit) / 10) + fits = false; // too large for size_t; keep scanning so that the suffix is still found + else + v = v * 10 + digit; + } + cv::String suffixStr = value.substr(pos, value.length() - pos); + size_t scale = 0; // stays 0 for an unknown suffix + if (suffixStr.length() == 0) + scale = 1; + else if (suffixStr == "MB" || suffixStr == "Mb" || suffixStr == "mb") + scale = 1024 * 1024; + else if (suffixStr == "KB" || suffixStr == "Kb" || suffixStr == "kb") + scale = 1024; + if (!fits || scale == 0 || v > maxValue / scale) + CV_ErrorNoReturn(cv::Error::StsBadArg, cv::format("Invalid value for %s parameter: %s", name, value.c_str())); + return v * scale; +} + #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp" #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp" @@ -3234,8 +3283,213 @@ ProgramSource2::hash_t ProgramSource2::hash() const //////////////////////////////////////////// OpenCLAllocator ////////////////////////////////////////////////// +class OpenCLBufferPool // allocate/release interface of a pool of OpenCL buffers +{ +protected: + ~OpenCLBufferPool() { } +public: + virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity) = 0; // capacity receives the real size of the returned buffer (it may exceed size) + virtual void release(cl_mem handle, size_t capacity) = 0; // capacity: the value that allocate() reported for this buffer +}; + +class OpenCLBufferPoolImpl : public BufferPoolController, public OpenCLBufferPool // keeps released buffers in a size-limited list and hands them out again for requests of similar size +{ +public: + struct BufferEntry + { + cl_mem clBuffer_; + size_t capacity_; + }; +protected: + Mutex mutex_; + + size_t currentReservedSize; // sum of capacity_ over reservedEntries_ + size_t maxReservedSize; // limit for currentReservedSize; 0 disables pooling + + std::list<BufferEntry> reservedEntries_; // LRU order: most recently released entry first + + // synchronized: the caller must hold mutex_ + bool _findAndRemoveEntryFromReservedList(CV_OUT BufferEntry& entry, const size_t size) + { + if (reservedEntries_.empty()) + return false; + std::list<BufferEntry>::iterator i = reservedEntries_.begin(); + std::list<BufferEntry>::iterator result_pos = reservedEntries_.end(); + BufferEntry result = {NULL, 0}; + size_t minDiff 
= (size_t)(-1); + // Tolerated waste: size/8, but never less than the allocation granularity. Capacities are rounded up to that + // granularity, so otherwise a small buffer could not even be reused for a request of its own size. + const size_t granularity = _allocationGranularity(size); + const size_t maxDiff = (size / 8 > granularity) ? size / 8 : granularity; + for (; i != reservedEntries_.end(); ++i) + { + BufferEntry& e = *i; + if (e.capacity_ >= size) + { + size_t diff = e.capacity_ - size; + if (diff < maxDiff && (result_pos == reservedEntries_.end() || diff < minDiff)) + { + minDiff = diff; + result_pos = i; + result = e; + if (diff == 0) + break; + } + } + } + if (result_pos != reservedEntries_.end()) + { + //CV_DbgAssert(result == *result_pos); + reservedEntries_.erase(result_pos); + entry = result; + currentReservedSize -= entry.capacity_; + return true; + } + return false; + } + + // synchronized: the caller must hold mutex_; evicts from the back of the list until the limit is met + void _checkSizeOfReservedEntries() + { + while (currentReservedSize > maxReservedSize) + { + CV_DbgAssert(!reservedEntries_.empty()); + const BufferEntry& entry = reservedEntries_.back(); + CV_DbgAssert(currentReservedSize >= entry.capacity_); + currentReservedSize -= entry.capacity_; + _releaseBufferEntry(entry); + reservedEntries_.pop_back(); + } + } + + inline size_t _allocationGranularity(size_t size) + { + // heuristic values + if (size < 1024) + return 16; + else if (size < 64*1024) + return 64; + else if (size < 1024*1024) + return 4096; + else if (size < 16*1024*1024) + return 64*1024; + else + return 1024*1024; + } + + void _allocateBufferEntry(BufferEntry& entry, size_t size) + { + CV_DbgAssert(entry.clBuffer_ == NULL); + entry.capacity_ = alignSize(size, (int)_allocationGranularity(size)); + Context2& ctx = Context2::getDefault(); + cl_int retval = CL_SUCCESS; + entry.clBuffer_ = clCreateBuffer((cl_context)ctx.ptr(), CL_MEM_READ_WRITE, entry.capacity_, 0, &retval); + CV_Assert(retval == CL_SUCCESS); + CV_Assert(entry.clBuffer_ != NULL); + LOG_BUFFER_POOL("OpenCL allocate %lld (0x%llx) bytes: %p\n", + (long long)entry.capacity_, (long long)entry.capacity_, entry.clBuffer_); + } + + void _releaseBufferEntry(const BufferEntry& entry) + { + CV_Assert(entry.capacity_ != 0); + CV_Assert(entry.clBuffer_ != NULL); + LOG_BUFFER_POOL("OpenCL release buffer: %p, %lld (0x%llx) bytes\n", + 
entry.clBuffer_, (long long)entry.capacity_, (long long)entry.capacity_); + clReleaseMemObject(entry.clBuffer_); + } +public: + OpenCLBufferPoolImpl() + : currentReservedSize(0), maxReservedSize(0) + { + // Note: Buffer pool is disabled by default, + // because we didn't receive significant performance improvement + maxReservedSize = getConfigurationParameterForSize("OPENCV_OPENCL_BUFFERPOOL_LIMIT", 0); + } + virtual ~OpenCLBufferPoolImpl() + { + freeAllReservedBuffers(); + CV_Assert(reservedEntries_.empty()); + } +public: + virtual cl_mem allocate(size_t size, CV_OUT size_t& capacity) + { + BufferEntry entry = {NULL, 0}; + if (maxReservedSize > 0) + { + AutoLock locker(mutex_); + if (_findAndRemoveEntryFromReservedList(entry, size)) + { + CV_DbgAssert(size <= entry.capacity_); + LOG_BUFFER_POOL("Reuse reserved buffer: %p\n", entry.clBuffer_); + capacity = entry.capacity_; + return entry.clBuffer_; + } + } + _allocateBufferEntry(entry, size); + capacity = entry.capacity_; + return entry.clBuffer_; + } + virtual void release(cl_mem handle, size_t capacity) + { + BufferEntry entry = {handle, capacity}; + if (maxReservedSize == 0 || entry.capacity_ > maxReservedSize / 8) // pooling disabled, or the buffer takes more than 1/8 of the limit + { + _releaseBufferEntry(entry); + } + else + { + AutoLock locker(mutex_); + reservedEntries_.push_front(entry); + currentReservedSize += entry.capacity_; + _checkSizeOfReservedEntries(); + } + } + + virtual size_t getReservedSize() const { return currentReservedSize; } + virtual size_t getMaxReservedSize() const { return maxReservedSize; } + virtual void setMaxReservedSize(size_t size) + { + AutoLock locker(mutex_); + size_t oldMaxReservedSize = maxReservedSize; + maxReservedSize = size; + if (maxReservedSize < oldMaxReservedSize) + { + std::list<BufferEntry>::iterator i = reservedEntries_.begin(); + for (; i != reservedEntries_.end();) + { + const BufferEntry& entry = *i; + if (entry.capacity_ > maxReservedSize / 8) + { + CV_DbgAssert(currentReservedSize >= entry.capacity_); + currentReservedSize -= 
entry.capacity_; + _releaseBufferEntry(entry); + i = reservedEntries_.erase(i); + continue; + } + ++i; + } + _checkSizeOfReservedEntries(); + } + } + virtual void freeAllReservedBuffers() + { + AutoLock locker(mutex_); + std::list<BufferEntry>::const_iterator i = reservedEntries_.begin(); + for (; i != reservedEntries_.end(); ++i) + { + const BufferEntry& entry = *i; + _releaseBufferEntry(entry); + } + reservedEntries_.clear(); + currentReservedSize = 0; // keep the total in step with the list: a stale total shrinks the usable pool and lets a later eviction pass run on an empty list + } +}; + + class OpenCLAllocator : public MatAllocator { + mutable OpenCLBufferPoolImpl bufferPool; public: OpenCLAllocator() { matStdAllocator = Mat::getStdAllocator(); } @@ -3274,17 +3528,18 @@ public: int createFlags = 0, flags0 = 0; getBestFlags(ctx, flags, createFlags, flags0); - cl_int retval = 0; - void* handle = clCreateBuffer((cl_context)ctx.ptr(), - createFlags, total, 0, &retval); - if( !handle || retval != CL_SUCCESS ) + CV_Assert(createFlags == CL_MEM_READ_WRITE); + size_t capacity = 0; + void* handle = bufferPool.allocate(total, capacity); + if (!handle) return defaultAllocate(dims, sizes, type, data, step, flags); UMatData* u = new UMatData(this); u->data = 0; u->size = total; + u->capacity = capacity; u->handle = handle; u->flags = flags0; - + CV_DbgAssert(!u->tempUMat()); // for bufferPool.release() consistency return u; } @@ -3405,8 +3660,9 @@ public: fastFree(u->data); u->data = 0; } - clReleaseMemObject((cl_mem)u->handle); + bufferPool.release((cl_mem)u->handle, u->capacity); u->handle = 0; + u->capacity = 0; delete u; } } @@ -3713,6 +3969,8 @@ public: } } + BufferPoolController* getBufferPoolController() const { return &bufferPool; } + MatAllocator* matStdAllocator; }; diff --git a/modules/core/src/precomp.hpp b/modules/core/src/precomp.hpp index 3727b2f15..ff5943bc6 100644 --- a/modules/core/src/precomp.hpp +++ b/modules/core/src/precomp.hpp @@ -260,11 +260,6 @@ extern TLSData coreTlsData; #define CL_RUNTIME_EXPORT #endif -namespace ocl -{ - MatAllocator* getOpenCLAllocator(); -} - extern bool __termination; // skip some cleanups, 
because process is terminating // (for example, if ExitProcess() was already called) diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp index 0baf013be..3e4cfa2b4 100644 --- a/modules/core/src/umatrix.cpp +++ b/modules/core/src/umatrix.cpp @@ -56,7 +56,7 @@ UMatData::UMatData(const MatAllocator* allocator) prevAllocator = currAllocator = allocator; urefcount = refcount = 0; data = origdata = 0; - size = 0; + size = 0; capacity = 0; flags = 0; handle = 0; userdata = 0; @@ -67,7 +67,7 @@ UMatData::~UMatData() prevAllocator = currAllocator = 0; urefcount = refcount = 0; data = origdata = 0; - size = 0; + size = 0; capacity = 0; flags = 0; handle = 0; userdata = 0; diff --git a/modules/core/test/test_umat.cpp b/modules/core/test/test_umat.cpp index d30b928d1..765a6318e 100644 --- a/modules/core/test/test_umat.cpp +++ b/modules/core/test/test_umat.cpp @@ -291,3 +291,31 @@ TEST(UMat, setOpenCL) // reset state to the previous one ocl::setUseOpenCL(useOCL); } + +TEST(UMat, BufferPoolGrowing) // creates UMats of steadily growing size under a small pool limit; the body contains no explicit expectations +{ +#ifdef _DEBUG + const int ITERATIONS = 100; // debug builds run half as many iterations +#else + const int ITERATIONS = 200; +#endif + const Size sz(1920, 1080); + BufferPoolController* c = ocl::getOpenCLAllocator()->getBufferPoolController(); + if (c) + { + size_t oldMaxReservedSize = c->getMaxReservedSize(); + c->freeAllReservedBuffers(); + c->setMaxReservedSize(sz.area() * 10); // limit: ten times the pixel count of the base 1920x1080 size + for (int i = 0; i < ITERATIONS; i++) + { + UMat um(Size(sz.width + i, sz.height + i), CV_8UC1); // both UMats go out of scope at the end of every iteration + UMat um2(Size(sz.width + 2 * i, sz.height + 2 * i), CV_8UC1); + } + c->setMaxReservedSize(oldMaxReservedSize); // restore the limit that was active before the test + c->freeAllReservedBuffers(); + } + else + { + std::cout << "Skipped, no OpenCL" << std::endl; + } +}