ocl: OpenCL SVM support

This commit is contained in:
Alexander Alekhin
2015-01-02 03:33:40 +03:00
parent 58ad952b1a
commit 0a07d780e0
15 changed files with 1542 additions and 161 deletions

View File

@@ -415,7 +415,7 @@ public:
const size_t dstofs[], const size_t dststep[], bool sync) const;
// default implementation returns DummyBufferPoolController
virtual BufferPoolController* getBufferPoolController() const;
virtual BufferPoolController* getBufferPoolController(const char* id = NULL) const;
};
@@ -481,7 +481,7 @@ struct CV_EXPORTS UMatData
int refcount;
uchar* data;
uchar* origdata;
size_t size, capacity;
size_t size;
int flags;
void* handle;

View File

@@ -56,6 +56,8 @@ CV_EXPORTS_W bool haveAmdFft();
CV_EXPORTS_W void setUseOpenCL(bool flag);
CV_EXPORTS_W void finish();
CV_EXPORTS bool haveSVM();
class CV_EXPORTS Context;
class CV_EXPORTS Device;
class CV_EXPORTS Kernel;
@@ -248,7 +250,10 @@ public:
void* ptr() const;
friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
protected:
bool useSVM() const;
void setUseSVM(bool enabled);
struct Impl;
Impl* p;
};
@@ -666,8 +671,17 @@ protected:
CV_EXPORTS MatAllocator* getOpenCLAllocator();
CV_EXPORTS_W bool isPerformanceCheckBypassed();
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::isPerformanceCheckBypassed() || (condition))
#ifdef __OPENCV_BUILD
namespace internal {
CV_EXPORTS bool isPerformanceCheckBypassed();
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
CV_EXPORTS bool isCLBuffer(UMat& u);
} // namespace internal
#endif
//! @}

View File

@@ -0,0 +1,81 @@
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OPENCL_SVM_HPP__
#define __OPENCV_CORE_OPENCL_SVM_HPP__
//
// Internal usage only (binary compatibility is not guaranteed)
//
#ifndef __OPENCV_BUILD
#error Internal header file
#endif
#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM)
#include "runtime/opencl_core.hpp"
#include "runtime/opencl_svm_20.hpp"
#include "runtime/opencl_svm_hsa_extension.hpp"
namespace cv { namespace ocl { namespace svm {
struct SVMCapabilities
{
enum Value
{
SVM_COARSE_GRAIN_BUFFER = (1 << 0),
SVM_FINE_GRAIN_BUFFER = (1 << 1),
SVM_FINE_GRAIN_SYSTEM = (1 << 2),
SVM_ATOMICS = (1 << 3),
};
int value_;
SVMCapabilities(int capabilities = 0) : value_(capabilities) { }
operator int() const { return value_; }
inline bool isNoSVMSupport() const { return value_ == 0; }
inline bool isSupportCoarseGrainBuffer() const { return (value_ & SVM_COARSE_GRAIN_BUFFER) != 0; }
inline bool isSupportFineGrainBuffer() const { return (value_ & SVM_FINE_GRAIN_BUFFER) != 0; }
inline bool isSupportFineGrainSystem() const { return (value_ & SVM_FINE_GRAIN_SYSTEM) != 0; }
inline bool isSupportAtomics() const { return (value_ & SVM_ATOMICS) != 0; }
};
CV_EXPORTS const SVMCapabilities getSVMCapabilitites(const ocl::Context& context);
struct SVMFunctions
{
clSVMAllocAMD_fn fn_clSVMAlloc;
clSVMFreeAMD_fn fn_clSVMFree;
clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer;
//clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo;
//clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree;
clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy;
clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill;
clEnqueueSVMMapAMD_fn fn_clEnqueueSVMMap;
clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap;
inline SVMFunctions()
: fn_clSVMAlloc(NULL), fn_clSVMFree(NULL),
fn_clSetKernelArgSVMPointer(NULL), /*fn_clSetKernelExecInfo(NULL),*/
/*fn_clEnqueueSVMFree(NULL),*/ fn_clEnqueueSVMMemcpy(NULL), fn_clEnqueueSVMMemFill(NULL),
fn_clEnqueueSVMMap(NULL), fn_clEnqueueSVMUnmap(NULL)
{
// nothing
}
inline bool isValid() const
{
return fn_clSVMAlloc != NULL && fn_clSVMFree && fn_clSetKernelArgSVMPointer &&
/*fn_clSetKernelExecInfo && fn_clEnqueueSVMFree &&*/ fn_clEnqueueSVMMemcpy &&
fn_clEnqueueSVMMemFill && fn_clEnqueueSVMMap && fn_clEnqueueSVMUnmap;
}
};
// We should guarantee that SVMFunctions lifetime is not less than context's lifetime
CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context);
CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags);
}}} //namespace cv::ocl::svm
#endif
#endif // __OPENCV_CORE_OPENCL_SVM_HPP__
/* End of file. */

View File

@@ -62,6 +62,18 @@
#endif
#endif
#ifdef HAVE_OPENCL_SVM
#define clSVMAlloc clSVMAlloc_
#define clSVMFree clSVMFree_
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_
#define clSetKernelExecInfo clSetKernelExecInfo_
#define clEnqueueSVMFree clEnqueueSVMFree_
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_
#define clEnqueueSVMMap clEnqueueSVMMap_
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_
#endif
#include "autogenerated/opencl_core.hpp"
#endif // HAVE_OPENCL_STATIC

View File

@@ -0,0 +1,52 @@
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
#if defined(HAVE_OPENCL_SVM)
#include "opencl_core.hpp"
#include "opencl_svm_definitions.hpp"
#ifndef HAVE_OPENCL_STATIC
#undef clSVMAlloc
#define clSVMAlloc clSVMAlloc_pfn
#undef clSVMFree
#define clSVMFree clSVMFree_pfn
#undef clSetKernelArgSVMPointer
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn
#undef clSetKernelExecInfo
//#define clSetKernelExecInfo clSetKernelExecInfo_pfn
#undef clEnqueueSVMFree
//#define clEnqueueSVMFree clEnqueueSVMFree_pfn
#undef clEnqueueSVMMemcpy
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn
#undef clEnqueueSVMMemFill
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn
#undef clEnqueueSVMMap
#define clEnqueueSVMMap clEnqueueSVMMap_pfn
#undef clEnqueueSVMUnmap
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn
extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment);
extern CL_RUNTIME_EXPORT void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value);
//extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value);
//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[],
// void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data,
// cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr,
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
#endif // HAVE_OPENCL_STATIC
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__

View File

@@ -0,0 +1,42 @@
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
#if defined(HAVE_OPENCL_SVM)
#if defined(CL_VERSION_2_0)
// OpenCL 2.0 contains SVM definitions
#else
typedef cl_bitfield cl_device_svm_capabilities;
typedef cl_bitfield cl_svm_mem_flags;
typedef cl_uint cl_kernel_exec_info;
//
// TODO Add real values after OpenCL 2.0 release
//
#ifndef CL_DEVICE_SVM_CAPABILITIES
#define CL_DEVICE_SVM_CAPABILITIES 0x1053
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2)
#define CL_DEVICE_SVM_ATOMICS (1 << 3)
#endif
#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER
#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10)
#endif
#ifndef CL_MEM_SVM_ATOMICS
#define CL_MEM_SVM_ATOMICS (1 << 11)
#endif
#endif // CL_VERSION_2_0
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__

View File

@@ -0,0 +1,166 @@
/* See LICENSE file in the root OpenCV directory */
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
#if defined(HAVE_OPENCL_SVM)
#include "opencl_core.hpp"
#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD
//
// Part of the file is an extract from the cl_ext.h file from AMD APP SDK package.
// Below is the original copyright.
//
/*******************************************************************************
* Copyright (c) 2008-2013 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/*******************************************
* Shared Virtual Memory (SVM) extension
*******************************************/
typedef cl_bitfield cl_device_svm_capabilities_amd;
typedef cl_bitfield cl_svm_mem_flags_amd;
typedef cl_uint cl_kernel_exec_info_amd;
/* cl_device_info */
#define CL_DEVICE_SVM_CAPABILITIES_AMD 0x1053
#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD 0x1054
/* cl_device_svm_capabilities_amd */
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD (1 << 0)
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD (1 << 1)
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD (1 << 2)
#define CL_DEVICE_SVM_ATOMICS_AMD (1 << 3)
/* cl_svm_mem_flags_amd */
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD (1 << 10)
#define CL_MEM_SVM_ATOMICS_AMD (1 << 11)
/* cl_mem_info */
#define CL_MEM_USES_SVM_POINTER_AMD 0x1109
/* cl_kernel_exec_info_amd */
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD 0x11B6
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD 0x11B7
/* cl_command_type */
#define CL_COMMAND_SVM_FREE_AMD 0x1209
#define CL_COMMAND_SVM_MEMCPY_AMD 0x120A
#define CL_COMMAND_SVM_MEMFILL_AMD 0x120B
#define CL_COMMAND_SVM_MAP_AMD 0x120C
#define CL_COMMAND_SVM_UNMAP_AMD 0x120D
typedef CL_API_ENTRY void*
(CL_API_CALL * clSVMAllocAMD_fn)(
cl_context /* context */,
cl_svm_mem_flags_amd /* flags */,
size_t /* size */,
unsigned int /* alignment */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY void
(CL_API_CALL * clSVMFreeAMD_fn)(
cl_context /* context */,
void* /* svm_pointer */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMFreeAMD_fn)(
cl_command_queue /* command_queue */,
cl_uint /* num_svm_pointers */,
void** /* svm_pointers */,
void (CL_CALLBACK *)( /*pfn_free_func*/
cl_command_queue /* queue */,
cl_uint /* num_svm_pointers */,
void** /* svm_pointers */,
void* /* user_data */),
void* /* user_data */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMMemcpyAMD_fn)(
cl_command_queue /* command_queue */,
cl_bool /* blocking_copy */,
void* /* dst_ptr */,
const void* /* src_ptr */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMMemFillAMD_fn)(
cl_command_queue /* command_queue */,
void* /* svm_ptr */,
const void* /* pattern */,
size_t /* pattern_size */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMMapAMD_fn)(
cl_command_queue /* command_queue */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
void* /* svm_ptr */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clEnqueueSVMUnmapAMD_fn)(
cl_command_queue /* command_queue */,
void* /* svm_ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event* /* event_wait_list */,
cl_event* /* event */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clSetKernelArgSVMPointerAMD_fn)(
cl_kernel /* kernel */,
cl_uint /* arg_index */,
const void * /* arg_value */
) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int
(CL_API_CALL * clSetKernelExecInfoAMD_fn)(
cl_kernel /* kernel */,
cl_kernel_exec_info_amd /* param_name */,
size_t /* param_value_size */,
const void * /* param_value */
) CL_EXT_SUFFIX__VERSION_1_2;
#endif
#endif // HAVE_OPENCL_SVM
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__