ocl: OpenCL SVM support
This commit is contained in:
parent
58ad952b1a
commit
0a07d780e0
@ -162,6 +162,7 @@ OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF
|
||||
OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) )
|
||||
OCV_OPTION(WITH_CLP "Include Clp support (EPL)" OFF)
|
||||
OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" ON IF (NOT IOS) )
|
||||
OCV_OPTION(WITH_OPENCL_SVM "Include OpenCL Shared Virtual Memory support" OFF ) # experimental
|
||||
OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON IF (NOT ANDROID AND NOT IOS) )
|
||||
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS) )
|
||||
OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF WIN32 )
|
||||
|
@ -26,6 +26,10 @@ if(OPENCL_FOUND)
|
||||
|
||||
set(HAVE_OPENCL 1)
|
||||
|
||||
if(WITH_OPENCL_SVM)
|
||||
set(HAVE_OPENCL_SVM 1)
|
||||
endif()
|
||||
|
||||
if(HAVE_OPENCL_STATIC)
|
||||
set(OPENCL_LIBRARIES "${OPENCL_LIBRARY}")
|
||||
else()
|
||||
|
@ -122,6 +122,7 @@
|
||||
/* OpenCL Support */
|
||||
#cmakedefine HAVE_OPENCL
|
||||
#cmakedefine HAVE_OPENCL_STATIC
|
||||
#cmakedefine HAVE_OPENCL_SVM
|
||||
|
||||
/* OpenEXR codec */
|
||||
#cmakedefine HAVE_OPENEXR
|
||||
|
@ -415,7 +415,7 @@ public:
|
||||
const size_t dstofs[], const size_t dststep[], bool sync) const;
|
||||
|
||||
// default implementation returns DummyBufferPoolController
|
||||
virtual BufferPoolController* getBufferPoolController() const;
|
||||
virtual BufferPoolController* getBufferPoolController(const char* id = NULL) const;
|
||||
};
|
||||
|
||||
|
||||
@ -481,7 +481,7 @@ struct CV_EXPORTS UMatData
|
||||
int refcount;
|
||||
uchar* data;
|
||||
uchar* origdata;
|
||||
size_t size, capacity;
|
||||
size_t size;
|
||||
|
||||
int flags;
|
||||
void* handle;
|
||||
|
@ -56,6 +56,8 @@ CV_EXPORTS_W bool haveAmdFft();
|
||||
CV_EXPORTS_W void setUseOpenCL(bool flag);
|
||||
CV_EXPORTS_W void finish();
|
||||
|
||||
CV_EXPORTS bool haveSVM();
|
||||
|
||||
class CV_EXPORTS Context;
|
||||
class CV_EXPORTS Device;
|
||||
class CV_EXPORTS Kernel;
|
||||
@ -248,7 +250,10 @@ public:
|
||||
void* ptr() const;
|
||||
|
||||
friend void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);
|
||||
protected:
|
||||
|
||||
bool useSVM() const;
|
||||
void setUseSVM(bool enabled);
|
||||
|
||||
struct Impl;
|
||||
Impl* p;
|
||||
};
|
||||
@ -666,8 +671,17 @@ protected:
|
||||
|
||||
CV_EXPORTS MatAllocator* getOpenCLAllocator();
|
||||
|
||||
CV_EXPORTS_W bool isPerformanceCheckBypassed();
|
||||
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::isPerformanceCheckBypassed() || (condition))
|
||||
|
||||
#ifdef __OPENCV_BUILD
|
||||
namespace internal {
|
||||
|
||||
CV_EXPORTS bool isPerformanceCheckBypassed();
|
||||
#define OCL_PERFORMANCE_CHECK(condition) (cv::ocl::internal::isPerformanceCheckBypassed() || (condition))
|
||||
|
||||
CV_EXPORTS bool isCLBuffer(UMat& u);
|
||||
|
||||
} // namespace internal
|
||||
#endif
|
||||
|
||||
//! @}
|
||||
|
||||
|
81
modules/core/include/opencv2/core/opencl/opencl_svm.hpp
Normal file
81
modules/core/include/opencv2/core/opencl/opencl_svm.hpp
Normal file
@ -0,0 +1,81 @@
|
||||
/* See LICENSE file in the root OpenCV directory */
|
||||
|
||||
#ifndef __OPENCV_CORE_OPENCL_SVM_HPP__
|
||||
#define __OPENCV_CORE_OPENCL_SVM_HPP__
|
||||
|
||||
//
|
||||
// Internal usage only (binary compatibility is not guaranteed)
|
||||
//
|
||||
#ifndef __OPENCV_BUILD
|
||||
#error Internal header file
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_OPENCL) && defined(HAVE_OPENCL_SVM)
|
||||
#include "runtime/opencl_core.hpp"
|
||||
#include "runtime/opencl_svm_20.hpp"
|
||||
#include "runtime/opencl_svm_hsa_extension.hpp"
|
||||
|
||||
namespace cv { namespace ocl { namespace svm {
|
||||
|
||||
/**
 * Thin wrapper around an integer bitmask of SVM capability flags.
 *
 * The flag bits use the same positions as the CL_DEVICE_SVM_* macros
 * (bit 0 .. bit 3). A mask of 0 means that no capability bit is set.
 */
struct SVMCapabilities
{
    /** Single capability bits; OR them together to build a mask. */
    enum Value
    {
        SVM_COARSE_GRAIN_BUFFER = 0x1,  // bit 0
        SVM_FINE_GRAIN_BUFFER   = 0x2,  // bit 1
        SVM_FINE_GRAIN_SYSTEM   = 0x4,  // bit 2
        SVM_ATOMICS             = 0x8   // bit 3
    };

    /** Raw OR-combination of Value bits. */
    int value_;

    /** Non-explicit on purpose of compatibility: a plain int mask converts implicitly. */
    SVMCapabilities(int capabilities = 0)
        : value_(capabilities)
    {
    }

    /** Exposes the raw mask. */
    operator int() const
    {
        return value_;
    }

    /** True when no capability bit at all is set. */
    bool isNoSVMSupport() const
    {
        return !value_;
    }

    /** True when the SVM_COARSE_GRAIN_BUFFER bit is set. */
    bool isSupportCoarseGrainBuffer() const
    {
        return 0 != (value_ & SVM_COARSE_GRAIN_BUFFER);
    }

    /** True when the SVM_FINE_GRAIN_BUFFER bit is set. */
    bool isSupportFineGrainBuffer() const
    {
        return 0 != (value_ & SVM_FINE_GRAIN_BUFFER);
    }

    /** True when the SVM_FINE_GRAIN_SYSTEM bit is set. */
    bool isSupportFineGrainSystem() const
    {
        return 0 != (value_ & SVM_FINE_GRAIN_SYSTEM);
    }

    /** True when the SVM_ATOMICS bit is set. */
    bool isSupportAtomics() const
    {
        return 0 != (value_ & SVM_ATOMICS);
    }
};
|
||||
|
||||
// Presumably reports the SVM capability mask for the given context (implementation not visible here).
// NOTE(review): "Capabilitites" is a misspelling of "Capabilities"; the symbol is exported, so
// renaming it would break existing callers - keep the spelling when calling it.
CV_EXPORTS const SVMCapabilities getSVMCapabilitites(const ocl::Context& context);
|
||||
|
||||
struct SVMFunctions
|
||||
{
|
||||
clSVMAllocAMD_fn fn_clSVMAlloc;
|
||||
clSVMFreeAMD_fn fn_clSVMFree;
|
||||
clSetKernelArgSVMPointerAMD_fn fn_clSetKernelArgSVMPointer;
|
||||
//clSetKernelExecInfoAMD_fn fn_clSetKernelExecInfo;
|
||||
//clEnqueueSVMFreeAMD_fn fn_clEnqueueSVMFree;
|
||||
clEnqueueSVMMemcpyAMD_fn fn_clEnqueueSVMMemcpy;
|
||||
clEnqueueSVMMemFillAMD_fn fn_clEnqueueSVMMemFill;
|
||||
clEnqueueSVMMapAMD_fn fn_clEnqueueSVMMap;
|
||||
clEnqueueSVMUnmapAMD_fn fn_clEnqueueSVMUnmap;
|
||||
|
||||
inline SVMFunctions()
|
||||
: fn_clSVMAlloc(NULL), fn_clSVMFree(NULL),
|
||||
fn_clSetKernelArgSVMPointer(NULL), /*fn_clSetKernelExecInfo(NULL),*/
|
||||
/*fn_clEnqueueSVMFree(NULL),*/ fn_clEnqueueSVMMemcpy(NULL), fn_clEnqueueSVMMemFill(NULL),
|
||||
fn_clEnqueueSVMMap(NULL), fn_clEnqueueSVMUnmap(NULL)
|
||||
{
|
||||
// nothing
|
||||
}
|
||||
|
||||
inline bool isValid() const
|
||||
{
|
||||
return fn_clSVMAlloc != NULL && fn_clSVMFree && fn_clSetKernelArgSVMPointer &&
|
||||
/*fn_clSetKernelExecInfo && fn_clEnqueueSVMFree &&*/ fn_clEnqueueSVMMemcpy &&
|
||||
fn_clEnqueueSVMMemFill && fn_clEnqueueSVMMap && fn_clEnqueueSVMUnmap;
|
||||
}
|
||||
};
|
||||
|
||||
// We should guarantee that SVMFunctions lifetime is not less than context's lifetime
|
||||
CV_EXPORTS const SVMFunctions* getSVMFunctions(const ocl::Context& context);
|
||||
|
||||
CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags);
|
||||
|
||||
}}} //namespace cv::ocl::svm
|
||||
#endif
|
||||
|
||||
#endif // __OPENCV_CORE_OPENCL_SVM_HPP__
|
||||
/* End of file. */
|
@ -62,6 +62,18 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_OPENCL_SVM
|
||||
#define clSVMAlloc clSVMAlloc_
|
||||
#define clSVMFree clSVMFree_
|
||||
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_
|
||||
#define clSetKernelExecInfo clSetKernelExecInfo_
|
||||
#define clEnqueueSVMFree clEnqueueSVMFree_
|
||||
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_
|
||||
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_
|
||||
#define clEnqueueSVMMap clEnqueueSVMMap_
|
||||
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_
|
||||
#endif
|
||||
|
||||
#include "autogenerated/opencl_core.hpp"
|
||||
|
||||
#endif // HAVE_OPENCL_STATIC
|
||||
|
@ -0,0 +1,52 @@
|
||||
/* See LICENSE file in the root OpenCV directory */
|
||||
|
||||
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
|
||||
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
|
||||
|
||||
#if defined(HAVE_OPENCL_SVM)
|
||||
#include "opencl_core.hpp"
|
||||
|
||||
#include "opencl_svm_definitions.hpp"
|
||||
|
||||
#ifndef HAVE_OPENCL_STATIC
|
||||
|
||||
#undef clSVMAlloc
|
||||
#define clSVMAlloc clSVMAlloc_pfn
|
||||
#undef clSVMFree
|
||||
#define clSVMFree clSVMFree_pfn
|
||||
#undef clSetKernelArgSVMPointer
|
||||
#define clSetKernelArgSVMPointer clSetKernelArgSVMPointer_pfn
|
||||
#undef clSetKernelExecInfo
|
||||
//#define clSetKernelExecInfo clSetKernelExecInfo_pfn
|
||||
#undef clEnqueueSVMFree
|
||||
//#define clEnqueueSVMFree clEnqueueSVMFree_pfn
|
||||
#undef clEnqueueSVMMemcpy
|
||||
#define clEnqueueSVMMemcpy clEnqueueSVMMemcpy_pfn
|
||||
#undef clEnqueueSVMMemFill
|
||||
#define clEnqueueSVMMemFill clEnqueueSVMMemFill_pfn
|
||||
#undef clEnqueueSVMMap
|
||||
#define clEnqueueSVMMap clEnqueueSVMMap_pfn
|
||||
#undef clEnqueueSVMUnmap
|
||||
#define clEnqueueSVMUnmap clEnqueueSVMUnmap_pfn
|
||||
|
||||
extern CL_RUNTIME_EXPORT void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment);
|
||||
extern CL_RUNTIME_EXPORT void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer);
|
||||
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value);
|
||||
//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value);
|
||||
//extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMFree)(cl_command_queue command_queue, cl_uint num_svm_pointers, void* svm_pointers[],
|
||||
// void (CL_CALLBACK *pfn_free_func)(cl_command_queue queue, cl_uint num_svm_pointers, void* svm_pointers[], void* user_data), void* user_data,
|
||||
// cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
|
||||
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size,
|
||||
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
|
||||
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size,
|
||||
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
|
||||
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size,
|
||||
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
|
||||
extern CL_RUNTIME_EXPORT cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr,
|
||||
cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event);
|
||||
|
||||
#endif // HAVE_OPENCL_STATIC
|
||||
|
||||
#endif // HAVE_OPENCL_SVM
|
||||
|
||||
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_2_0_HPP__
|
@ -0,0 +1,42 @@
|
||||
/* See LICENSE file in the root OpenCV directory */
|
||||
|
||||
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
|
||||
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
|
||||
|
||||
#if defined(HAVE_OPENCL_SVM)
|
||||
#if defined(CL_VERSION_2_0)
|
||||
|
||||
// OpenCL 2.0 contains SVM definitions
|
||||
|
||||
#else
|
||||
|
||||
typedef cl_bitfield cl_device_svm_capabilities;
|
||||
typedef cl_bitfield cl_svm_mem_flags;
|
||||
typedef cl_uint cl_kernel_exec_info;
|
||||
|
||||
//
|
||||
// TODO Add real values after OpenCL 2.0 release
|
||||
//
|
||||
|
||||
#ifndef CL_DEVICE_SVM_CAPABILITIES
|
||||
#define CL_DEVICE_SVM_CAPABILITIES 0x1053
|
||||
|
||||
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER (1 << 0)
|
||||
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER (1 << 1)
|
||||
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM (1 << 2)
|
||||
#define CL_DEVICE_SVM_ATOMICS (1 << 3)
|
||||
#endif
|
||||
|
||||
#ifndef CL_MEM_SVM_FINE_GRAIN_BUFFER
|
||||
#define CL_MEM_SVM_FINE_GRAIN_BUFFER (1 << 10)
|
||||
#endif
|
||||
|
||||
#ifndef CL_MEM_SVM_ATOMICS
|
||||
#define CL_MEM_SVM_ATOMICS (1 << 11)
|
||||
#endif
|
||||
|
||||
|
||||
#endif // CL_VERSION_2_0
|
||||
#endif // HAVE_OPENCL_SVM
|
||||
|
||||
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_DEFINITIONS_HPP__
|
@ -0,0 +1,166 @@
|
||||
/* See LICENSE file in the root OpenCV directory */
|
||||
|
||||
#ifndef __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
|
||||
#define __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
|
||||
|
||||
#if defined(HAVE_OPENCL_SVM)
|
||||
#include "opencl_core.hpp"
|
||||
|
||||
#ifndef CL_DEVICE_SVM_CAPABILITIES_AMD
|
||||
//
|
||||
// Part of the file is an extract from the cl_ext.h file from AMD APP SDK package.
|
||||
// Below is the original copyright.
|
||||
//
|
||||
/*******************************************************************************
|
||||
* Copyright (c) 2008-2013 The Khronos Group Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and/or associated documentation files (the
|
||||
* "Materials"), to deal in the Materials without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Materials, and to
|
||||
* permit persons to whom the Materials are furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included
|
||||
* in all copies or substantial portions of the Materials.
|
||||
*
|
||||
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
|
||||
******************************************************************************/
|
||||
|
||||
/*******************************************
|
||||
* Shared Virtual Memory (SVM) extension
|
||||
*******************************************/
|
||||
typedef cl_bitfield cl_device_svm_capabilities_amd;
|
||||
typedef cl_bitfield cl_svm_mem_flags_amd;
|
||||
typedef cl_uint cl_kernel_exec_info_amd;
|
||||
|
||||
/* cl_device_info */
|
||||
#define CL_DEVICE_SVM_CAPABILITIES_AMD 0x1053
|
||||
#define CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT_AMD 0x1054
|
||||
|
||||
/* cl_device_svm_capabilities_amd */
|
||||
#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_AMD (1 << 0)
|
||||
#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_AMD (1 << 1)
|
||||
#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_AMD (1 << 2)
|
||||
#define CL_DEVICE_SVM_ATOMICS_AMD (1 << 3)
|
||||
|
||||
/* cl_svm_mem_flags_amd */
|
||||
#define CL_MEM_SVM_FINE_GRAIN_BUFFER_AMD (1 << 10)
|
||||
#define CL_MEM_SVM_ATOMICS_AMD (1 << 11)
|
||||
|
||||
/* cl_mem_info */
|
||||
#define CL_MEM_USES_SVM_POINTER_AMD 0x1109
|
||||
|
||||
/* cl_kernel_exec_info_amd */
|
||||
#define CL_KERNEL_EXEC_INFO_SVM_PTRS_AMD 0x11B6
|
||||
#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_AMD 0x11B7
|
||||
|
||||
/* cl_command_type */
|
||||
#define CL_COMMAND_SVM_FREE_AMD 0x1209
|
||||
#define CL_COMMAND_SVM_MEMCPY_AMD 0x120A
|
||||
#define CL_COMMAND_SVM_MEMFILL_AMD 0x120B
|
||||
#define CL_COMMAND_SVM_MAP_AMD 0x120C
|
||||
#define CL_COMMAND_SVM_UNMAP_AMD 0x120D
|
||||
|
||||
typedef CL_API_ENTRY void*
|
||||
(CL_API_CALL * clSVMAllocAMD_fn)(
|
||||
cl_context /* context */,
|
||||
cl_svm_mem_flags_amd /* flags */,
|
||||
size_t /* size */,
|
||||
unsigned int /* alignment */
|
||||
) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY void
|
||||
(CL_API_CALL * clSVMFreeAMD_fn)(
|
||||
cl_context /* context */,
|
||||
void* /* svm_pointer */
|
||||
) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL * clEnqueueSVMFreeAMD_fn)(
|
||||
cl_command_queue /* command_queue */,
|
||||
cl_uint /* num_svm_pointers */,
|
||||
void** /* svm_pointers */,
|
||||
void (CL_CALLBACK *)( /*pfn_free_func*/
|
||||
cl_command_queue /* queue */,
|
||||
cl_uint /* num_svm_pointers */,
|
||||
void** /* svm_pointers */,
|
||||
void* /* user_data */),
|
||||
void* /* user_data */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */
|
||||
) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL * clEnqueueSVMMemcpyAMD_fn)(
|
||||
cl_command_queue /* command_queue */,
|
||||
cl_bool /* blocking_copy */,
|
||||
void* /* dst_ptr */,
|
||||
const void* /* src_ptr */,
|
||||
size_t /* size */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */
|
||||
) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL * clEnqueueSVMMemFillAMD_fn)(
|
||||
cl_command_queue /* command_queue */,
|
||||
void* /* svm_ptr */,
|
||||
const void* /* pattern */,
|
||||
size_t /* pattern_size */,
|
||||
size_t /* size */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */
|
||||
) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL * clEnqueueSVMMapAMD_fn)(
|
||||
cl_command_queue /* command_queue */,
|
||||
cl_bool /* blocking_map */,
|
||||
cl_map_flags /* map_flags */,
|
||||
void* /* svm_ptr */,
|
||||
size_t /* size */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */
|
||||
) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL * clEnqueueSVMUnmapAMD_fn)(
|
||||
cl_command_queue /* command_queue */,
|
||||
void* /* svm_ptr */,
|
||||
cl_uint /* num_events_in_wait_list */,
|
||||
const cl_event* /* event_wait_list */,
|
||||
cl_event* /* event */
|
||||
) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL * clSetKernelArgSVMPointerAMD_fn)(
|
||||
cl_kernel /* kernel */,
|
||||
cl_uint /* arg_index */,
|
||||
const void * /* arg_value */
|
||||
) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int
|
||||
(CL_API_CALL * clSetKernelExecInfoAMD_fn)(
|
||||
cl_kernel /* kernel */,
|
||||
cl_kernel_exec_info_amd /* param_name */,
|
||||
size_t /* param_value_size */,
|
||||
const void * /* param_value */
|
||||
) CL_EXT_SUFFIX__VERSION_1_2;
|
||||
|
||||
#endif
|
||||
|
||||
#endif // HAVE_OPENCL_SVM
|
||||
|
||||
#endif // __OPENCV_CORE_OCL_RUNTIME_OPENCL_SVM_HSA_EXTENSION_HPP__
|
@ -721,6 +721,16 @@ static bool ocl_gemm_amdblas( InputArray matA, InputArray matB, double alpha,
|
||||
return false;
|
||||
|
||||
UMat A = matA.getUMat(), B = matB.getUMat(), D = matD.getUMat();
|
||||
if (!ocl::internal::isCLBuffer(A) || !ocl::internal::isCLBuffer(B) || !ocl::internal::isCLBuffer(D))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (haveC)
|
||||
{
|
||||
UMat C = matC.getUMat();
|
||||
if (!ocl::internal::isCLBuffer(C))
|
||||
return false;
|
||||
}
|
||||
if (haveC)
|
||||
ctrans ? transpose(matC, D) : matC.copyTo(D);
|
||||
else
|
||||
|
@ -159,8 +159,9 @@ void MatAllocator::copy(UMatData* usrc, UMatData* udst, int dims, const size_t s
|
||||
memcpy(ptrs[1], ptrs[0], planesz);
|
||||
}
|
||||
|
||||
BufferPoolController* MatAllocator::getBufferPoolController() const
|
||||
// Default implementation: the `id` argument is ignored and every call returns
// the address of the same function-local DummyBufferPoolController instance.
BufferPoolController* MatAllocator::getBufferPoolController(const char* id) const
{
    static DummyBufferPoolController sharedDummy;
    (void)id;  // unused in the default implementation
    return &sharedDummy;
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -182,6 +182,65 @@ static void* opencl_check_fn(int ID);
|
||||
|
||||
#define CUSTOM_FUNCTION_ID 1000
|
||||
|
||||
#ifdef HAVE_OPENCL_SVM
|
||||
#include "opencv2/core/opencl/runtime/opencl_svm_20.hpp"
|
||||
// Function IDs in the half-open range [SVM_FUNCTION_ID_START, SVM_FUNCTION_ID_END)
// are treated as SVM entry points by opencl_check_fn().
#define SVM_FUNCTION_ID_START CUSTOM_FUNCTION_ID
// Parenthesized so the sum stays one operand when the macro is expanded inside a
// larger expression (e.g. `SVM_FUNCTION_ID_END * 2` or `x - SVM_FUNCTION_ID_END`).
#define SVM_FUNCTION_ID_END (CUSTOM_FUNCTION_ID + 100)
|
||||
|
||||
enum OPENCL_FN_SVM_ID
|
||||
{
|
||||
OPENCL_FN_clSVMAlloc = SVM_FUNCTION_ID_START,
|
||||
OPENCL_FN_clSVMFree,
|
||||
OPENCL_FN_clSetKernelArgSVMPointer,
|
||||
OPENCL_FN_clSetKernelExecInfo,
|
||||
OPENCL_FN_clEnqueueSVMFree,
|
||||
OPENCL_FN_clEnqueueSVMMemcpy,
|
||||
OPENCL_FN_clEnqueueSVMMemFill,
|
||||
OPENCL_FN_clEnqueueSVMMap,
|
||||
OPENCL_FN_clEnqueueSVMUnmap,
|
||||
};
|
||||
|
||||
void* (CL_API_CALL *clSVMAlloc)(cl_context context, cl_svm_mem_flags flags, size_t size, unsigned int alignment) =
|
||||
opencl_fn4<OPENCL_FN_clSVMAlloc, void*, cl_context, cl_svm_mem_flags, size_t, unsigned int>::switch_fn;
|
||||
static const struct DynamicFnEntry _clSVMAlloc_definition = { "clSVMAlloc", (void**)&clSVMAlloc};
|
||||
void (CL_API_CALL *clSVMFree)(cl_context context, void* svm_pointer) =
|
||||
opencl_fn2<OPENCL_FN_clSVMFree, void, cl_context, void*>::switch_fn;
|
||||
static const struct DynamicFnEntry _clSVMFree_definition = { "clSVMFree", (void**)&clSVMFree};
|
||||
cl_int (CL_API_CALL *clSetKernelArgSVMPointer)(cl_kernel kernel, cl_uint arg_index, const void* arg_value) =
|
||||
opencl_fn3<OPENCL_FN_clSetKernelArgSVMPointer, cl_int, cl_kernel, cl_uint, const void*>::switch_fn;
|
||||
static const struct DynamicFnEntry _clSetKernelArgSVMPointer_definition = { "clSetKernelArgSVMPointer", (void**)&clSetKernelArgSVMPointer};
|
||||
//cl_int (CL_API_CALL *clSetKernelExecInfo)(cl_kernel kernel, cl_kernel_exec_info param_name, size_t param_value_size, const void* param_value) =
|
||||
// opencl_fn4<OPENCL_FN_clSetKernelExecInfo, cl_int, cl_kernel, cl_kernel_exec_info, size_t, const void*>::switch_fn;
|
||||
//static const struct DynamicFnEntry _clSetKernelExecInfo_definition = { "clSetKernelExecInfo", (void**)&clSetKernelExecInfo};
|
||||
//cl_int (CL_API_CALL *clEnqueueSVMFree)(...) =
|
||||
// opencl_fn8<OPENCL_FN_clEnqueueSVMFree, cl_int, ...>::switch_fn;
|
||||
//static const struct DynamicFnEntry _clEnqueueSVMFree_definition = { "clEnqueueSVMFree", (void**)&clEnqueueSVMFree};
|
||||
cl_int (CL_API_CALL *clEnqueueSVMMemcpy)(cl_command_queue command_queue, cl_bool blocking_copy, void* dst_ptr, const void* src_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
|
||||
opencl_fn8<OPENCL_FN_clEnqueueSVMMemcpy, cl_int, cl_command_queue, cl_bool, void*, const void*, size_t, cl_uint, const cl_event*, cl_event*>::switch_fn;
|
||||
static const struct DynamicFnEntry _clEnqueueSVMMemcpy_definition = { "clEnqueueSVMMemcpy", (void**)&clEnqueueSVMMemcpy};
|
||||
cl_int (CL_API_CALL *clEnqueueSVMMemFill)(cl_command_queue command_queue, void* svm_ptr, const void* pattern, size_t pattern_size, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
|
||||
opencl_fn8<OPENCL_FN_clEnqueueSVMMemFill, cl_int, cl_command_queue, void*, const void*, size_t, size_t, cl_uint, const cl_event*, cl_event*>::switch_fn;
|
||||
static const struct DynamicFnEntry _clEnqueueSVMMemFill_definition = { "clEnqueueSVMMemFill", (void**)&clEnqueueSVMMemFill};
|
||||
cl_int (CL_API_CALL *clEnqueueSVMMap)(cl_command_queue command_queue, cl_bool blocking_map, cl_map_flags map_flags, void* svm_ptr, size_t size, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
|
||||
opencl_fn8<OPENCL_FN_clEnqueueSVMMap, cl_int, cl_command_queue, cl_bool, cl_map_flags, void*, size_t, cl_uint, const cl_event*, cl_event*>::switch_fn;
|
||||
static const struct DynamicFnEntry _clEnqueueSVMMap_definition = { "clEnqueueSVMMap", (void**)&clEnqueueSVMMap};
|
||||
cl_int (CL_API_CALL *clEnqueueSVMUnmap)(cl_command_queue command_queue, void* svm_ptr, cl_uint num_events_in_wait_list, const cl_event* event_wait_list, cl_event* event) =
|
||||
opencl_fn5<OPENCL_FN_clEnqueueSVMUnmap, cl_int, cl_command_queue, void*, cl_uint, const cl_event*, cl_event*>::switch_fn;
|
||||
static const struct DynamicFnEntry _clEnqueueSVMUnmap_definition = { "clEnqueueSVMUnmap", (void**)&clEnqueueSVMUnmap};
|
||||
|
||||
static const struct DynamicFnEntry* opencl_svm_fn_list[] = {
|
||||
&_clSVMAlloc_definition,
|
||||
&_clSVMFree_definition,
|
||||
&_clSetKernelArgSVMPointer_definition,
|
||||
NULL/*&_clSetKernelExecInfo_definition*/,
|
||||
NULL/*&_clEnqueueSVMFree_definition*/,
|
||||
&_clEnqueueSVMMemcpy_definition,
|
||||
&_clEnqueueSVMMemFill_definition,
|
||||
&_clEnqueueSVMMap_definition,
|
||||
&_clEnqueueSVMUnmap_definition,
|
||||
};
|
||||
#endif // HAVE_OPENCL_SVM
|
||||
|
||||
//
|
||||
// END OF CUSTOM FUNCTIONS HERE
|
||||
//
|
||||
@ -194,6 +253,14 @@ static void* opencl_check_fn(int ID)
|
||||
assert(ID >= 0 && ID < (int)(sizeof(opencl_fn_list)/sizeof(opencl_fn_list[0])));
|
||||
e = opencl_fn_list[ID];
|
||||
}
|
||||
#ifdef HAVE_OPENCL_SVM
|
||||
else if (ID >= SVM_FUNCTION_ID_START && ID < SVM_FUNCTION_ID_END)
|
||||
{
|
||||
ID = ID - SVM_FUNCTION_ID_START;
|
||||
assert(ID >= 0 && ID < (int)(sizeof(opencl_svm_fn_list)/sizeof(opencl_svm_fn_list[0])));
|
||||
e = opencl_svm_fn_list[ID];
|
||||
}
|
||||
#endif
|
||||
else
|
||||
{
|
||||
CV_ErrorNoReturn(cv::Error::StsBadArg, "Invalid function ID");
|
||||
|
@ -55,7 +55,7 @@ UMatData::UMatData(const MatAllocator* allocator)
|
||||
prevAllocator = currAllocator = allocator;
|
||||
urefcount = refcount = 0;
|
||||
data = origdata = 0;
|
||||
size = 0; capacity = 0;
|
||||
size = 0;
|
||||
flags = 0;
|
||||
handle = 0;
|
||||
userdata = 0;
|
||||
@ -67,7 +67,7 @@ UMatData::~UMatData()
|
||||
prevAllocator = currAllocator = 0;
|
||||
urefcount = refcount = 0;
|
||||
data = origdata = 0;
|
||||
size = 0; capacity = 0;
|
||||
size = 0;
|
||||
flags = 0;
|
||||
handle = 0;
|
||||
userdata = 0;
|
||||
@ -221,7 +221,7 @@ UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
|
||||
temp_u = a->allocate(dims, size.p, type(), data, step.p, accessFlags, usageFlags);
|
||||
temp_u->refcount = 1;
|
||||
}
|
||||
UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags);
|
||||
UMat::getStdAllocator()->allocate(temp_u, accessFlags, usageFlags); // TODO result is not checked
|
||||
hdr.flags = flags;
|
||||
setSize(hdr, dims, size.p, step.p);
|
||||
finalizeHdr(hdr);
|
||||
@ -575,7 +575,7 @@ Mat UMat::getMat(int accessFlags) const
|
||||
{
|
||||
if(!u)
|
||||
return Mat();
|
||||
u->currAllocator->map(u, accessFlags | ACCESS_READ);
|
||||
u->currAllocator->map(u, accessFlags | ACCESS_READ); // TODO Support ACCESS_WRITE without unnecessary data transfers
|
||||
CV_Assert(u->data != 0);
|
||||
Mat hdr(dims, size.p, type(), u->data + offset, step.p);
|
||||
hdr.flags = flags;
|
||||
|
Loading…
x
Reference in New Issue
Block a user