ocl: split initialization.cpp into 3 files: context, operations, programcache
This commit is contained in:
@@ -57,8 +57,7 @@ namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
using std::auto_ptr;
|
||||
enum
|
||||
enum DeviceType
|
||||
{
|
||||
CVCL_DEVICE_TYPE_DEFAULT = (1 << 0),
|
||||
CVCL_DEVICE_TYPE_CPU = (1 << 1),
|
||||
@@ -93,77 +92,113 @@ namespace cv
|
||||
//return -1 if the target type is unsupported, otherwise return 0
|
||||
CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
|
||||
|
||||
//this class contains ocl runtime information
|
||||
class CV_EXPORTS Info
|
||||
// these classes contain OpenCL runtime information
|
||||
|
||||
struct PlatformInfo;
|
||||
|
||||
struct DeviceInfo
|
||||
{
|
||||
public:
|
||||
struct Impl;
|
||||
Impl *impl;
|
||||
int _id; // reserved, don't use it
|
||||
|
||||
Info();
|
||||
Info(const Info &m);
|
||||
~Info();
|
||||
void release();
|
||||
Info &operator = (const Info &m);
|
||||
std::vector<string> DeviceName;
|
||||
DeviceType deviceType;
|
||||
std::string deviceProfile;
|
||||
std::string deviceVersion;
|
||||
std::string deviceName;
|
||||
std::string deviceVendor;
|
||||
int deviceVendorId;
|
||||
std::string deviceDriverVersion;
|
||||
std::string deviceExtensions;
|
||||
|
||||
size_t maxWorkGroupSize;
|
||||
std::vector<size_t> maxWorkItemSizes;
|
||||
int maxComputeUnits;
|
||||
size_t localMemorySize;
|
||||
|
||||
int deviceVersionMajor;
|
||||
int deviceVersionMinor;
|
||||
|
||||
bool haveDoubleSupport;
|
||||
bool isUnifiedMemory; // 1 means integrated GPU, otherwise this value is 0
|
||||
|
||||
std::string compilationExtraOptions;
|
||||
|
||||
const PlatformInfo* platform;
|
||||
|
||||
DeviceInfo();
|
||||
};
|
||||
|
||||
// Plain data record describing one OpenCL platform. Instances are handed out
// as const pointers through the PlatformsInfo typedef / getOpenCLPlatforms().
// NOTE(review): the string fields presumably mirror the clGetPlatformInfo
// queries of the same names - confirm against the implementation.
struct PlatformInfo
|
||||
{
|
||||
int _id; // reserved, don't use it
|
||||
|
||||
std::string platformProfile;
|
||||
std::string platformVersion;
|
||||
std::string platformName;
|
||||
std::string platformVendor;
|
||||
// NOTE: the identifier is spelled "Extensons" (sic). It is a public member
// name, so it cannot be corrected without breaking callers.
std::string platformExtensons;
|
||||
|
||||
// NOTE(review): presumably the numeric major/minor parts of platformVersion - confirm.
int platformVersionMajor;
|
||||
int platformVersionMinor;
|
||||
|
||||
// Non-owning-looking const pointers to device records; DeviceInfo carries a
// matching `platform` back-pointer. Ownership is not visible here - confirm.
std::vector<const DeviceInfo*> devices;
|
||||
|
||||
PlatformInfo();
|
||||
};
|
||||
|
||||
//////////////////////////////// Initialization & Info ////////////////////////
|
||||
//this function may be obsoleted
|
||||
//CV_EXPORTS cl_device_id getDevice();
|
||||
//this function must be called before any other cv::ocl functions; it initializes the ocl runtime
|
||||
//each Info relates to an OpenCL platform
|
||||
//there is one or more devices in each platform, each one has a separate name
|
||||
CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
|
||||
typedef std::vector<const PlatformInfo*> PlatformsInfo;
|
||||
|
||||
//set the device you want to use; optional function, to be called after getDevice has been called
|
||||
//devnum is the index of the selected device in the DeviceName vector of Info
|
||||
CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
|
||||
CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms);
|
||||
|
||||
//The two functions below enable other OpenCL programs to use the ocl module's cl_context and cl_command_queue
|
||||
//returns cl_context *
|
||||
CV_EXPORTS void* getoclContext();
|
||||
//returns cl_command_queue *
|
||||
CV_EXPORTS void* getoclCommandQueue();
|
||||
typedef std::vector<const DeviceInfo*> DevicesInfo;
|
||||
|
||||
//explicit call clFinish. The global command queue will be used.
|
||||
CV_EXPORTS void finish();
|
||||
CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU,
|
||||
const PlatformInfo* platform = NULL);
|
||||
|
||||
//this function enables the ocl module to use a customized cl_context and cl_command_queue
|
||||
//getDevice also needs to be called before this function
|
||||
CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
|
||||
|
||||
//returns true when global OpenCL context is initialized
|
||||
CV_EXPORTS bool initialized();
|
||||
// set device you want to use
|
||||
CV_EXPORTS void setDevice(const DeviceInfo* info);
|
||||
|
||||
//////////////////////////////// Error handling ////////////////////////
|
||||
CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
|
||||
|
||||
//////////////////////////////// OpenCL context ////////////////////////
|
||||
//This is a global singleton class used to represent an OpenCL context.
|
||||
// Feature identifiers accepted by Context::supportsFeature(FEATURE_TYPE) and
// the free function supportsFeature(FEATURE_TYPE). Numbering starts at 1;
// the remaining enumerators follow consecutively.
enum FEATURE_TYPE
|
||||
{
|
||||
FEATURE_CL_DOUBLE = 1,
|
||||
FEATURE_CL_UNIFIED_MEM,
|
||||
FEATURE_CL_VER_1_2
|
||||
};
|
||||
|
||||
// Represents OpenCL context, interface
|
||||
class CV_EXPORTS Context
|
||||
{
|
||||
protected:
|
||||
Context();
|
||||
friend class auto_ptr<Context>;
|
||||
friend bool initialized();
|
||||
private:
|
||||
static auto_ptr<Context> clCxt;
|
||||
static int val;
|
||||
Context() { }
|
||||
~Context() { }
|
||||
public:
|
||||
~Context();
|
||||
void release();
|
||||
Info::Impl* impl;
|
||||
|
||||
static Context* getContext();
|
||||
static void setContext(Info &oclinfo);
|
||||
|
||||
enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2};
|
||||
bool supportsFeature(int ftype) const;
|
||||
size_t computeUnits() const;
|
||||
void* oclContext();
|
||||
void* oclCommandQueue();
|
||||
bool supportsFeature(FEATURE_TYPE featureType) const;
|
||||
const DeviceInfo& getDeviceInfo() const;
|
||||
|
||||
const void* getOpenCLContextPtr() const;
|
||||
const void* getOpenCLCommandQueuePtr() const;
|
||||
const void* getOpenCLDeviceIDPtr() const;
|
||||
};
|
||||
|
||||
inline const void *getClContextPtr()
|
||||
{
|
||||
return Context::getContext()->getOpenCLContextPtr();
|
||||
}
|
||||
|
||||
inline const void *getClCommandQueuePtr()
|
||||
{
|
||||
return Context::getContext()->getOpenCLCommandQueuePtr();
|
||||
}
|
||||
|
||||
bool CV_EXPORTS supportsFeature(FEATURE_TYPE featureType);
|
||||
|
||||
void CV_EXPORTS finish();
|
||||
|
||||
//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
|
||||
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
|
||||
const char **source, string kernelName,
|
||||
@@ -384,7 +419,7 @@ namespace cv
|
||||
uchar *dataend;
|
||||
|
||||
//! OpenCL context associated with the oclMat object.
|
||||
Context *clCxt;
|
||||
Context *clCxt; // TODO clCtx
|
||||
//add offset for handling ROI, calculated in bytes
|
||||
int offset;
|
||||
//add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
|
||||
@@ -1879,11 +1914,6 @@ namespace cv
|
||||
oclMat temp5;
|
||||
};
|
||||
|
||||
// Ceiling division: returns the smallest k such that k * grain >= total.
// Precondition: grain != 0 (division by zero is undefined behaviour).
static inline size_t divUp(size_t total, size_t grain)
{
    // Quotient plus a carry for any remainder. The classic form
    // (total + grain - 1) / grain wraps around when total is close to the
    // maximum size_t value and then returns a far too small result.
    return total / grain + (total % grain != 0 ? 1 : 0);
}
|
||||
|
||||
/*!***************K Nearest Neighbour*************!*/
|
||||
class CV_EXPORTS KNearestNeighbour: public CvKNearest
|
||||
{
|
||||
|
@@ -52,120 +52,138 @@
|
||||
|
||||
namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
namespace ocl
|
||||
{
|
||||
|
||||
// Reads the cl_device_id stored behind the context's opaque device-ID pointer.
inline cl_device_id getClDeviceID(const Context *ctx)
{
    const void *opaque = ctx->getOpenCLDeviceIDPtr();
    return *static_cast<const cl_device_id *>(opaque);
}
|
||||
|
||||
// Reads the cl_context stored behind the context's opaque context pointer.
inline cl_context getClContext(const Context *ctx)
{
    const void *opaque = ctx->getOpenCLContextPtr();
    return *static_cast<const cl_context *>(opaque);
}
|
||||
|
||||
// Reads the cl_command_queue stored behind the context's opaque command-queue pointer.
inline cl_command_queue getClCommandQueue(const Context *ctx)
{
    const void *opaque = ctx->getOpenCLCommandQueuePtr();
    return *static_cast<const cl_command_queue *>(opaque);
}
|
||||
|
||||
// Values for the `kind` parameter of openCLMemcpy2D.
// NOTE(review): the names suggest the copy direction (source -> destination
// memory); confirm against the openCLMemcpy2D implementation.
enum openCLMemcpyKind
|
||||
{
|
||||
clMemcpyHostToDevice = 0,
|
||||
clMemcpyDeviceToHost,
|
||||
clMemcpyDeviceToDevice
|
||||
};
|
||||
///////////////////////////OpenCL call wrappers////////////////////////////
|
||||
void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
|
||||
size_t widthInBytes, size_t height);
|
||||
void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
|
||||
size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type);
|
||||
void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
|
||||
const void *src, size_t spitch,
|
||||
size_t width, size_t height, openCLMemcpyKind kind, int channels = -1);
|
||||
void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
|
||||
const void *src, size_t spitch,
|
||||
size_t width, size_t height, int src_offset);
|
||||
void CV_EXPORTS openCLFree(void *devPtr);
|
||||
cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
|
||||
void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
|
||||
cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
|
||||
const char **source, std::string kernelName);
|
||||
cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
|
||||
const char **source, std::string kernelName, const char *build_options);
|
||||
void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
|
||||
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair<size_t, const void *> > &args,
|
||||
int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
|
||||
void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName,
|
||||
size_t globalThreads[3], size_t localThreads[3],
|
||||
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
|
||||
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
||||
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
|
||||
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
||||
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
|
||||
int depth, const char *build_options);
|
||||
|
||||
cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value,
|
||||
const size_t size);
|
||||
|
||||
cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr);
|
||||
|
||||
int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName);
|
||||
|
||||
// Values for the `finish_mode` parameter of openCLExecuteKernel2, which
// defaults to DISABLE.
// NOTE(review): the names suggest clFinish / clFlush / neither after the
// kernel launch - confirm against the implementation.
enum FLUSH_MODE
|
||||
{
|
||||
CLFINISH = 0,
|
||||
CLFLUSH,
|
||||
DISABLE
|
||||
};
|
||||
|
||||
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
||||
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
|
||||
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
||||
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
|
||||
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
|
||||
// bind oclMat to OpenCL image textures
|
||||
// note:
|
||||
// 1. there is no memory management. The user needs to explicitly release the resource
|
||||
// 2. for faster clamping, there is no buffer padding for the constructed texture
|
||||
cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
|
||||
void CV_EXPORTS releaseTexture(cl_mem& texture);
|
||||
|
||||
//Represents an image texture object
|
||||
class CV_EXPORTS TextureCL
|
||||
{
|
||||
public:
|
||||
TextureCL(cl_mem tex, int r, int c, int t)
|
||||
: tex_(tex), rows(r), cols(c), type(t) {}
|
||||
~TextureCL()
|
||||
{
|
||||
enum openCLMemcpyKind
|
||||
{
|
||||
clMemcpyHostToDevice = 0,
|
||||
clMemcpyDeviceToHost,
|
||||
clMemcpyDeviceToDevice
|
||||
};
|
||||
///////////////////////////OpenCL call wrappers////////////////////////////
|
||||
void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
|
||||
size_t widthInBytes, size_t height);
|
||||
void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
|
||||
size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type);
|
||||
void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
|
||||
const void *src, size_t spitch,
|
||||
size_t width, size_t height, openCLMemcpyKind kind, int channels = -1);
|
||||
void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
|
||||
const void *src, size_t spitch,
|
||||
size_t width, size_t height, int src_offset);
|
||||
void CV_EXPORTS openCLFree(void *devPtr);
|
||||
cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
|
||||
void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
|
||||
cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
|
||||
const char **source, std::string kernelName);
|
||||
cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
|
||||
const char **source, std::string kernelName, const char *build_options);
|
||||
void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
|
||||
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair<size_t, const void *> > &args,
|
||||
int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
|
||||
void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName,
|
||||
size_t globalThreads[3], size_t localThreads[3],
|
||||
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
|
||||
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
||||
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
|
||||
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
||||
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
|
||||
int depth, const char *build_options);
|
||||
openCLFree(tex_);
|
||||
}
|
||||
operator cl_mem()
|
||||
{
|
||||
return tex_;
|
||||
}
|
||||
cl_mem const tex_;
|
||||
const int rows;
|
||||
const int cols;
|
||||
const int type;
|
||||
private:
|
||||
//disable assignment
|
||||
void operator=(const TextureCL&);
|
||||
};
|
||||
// bind oclMat to OpenCL image textures and returns a TextureCL object
|
||||
// note:
|
||||
// for faster clamping, there is no buffer padding for the constructed texture
|
||||
Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
|
||||
|
||||
cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value,
|
||||
const size_t size);
|
||||
// returns whether the current context supports image2d_t format or not
|
||||
bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
|
||||
|
||||
cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr);
|
||||
bool CV_EXPORTS isCpuDevice();
|
||||
|
||||
int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName);
|
||||
size_t CV_EXPORTS queryWaveFrontSize(cl_kernel kernel);
|
||||
|
||||
// Values for the `finish_mode` parameter of openCLExecuteKernel2, which
// defaults to DISABLE.
// NOTE(review): the names suggest clFinish / clFlush / neither after the
// kernel launch - confirm against the implementation.
enum FLUSH_MODE
|
||||
{
|
||||
CLFINISH = 0,
|
||||
CLFLUSH,
|
||||
DISABLE
|
||||
};
|
||||
|
||||
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
||||
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
|
||||
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
||||
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
|
||||
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
|
||||
// bind oclMat to OpenCL image textures
|
||||
// note:
|
||||
// 1. there is no memory management. The user needs to explicitly release the resource
|
||||
// 2. for faster clamping, there is no buffer padding for the constructed texture
|
||||
cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
|
||||
void CV_EXPORTS releaseTexture(cl_mem& texture);
|
||||
|
||||
//Represents an image texture object
|
||||
// Wrapper that pairs a cl_mem texture handle with its dimensions and type and
// passes the handle to openCLFree() when the wrapper is destroyed.
// NOTE(review): only assignment is disabled below; the implicit copy
// constructor is still available, and a copy would hand the same tex_ to
// openCLFree() a second time - confirm whether that is safe or whether
// copying should be disabled as well.
class CV_EXPORTS TextureCL
|
||||
{
|
||||
public:
|
||||
// Stores the handle and the r/c/t values as given; no validation is done here.
TextureCL(cl_mem tex, int r, int c, int t)
|
||||
: tex_(tex), rows(r), cols(c), type(t) {}
|
||||
// Hands the wrapped handle to openCLFree().
~TextureCL()
|
||||
{
|
||||
openCLFree(tex_);
|
||||
}
|
||||
// Implicit conversion so the wrapper can be used where a cl_mem is expected.
operator cl_mem()
|
||||
{
|
||||
return tex_;
|
||||
}
|
||||
// All data members are const: they are fixed at construction time.
cl_mem const tex_;
|
||||
const int rows;
|
||||
const int cols;
|
||||
const int type;
|
||||
private:
|
||||
//disable assignment
|
||||
// Declared private; no definition is visible in this view.
void operator=(const TextureCL&);
|
||||
};
|
||||
// bind oclMat to OpenCL image textures and returns a TextureCL object
|
||||
// note:
|
||||
// for faster clamping, there is no buffer padding for the constructed texture
|
||||
Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
|
||||
// Ceiling division: returns the smallest k such that k * grain >= total.
// Precondition: grain != 0 (division by zero is undefined behaviour).
inline size_t divUp(size_t total, size_t grain)
{
    // Quotient plus a carry for any remainder. The classic form
    // (total + grain - 1) / grain wraps around when total is close to the
    // maximum size_t value and then returns a far too small result.
    return total / grain + (total % grain != 0 ? 1 : 0);
}
|
||||
|
||||
// returns whether the current context supports image2d_t format or not
|
||||
bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
|
||||
|
||||
// the enums are used to query device information
|
||||
// currently only support wavefront size queries
|
||||
// Selector used as the first template argument of queryDeviceInfo<>; the
// declared specializations pair WAVEFRONT_SIZE with int / size_t results and
// IS_CPU_DEVICE with a bool result.
enum DEVICE_INFO
|
||||
{
|
||||
WAVEFRONT_SIZE, //in AMD speak
|
||||
IS_CPU_DEVICE //check if the device is CPU
|
||||
};
|
||||
template<DEVICE_INFO _it, typename _ty>
|
||||
_ty queryDeviceInfo(cl_kernel kernel = NULL);
|
||||
|
||||
template<>
|
||||
int CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel);
|
||||
template<>
|
||||
size_t CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, size_t>(cl_kernel kernel);
|
||||
template<>
|
||||
bool CV_EXPORTS queryDeviceInfo<IS_CPU_DEVICE, bool>(cl_kernel kernel);
|
||||
|
||||
unsigned long CV_EXPORTS queryLocalMemInfo();
|
||||
}//namespace ocl
|
||||
// Rounds sz up to the nearest multiple of n.
// - n does not have to be a power of 2 (see alignSize for that case).
// - The result equals divUp(sz, n) * n.
// Precondition: n != 0 (modulo by zero is undefined behaviour).
// If the mathematically correct result does not fit in size_t, the value
// wraps around, as any unsigned arithmetic does.
inline size_t roundUp(size_t sz, size_t n)
{
    // Work from the remainder instead of computing sz + n - 1 first: that
    // intermediate sum can wrap even when the final result is representable
    // (e.g. sz already a multiple of n and close to the size_t maximum).
    const size_t rem = sz % n;
    return rem == 0 ? sz : sz + (n - rem);
}
|
||||
|
||||
}//namespace ocl
|
||||
}//namespace cv
|
||||
|
||||
#endif //__OPENCV_OCL_PRIVATE_UTIL__
|
||||
|
Reference in New Issue
Block a user