ocl: split initialization.cpp into 3 files: context, operations, programcache

Alexander Alekhin committed on 2013-09-20 19:19:52 +04:00
parent 8e75947a7d
commit e8d9ed8955
36 changed files with 1699 additions and 1534 deletions


@@ -57,8 +57,7 @@ namespace cv
{
namespace ocl
{
using std::auto_ptr;
enum
enum DeviceType
{
CVCL_DEVICE_TYPE_DEFAULT = (1 << 0),
CVCL_DEVICE_TYPE_CPU = (1 << 1),
@@ -93,77 +92,113 @@ namespace cv
//return -1 if the target type is unsupported, otherwise return 0
CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
//this class contains ocl runtime information
class CV_EXPORTS Info
// these classes contain OpenCL runtime information
struct PlatformInfo;
struct DeviceInfo
{
public:
struct Impl;
Impl *impl;
int _id; // reserved, don't use it
Info();
Info(const Info &m);
~Info();
void release();
Info &operator = (const Info &m);
std::vector<string> DeviceName;
DeviceType deviceType;
std::string deviceProfile;
std::string deviceVersion;
std::string deviceName;
std::string deviceVendor;
int deviceVendorId;
std::string deviceDriverVersion;
std::string deviceExtensions;
size_t maxWorkGroupSize;
std::vector<size_t> maxWorkItemSizes;
int maxComputeUnits;
size_t localMemorySize;
int deviceVersionMajor;
int deviceVersionMinor;
bool haveDoubleSupport;
bool isUnifiedMemory; // 1 means integrated GPU, otherwise this value is 0
std::string compilationExtraOptions;
const PlatformInfo* platform;
DeviceInfo();
};
struct PlatformInfo
{
int _id; // reserved, don't use it
std::string platformProfile;
std::string platformVersion;
std::string platformName;
std::string platformVendor;
std::string platformExtensons;
int platformVersionMajor;
int platformVersionMinor;
std::vector<const DeviceInfo*> devices;
PlatformInfo();
};
//////////////////////////////// Initialization & Info ////////////////////////
//this function may be obsoleted
//CV_EXPORTS cl_device_id getDevice();
//this function must be called before any other cv::ocl:: function; it initializes the OpenCL runtime
//each Info relates to an OpenCL platform
//there are one or more devices on each platform, each with its own name
CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
typedef std::vector<const PlatformInfo*> PlatformsInfo;
//set the device you want to use; optional, to be called after getDevice
//devnum is the index of the selected device in the DeviceName vector of Info
CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms);
//The two functions below enable other OpenCL programs to use the ocl module's cl_context and cl_command_queue
//returns cl_context *
CV_EXPORTS void* getoclContext();
//returns cl_command_queue *
CV_EXPORTS void* getoclCommandQueue();
typedef std::vector<const DeviceInfo*> DevicesInfo;
//explicitly calls clFinish. The global command queue will be used.
CV_EXPORTS void finish();
CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU,
const PlatformInfo* platform = NULL);
//this function enables the ocl module to use a customized cl_context and cl_command_queue
//getDevice also needs to be called before this function
CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
//returns true when global OpenCL context is initialized
CV_EXPORTS bool initialized();
// set device you want to use
CV_EXPORTS void setDevice(const DeviceInfo* info);
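For reference, a minimal sketch of how the new enumeration API is meant to be used (illustrative only; assumes at least one OpenCL-capable GPU is present):

    #include <iostream>
    #include "opencv2/ocl/ocl.hpp"

    int main()
    {
        // enumerate all GPU devices across the available platforms
        cv::ocl::DevicesInfo devices;
        cv::ocl::getOpenCLDevices(devices, cv::ocl::CVCL_DEVICE_TYPE_GPU);

        // inspect the first device and make it the current one
        const cv::ocl::DeviceInfo* dev = devices.at(0);   // assumes a GPU is present
        std::cout << dev->platform->platformName << " / " << dev->deviceName << std::endl;
        cv::ocl::setDevice(dev);
        return 0;
    }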
//////////////////////////////// Error handling ////////////////////////
CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
//////////////////////////////// OpenCL context ////////////////////////
//This is a global singleton class used to represent an OpenCL context.
enum FEATURE_TYPE
{
FEATURE_CL_DOUBLE = 1,
FEATURE_CL_UNIFIED_MEM,
FEATURE_CL_VER_1_2
};
// Represents OpenCL context, interface
class CV_EXPORTS Context
{
protected:
Context();
friend class auto_ptr<Context>;
friend bool initialized();
private:
static auto_ptr<Context> clCxt;
static int val;
Context() { }
~Context() { }
public:
~Context();
void release();
Info::Impl* impl;
static Context* getContext();
static void setContext(Info &oclinfo);
enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2};
bool supportsFeature(int ftype) const;
size_t computeUnits() const;
void* oclContext();
void* oclCommandQueue();
bool supportsFeature(FEATURE_TYPE featureType) const;
const DeviceInfo& getDeviceInfo() const;
const void* getOpenCLContextPtr() const;
const void* getOpenCLCommandQueuePtr() const;
const void* getOpenCLDeviceIDPtr() const;
};
inline const void *getClContextPtr()
{
return Context::getContext()->getOpenCLContextPtr();
}
inline const void *getClCommandQueuePtr()
{
return Context::getContext()->getOpenCLCommandQueuePtr();
}
bool CV_EXPORTS supportsFeature(FEATURE_TYPE featureType);
void CV_EXPORTS finish();
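A short illustrative sketch of querying the reworked Context interface; the getClContextPtr()/getClCommandQueuePtr() helpers are the inline wrappers defined just above:

    #include "opencv2/ocl/ocl.hpp"

    void reportContext()
    {
        cv::ocl::Context* ctx = cv::ocl::Context::getContext();

        // feature checks replace the old CL_DOUBLE / CL_UNIFIED_MEM enum values
        bool hasDouble  = ctx->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE);
        bool unifiedMem = ctx->supportsFeature(cv::ocl::FEATURE_CL_UNIFIED_MEM);

        // device properties now come from DeviceInfo instead of Info::Impl
        const cv::ocl::DeviceInfo& dev = ctx->getDeviceInfo();
        size_t wgSize = dev.maxWorkGroupSize;

        // raw handles for interop (pointers to cl_context / cl_command_queue)
        const void* clCtxPtr   = cv::ocl::getClContextPtr();
        const void* clQueuePtr = cv::ocl::getClCommandQueuePtr();
        (void)hasDouble; (void)unifiedMem; (void)wgSize; (void)clCtxPtr; (void)clQueuePtr;
    }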
//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
const char **source, string kernelName,
@@ -384,7 +419,7 @@ namespace cv
uchar *dataend;
//! OpenCL context associated with the oclMat object.
Context *clCxt;
Context *clCxt; // TODO clCtx
//add offset for handle ROI, calculated in byte
int offset;
//add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
@@ -1879,11 +1914,6 @@ namespace cv
oclMat temp5;
};
static inline size_t divUp(size_t total, size_t grain)
{
return (total + grain - 1) / grain;
}
/*!***************K Nearest Neighbour*************!*/
class CV_EXPORTS KNearestNeighbour: public CvKNearest
{


@@ -52,120 +52,138 @@
namespace cv
{
namespace ocl
{
inline cl_device_id getClDeviceID(const Context *ctx)
{
return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr());
}
inline cl_context getClContext(const Context *ctx)
{
return *(cl_context*)(ctx->getOpenCLContextPtr());
}
inline cl_command_queue getClCommandQueue(const Context *ctx)
{
return *(cl_command_queue*)(ctx->getOpenCLCommandQueuePtr());
}
enum openCLMemcpyKind
{
clMemcpyHostToDevice = 0,
clMemcpyDeviceToHost,
clMemcpyDeviceToDevice
};
///////////////////////////OpenCL call wrappers////////////////////////////
void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
size_t widthInBytes, size_t height);
void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type);
void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
const void *src, size_t spitch,
size_t width, size_t height, openCLMemcpyKind kind, int channels = -1);
void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
const void *src, size_t spitch,
size_t width, size_t height, int src_offset);
void CV_EXPORTS openCLFree(void *devPtr);
cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
const char **source, std::string kernelName);
cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
const char **source, std::string kernelName, const char *build_options);
void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair<size_t, const void *> > &args,
int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName,
size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, const char *build_options);
cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value,
const size_t size);
cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr);
int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName);
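The execution wrappers above take kernel arguments as (size, pointer) pairs. An illustrative sketch of roughly that calling pattern inside the module; runSampleKernel and the "my_kernel" name are placeholders, src is assumed to be a valid oclMat, and source is assumed to point at an OpenCL C source string:

    static void runSampleKernel(const cv::ocl::oclMat& src, const char** source)
    {
        using namespace cv::ocl;

        // kernel arguments are passed as (size, pointer) pairs
        std::vector< std::pair<size_t, const void *> > args;
        int srcStep = (int)src.step;
        args.push_back(std::make_pair(sizeof(cl_mem), (const void *)&src.data));
        args.push_back(std::make_pair(sizeof(cl_int), (const void *)&srcStep));

        // round the global work size up to whole 16x16 groups
        size_t localThreads[3]  = { 16, 16, 1 };
        size_t globalThreads[3] = { divUp(src.cols, 16) * 16,
                                    divUp(src.rows, 16) * 16, 1 };

        openCLExecuteKernel(Context::getContext(), source, "my_kernel",
                            globalThreads, localThreads, args,
                            src.oclchannels(), src.depth());
    }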
enum FLUSH_MODE
{
CLFINISH = 0,
CLFLUSH,
DISABLE
};
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
// bind oclMat to OpenCL image textures
// note:
// 1. there is no memory management. Users need to explicitly release the resource
// 2. for faster clamping, there is no buffer padding for the constructed texture
cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
void CV_EXPORTS releaseTexture(cl_mem& texture);
//Represents an image texture object
class CV_EXPORTS TextureCL
{
public:
TextureCL(cl_mem tex, int r, int c, int t)
: tex_(tex), rows(r), cols(c), type(t) {}
~TextureCL()
{
openCLFree(tex_);
}
operator cl_mem()
{
return tex_;
}
cl_mem const tex_;
const int rows;
const int cols;
const int type;
private:
//disable assignment
void operator=(const TextureCL&);
};
// binds oclMat to an OpenCL image texture and returns a TextureCL object
// note:
// for faster clamping, there is no buffer padding for the constructed texture
Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
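Illustrative sketch of the two binding paths declared above; img stands for an existing oclMat:

    // manual path: the caller owns the image object and must release it
    cl_mem tex = cv::ocl::bindTexture(img);
    // ... pass 'tex' to a kernel ...
    cv::ocl::releaseTexture(tex);

    // RAII path: TextureCL frees the image in its destructor via openCLFree
    cv::Ptr<cv::ocl::TextureCL> texPtr = cv::ocl::bindTexturePtr(img);
    // *texPtr converts implicitly to cl_mem; released when the Ptr goes out of scope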
// returns whether the current context supports image2d_t format or not
bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
bool CV_EXPORTS isCpuDevice();
size_t CV_EXPORTS queryWaveFrontSize(cl_kernel kernel);
inline size_t divUp(size_t total, size_t grain)
{
return (total + grain - 1) / grain;
}
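For instance, with 16-wide work-groups, divUp gives the number of groups needed to cover 100 items (illustrative):

    size_t groups = divUp(100, 16);    // (100 + 15) / 16 == 7 work-groups
    size_t global = groups * 16;       // 112 work-items, enough to cover 100 columns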
// the enums are used to query device information
// currently only support wavefront size queries
enum DEVICE_INFO
{
WAVEFRONT_SIZE, //in AMD speak
IS_CPU_DEVICE //check if the device is CPU
};
template<DEVICE_INFO _it, typename _ty>
_ty queryDeviceInfo(cl_kernel kernel = NULL);
template<>
int CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel);
template<>
size_t CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, size_t>(cl_kernel kernel);
template<>
bool CV_EXPORTS queryDeviceInfo<IS_CPU_DEVICE, bool>(cl_kernel kernel);
unsigned long CV_EXPORTS queryLocalMemInfo();
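The templated queryDeviceInfo interface above is superseded by the dedicated helpers declared earlier in this header. An illustrative before/after sketch, where kernel stands for an existing cl_kernel handle:

    // before: templated query keyed by the DEVICE_INFO enum
    size_t wave  = queryDeviceInfo<WAVEFRONT_SIZE, size_t>(kernel);
    bool   onCpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();

    // after: dedicated helpers declared above
    size_t wave2  = queryWaveFrontSize(kernel);
    bool   onCpu2 = isCpuDevice();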
inline size_t roundUp(size_t sz, size_t n)
{
// we don't assume that n is a power of 2 (see alignSize)
// equal to divUp(sz, n) * n
size_t t = sz + n - 1;
size_t rem = t % n;
size_t result = t - rem;
return result;
}
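A quick numeric check of roundUp (illustrative); unlike alignSize, n does not have to be a power of two:

    size_t a = roundUp(100, 16);   // 112
    size_t b = roundUp(100, 24);   // 120 (24 is not a power of two)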
}//namespace ocl
}//namespace cv
#endif //__OPENCV_OCL_PRIVATE_UTIL__