ocl: split initialization.cpp into 3 files: context, operations, programcache
This commit is contained in:
parent
8e75947a7d
commit
e8d9ed8955
@ -74,7 +74,7 @@ namespace cv
|
|||||||
}
|
}
|
||||||
cl_kernel kernel;
|
cl_kernel kernel;
|
||||||
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr);
|
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr);
|
||||||
size_t wave_size = queryDeviceInfo<WAVEFRONT_SIZE, size_t>(kernel);
|
size_t wave_size = queryWaveFrontSize(kernel);
|
||||||
CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
|
CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
|
||||||
sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
|
sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
|
||||||
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr);
|
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr);
|
||||||
|
@ -57,8 +57,7 @@ namespace cv
|
|||||||
{
|
{
|
||||||
namespace ocl
|
namespace ocl
|
||||||
{
|
{
|
||||||
using std::auto_ptr;
|
enum DeviceType
|
||||||
enum
|
|
||||||
{
|
{
|
||||||
CVCL_DEVICE_TYPE_DEFAULT = (1 << 0),
|
CVCL_DEVICE_TYPE_DEFAULT = (1 << 0),
|
||||||
CVCL_DEVICE_TYPE_CPU = (1 << 1),
|
CVCL_DEVICE_TYPE_CPU = (1 << 1),
|
||||||
@ -93,77 +92,113 @@ namespace cv
|
|||||||
//return -1 if the target type is unsupported, otherwise return 0
|
//return -1 if the target type is unsupported, otherwise return 0
|
||||||
CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
|
CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
|
||||||
|
|
||||||
//this class contains ocl runtime information
|
// these classes contain OpenCL runtime information
|
||||||
class CV_EXPORTS Info
|
|
||||||
|
struct PlatformInfo;
|
||||||
|
|
||||||
|
struct DeviceInfo
|
||||||
{
|
{
|
||||||
public:
|
int _id; // reserved, don't use it
|
||||||
struct Impl;
|
|
||||||
Impl *impl;
|
|
||||||
|
|
||||||
Info();
|
DeviceType deviceType;
|
||||||
Info(const Info &m);
|
std::string deviceProfile;
|
||||||
~Info();
|
std::string deviceVersion;
|
||||||
void release();
|
std::string deviceName;
|
||||||
Info &operator = (const Info &m);
|
std::string deviceVendor;
|
||||||
std::vector<string> DeviceName;
|
int deviceVendorId;
|
||||||
|
std::string deviceDriverVersion;
|
||||||
|
std::string deviceExtensions;
|
||||||
|
|
||||||
|
size_t maxWorkGroupSize;
|
||||||
|
std::vector<size_t> maxWorkItemSizes;
|
||||||
|
int maxComputeUnits;
|
||||||
|
size_t localMemorySize;
|
||||||
|
|
||||||
|
int deviceVersionMajor;
|
||||||
|
int deviceVersionMinor;
|
||||||
|
|
||||||
|
bool haveDoubleSupport;
|
||||||
|
bool isUnifiedMemory; // 1 means integrated GPU, otherwise this value is 0
|
||||||
|
|
||||||
|
std::string compilationExtraOptions;
|
||||||
|
|
||||||
|
const PlatformInfo* platform;
|
||||||
|
|
||||||
|
DeviceInfo();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct PlatformInfo
|
||||||
|
{
|
||||||
|
int _id; // reserved, don't use it
|
||||||
|
|
||||||
|
std::string platformProfile;
|
||||||
|
std::string platformVersion;
|
||||||
|
std::string platformName;
|
||||||
|
std::string platformVendor;
|
||||||
|
std::string platformExtensons;
|
||||||
|
|
||||||
|
int platformVersionMajor;
|
||||||
|
int platformVersionMinor;
|
||||||
|
|
||||||
|
std::vector<const DeviceInfo*> devices;
|
||||||
|
|
||||||
|
PlatformInfo();
|
||||||
|
};
|
||||||
|
|
||||||
//////////////////////////////// Initialization & Info ////////////////////////
|
//////////////////////////////// Initialization & Info ////////////////////////
|
||||||
//this function may be obsoleted
|
typedef std::vector<const PlatformInfo*> PlatformsInfo;
|
||||||
//CV_EXPORTS cl_device_id getDevice();
|
|
||||||
//the function must be called before any other cv::ocl::functions, it initialize ocl runtime
|
|
||||||
//each Info relates to an OpenCL platform
|
|
||||||
//there is one or more devices in each platform, each one has a separate name
|
|
||||||
CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
|
|
||||||
|
|
||||||
//set device you want to use, optional function after getDevice be called
|
CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms);
|
||||||
//the devnum is the index of the selected device in DeviceName vector of INfo
|
|
||||||
CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
|
|
||||||
|
|
||||||
//The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue
|
typedef std::vector<const DeviceInfo*> DevicesInfo;
|
||||||
//returns cl_context *
|
|
||||||
CV_EXPORTS void* getoclContext();
|
|
||||||
//returns cl_command_queue *
|
|
||||||
CV_EXPORTS void* getoclCommandQueue();
|
|
||||||
|
|
||||||
//explicit call clFinish. The global command queue will be used.
|
CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU,
|
||||||
CV_EXPORTS void finish();
|
const PlatformInfo* platform = NULL);
|
||||||
|
|
||||||
//this function enable ocl module to use customized cl_context and cl_command_queue
|
// set device you want to use
|
||||||
//getDevice also need to be called before this function
|
CV_EXPORTS void setDevice(const DeviceInfo* info);
|
||||||
CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
|
|
||||||
|
|
||||||
//returns true when global OpenCL context is initialized
|
|
||||||
CV_EXPORTS bool initialized();
|
|
||||||
|
|
||||||
//////////////////////////////// Error handling ////////////////////////
|
//////////////////////////////// Error handling ////////////////////////
|
||||||
CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
|
CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
|
||||||
|
|
||||||
//////////////////////////////// OpenCL context ////////////////////////
|
enum FEATURE_TYPE
|
||||||
//This is a global singleton class used to represent a OpenCL context.
|
{
|
||||||
|
FEATURE_CL_DOUBLE = 1,
|
||||||
|
FEATURE_CL_UNIFIED_MEM,
|
||||||
|
FEATURE_CL_VER_1_2
|
||||||
|
};
|
||||||
|
|
||||||
|
// Represents OpenCL context, interface
|
||||||
class CV_EXPORTS Context
|
class CV_EXPORTS Context
|
||||||
{
|
{
|
||||||
protected:
|
protected:
|
||||||
Context();
|
Context() { }
|
||||||
friend class auto_ptr<Context>;
|
~Context() { }
|
||||||
friend bool initialized();
|
|
||||||
private:
|
|
||||||
static auto_ptr<Context> clCxt;
|
|
||||||
static int val;
|
|
||||||
public:
|
public:
|
||||||
~Context();
|
|
||||||
void release();
|
|
||||||
Info::Impl* impl;
|
|
||||||
|
|
||||||
static Context* getContext();
|
static Context* getContext();
|
||||||
static void setContext(Info &oclinfo);
|
|
||||||
|
|
||||||
enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2};
|
bool supportsFeature(FEATURE_TYPE featureType) const;
|
||||||
bool supportsFeature(int ftype) const;
|
const DeviceInfo& getDeviceInfo() const;
|
||||||
size_t computeUnits() const;
|
|
||||||
void* oclContext();
|
const void* getOpenCLContextPtr() const;
|
||||||
void* oclCommandQueue();
|
const void* getOpenCLCommandQueuePtr() const;
|
||||||
|
const void* getOpenCLDeviceIDPtr() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
inline const void *getClContextPtr()
|
||||||
|
{
|
||||||
|
return Context::getContext()->getOpenCLContextPtr();
|
||||||
|
}
|
||||||
|
|
||||||
|
inline const void *getClCommandQueuePtr()
|
||||||
|
{
|
||||||
|
return Context::getContext()->getOpenCLCommandQueuePtr();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool CV_EXPORTS supportsFeature(FEATURE_TYPE featureType);
|
||||||
|
|
||||||
|
void CV_EXPORTS finish();
|
||||||
|
|
||||||
//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
|
//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
|
||||||
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
|
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
|
||||||
const char **source, string kernelName,
|
const char **source, string kernelName,
|
||||||
@ -384,7 +419,7 @@ namespace cv
|
|||||||
uchar *dataend;
|
uchar *dataend;
|
||||||
|
|
||||||
//! OpenCL context associated with the oclMat object.
|
//! OpenCL context associated with the oclMat object.
|
||||||
Context *clCxt;
|
Context *clCxt; // TODO clCtx
|
||||||
//add offset for handle ROI, calculated in byte
|
//add offset for handle ROI, calculated in byte
|
||||||
int offset;
|
int offset;
|
||||||
//add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
|
//add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
|
||||||
@ -1879,11 +1914,6 @@ namespace cv
|
|||||||
oclMat temp5;
|
oclMat temp5;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline size_t divUp(size_t total, size_t grain)
|
|
||||||
{
|
|
||||||
return (total + grain - 1) / grain;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*!***************K Nearest Neighbour*************!*/
|
/*!***************K Nearest Neighbour*************!*/
|
||||||
class CV_EXPORTS KNearestNeighbour: public CvKNearest
|
class CV_EXPORTS KNearestNeighbour: public CvKNearest
|
||||||
{
|
{
|
||||||
|
@ -52,120 +52,138 @@
|
|||||||
|
|
||||||
namespace cv
|
namespace cv
|
||||||
{
|
{
|
||||||
namespace ocl
|
namespace ocl
|
||||||
|
{
|
||||||
|
|
||||||
|
inline cl_device_id getClDeviceID(const Context *ctx)
|
||||||
|
{
|
||||||
|
return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr());
|
||||||
|
}
|
||||||
|
|
||||||
|
inline cl_context getClContext(const Context *ctx)
|
||||||
|
{
|
||||||
|
return *(cl_context*)(ctx->getOpenCLContextPtr());
|
||||||
|
}
|
||||||
|
|
||||||
|
inline cl_command_queue getClCommandQueue(const Context *ctx)
|
||||||
|
{
|
||||||
|
return *(cl_command_queue*)(ctx->getOpenCLCommandQueuePtr());
|
||||||
|
}
|
||||||
|
|
||||||
|
enum openCLMemcpyKind
|
||||||
|
{
|
||||||
|
clMemcpyHostToDevice = 0,
|
||||||
|
clMemcpyDeviceToHost,
|
||||||
|
clMemcpyDeviceToDevice
|
||||||
|
};
|
||||||
|
///////////////////////////OpenCL call wrappers////////////////////////////
|
||||||
|
void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
|
||||||
|
size_t widthInBytes, size_t height);
|
||||||
|
void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
|
||||||
|
size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type);
|
||||||
|
void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
|
||||||
|
const void *src, size_t spitch,
|
||||||
|
size_t width, size_t height, openCLMemcpyKind kind, int channels = -1);
|
||||||
|
void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
|
||||||
|
const void *src, size_t spitch,
|
||||||
|
size_t width, size_t height, int src_offset);
|
||||||
|
void CV_EXPORTS openCLFree(void *devPtr);
|
||||||
|
cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
|
||||||
|
void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
|
||||||
|
cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
|
||||||
|
const char **source, std::string kernelName);
|
||||||
|
cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
|
||||||
|
const char **source, std::string kernelName, const char *build_options);
|
||||||
|
void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
|
||||||
|
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair<size_t, const void *> > &args,
|
||||||
|
int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
|
||||||
|
void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName,
|
||||||
|
size_t globalThreads[3], size_t localThreads[3],
|
||||||
|
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
|
||||||
|
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
||||||
|
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
|
||||||
|
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
||||||
|
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
|
||||||
|
int depth, const char *build_options);
|
||||||
|
|
||||||
|
cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value,
|
||||||
|
const size_t size);
|
||||||
|
|
||||||
|
cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr);
|
||||||
|
|
||||||
|
int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName);
|
||||||
|
|
||||||
|
enum FLUSH_MODE
|
||||||
|
{
|
||||||
|
CLFINISH = 0,
|
||||||
|
CLFLUSH,
|
||||||
|
DISABLE
|
||||||
|
};
|
||||||
|
|
||||||
|
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
||||||
|
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
|
||||||
|
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
||||||
|
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
|
||||||
|
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
|
||||||
|
// bind oclMat to OpenCL image textures
|
||||||
|
// note:
|
||||||
|
// 1. there is no memory management. User need to explicitly release the resource
|
||||||
|
// 2. for faster clamping, there is no buffer padding for the constructed texture
|
||||||
|
cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
|
||||||
|
void CV_EXPORTS releaseTexture(cl_mem& texture);
|
||||||
|
|
||||||
|
//Represents an image texture object
|
||||||
|
class CV_EXPORTS TextureCL
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
TextureCL(cl_mem tex, int r, int c, int t)
|
||||||
|
: tex_(tex), rows(r), cols(c), type(t) {}
|
||||||
|
~TextureCL()
|
||||||
{
|
{
|
||||||
enum openCLMemcpyKind
|
openCLFree(tex_);
|
||||||
{
|
}
|
||||||
clMemcpyHostToDevice = 0,
|
operator cl_mem()
|
||||||
clMemcpyDeviceToHost,
|
{
|
||||||
clMemcpyDeviceToDevice
|
return tex_;
|
||||||
};
|
}
|
||||||
///////////////////////////OpenCL call wrappers////////////////////////////
|
cl_mem const tex_;
|
||||||
void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
|
const int rows;
|
||||||
size_t widthInBytes, size_t height);
|
const int cols;
|
||||||
void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
|
const int type;
|
||||||
size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type);
|
private:
|
||||||
void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
|
//disable assignment
|
||||||
const void *src, size_t spitch,
|
void operator=(const TextureCL&);
|
||||||
size_t width, size_t height, openCLMemcpyKind kind, int channels = -1);
|
};
|
||||||
void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
|
// bind oclMat to OpenCL image textures and retunrs an TextureCL object
|
||||||
const void *src, size_t spitch,
|
// note:
|
||||||
size_t width, size_t height, int src_offset);
|
// for faster clamping, there is no buffer padding for the constructed texture
|
||||||
void CV_EXPORTS openCLFree(void *devPtr);
|
Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
|
||||||
cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
|
|
||||||
void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
|
|
||||||
cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
|
|
||||||
const char **source, std::string kernelName);
|
|
||||||
cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
|
|
||||||
const char **source, std::string kernelName, const char *build_options);
|
|
||||||
void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
|
|
||||||
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair<size_t, const void *> > &args,
|
|
||||||
int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
|
|
||||||
void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName,
|
|
||||||
size_t globalThreads[3], size_t localThreads[3],
|
|
||||||
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
|
|
||||||
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
|
||||||
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
|
|
||||||
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
|
||||||
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
|
|
||||||
int depth, const char *build_options);
|
|
||||||
|
|
||||||
cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value,
|
// returns whether the current context supports image2d_t format or not
|
||||||
const size_t size);
|
bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
|
||||||
|
|
||||||
cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr);
|
bool CV_EXPORTS isCpuDevice();
|
||||||
|
|
||||||
int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName);
|
size_t CV_EXPORTS queryWaveFrontSize(cl_kernel kernel);
|
||||||
|
|
||||||
enum FLUSH_MODE
|
|
||||||
{
|
|
||||||
CLFINISH = 0,
|
|
||||||
CLFLUSH,
|
|
||||||
DISABLE
|
|
||||||
};
|
|
||||||
|
|
||||||
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
|
||||||
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
|
|
||||||
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
|
|
||||||
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
|
|
||||||
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
|
|
||||||
// bind oclMat to OpenCL image textures
|
|
||||||
// note:
|
|
||||||
// 1. there is no memory management. User need to explicitly release the resource
|
|
||||||
// 2. for faster clamping, there is no buffer padding for the constructed texture
|
|
||||||
cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
|
|
||||||
void CV_EXPORTS releaseTexture(cl_mem& texture);
|
|
||||||
|
|
||||||
//Represents an image texture object
|
inline size_t divUp(size_t total, size_t grain)
|
||||||
class CV_EXPORTS TextureCL
|
{
|
||||||
{
|
return (total + grain - 1) / grain;
|
||||||
public:
|
}
|
||||||
TextureCL(cl_mem tex, int r, int c, int t)
|
|
||||||
: tex_(tex), rows(r), cols(c), type(t) {}
|
|
||||||
~TextureCL()
|
|
||||||
{
|
|
||||||
openCLFree(tex_);
|
|
||||||
}
|
|
||||||
operator cl_mem()
|
|
||||||
{
|
|
||||||
return tex_;
|
|
||||||
}
|
|
||||||
cl_mem const tex_;
|
|
||||||
const int rows;
|
|
||||||
const int cols;
|
|
||||||
const int type;
|
|
||||||
private:
|
|
||||||
//disable assignment
|
|
||||||
void operator=(const TextureCL&);
|
|
||||||
};
|
|
||||||
// bind oclMat to OpenCL image textures and retunrs an TextureCL object
|
|
||||||
// note:
|
|
||||||
// for faster clamping, there is no buffer padding for the constructed texture
|
|
||||||
Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
|
|
||||||
|
|
||||||
// returns whether the current context supports image2d_t format or not
|
inline size_t roundUp(size_t sz, size_t n)
|
||||||
bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
|
{
|
||||||
|
// we don't assume that n is a power of 2 (see alignSize)
|
||||||
// the enums are used to query device information
|
// equal to divUp(sz, n) * n
|
||||||
// currently only support wavefront size queries
|
size_t t = sz + n - 1;
|
||||||
enum DEVICE_INFO
|
size_t rem = t % n;
|
||||||
{
|
size_t result = t - rem;
|
||||||
WAVEFRONT_SIZE, //in AMD speak
|
return result;
|
||||||
IS_CPU_DEVICE //check if the device is CPU
|
}
|
||||||
};
|
|
||||||
template<DEVICE_INFO _it, typename _ty>
|
|
||||||
_ty queryDeviceInfo(cl_kernel kernel = NULL);
|
|
||||||
|
|
||||||
template<>
|
|
||||||
int CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel);
|
|
||||||
template<>
|
|
||||||
size_t CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, size_t>(cl_kernel kernel);
|
|
||||||
template<>
|
|
||||||
bool CV_EXPORTS queryDeviceInfo<IS_CPU_DEVICE, bool>(cl_kernel kernel);
|
|
||||||
|
|
||||||
unsigned long CV_EXPORTS queryLocalMemInfo();
|
|
||||||
}//namespace ocl
|
|
||||||
|
|
||||||
|
}//namespace ocl
|
||||||
}//namespace cv
|
}//namespace cv
|
||||||
|
|
||||||
#endif //__OPENCV_OCL_PRIVATE_UTIL__
|
#endif //__OPENCV_OCL_PRIVATE_UTIL__
|
||||||
|
@ -56,7 +56,7 @@ int main(int argc, char ** argv)
|
|||||||
const char * keys =
|
const char * keys =
|
||||||
"{ h | help | false | print help message }"
|
"{ h | help | false | print help message }"
|
||||||
"{ t | type | gpu | set device type:cpu or gpu}"
|
"{ t | type | gpu | set device type:cpu or gpu}"
|
||||||
"{ p | platform | 0 | set platform id }"
|
"{ p | platform | -1 | set platform id }"
|
||||||
"{ d | device | 0 | set device id }";
|
"{ d | device | 0 | set device id }";
|
||||||
|
|
||||||
CommandLineParser cmd(argc, argv, keys);
|
CommandLineParser cmd(argc, argv, keys);
|
||||||
@ -68,28 +68,34 @@ int main(int argc, char ** argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
string type = cmd.get<string>("type");
|
string type = cmd.get<string>("type");
|
||||||
unsigned int pid = cmd.get<unsigned int>("platform");
|
int pid = cmd.get<int>("platform");
|
||||||
int device = cmd.get<int>("device");
|
int device = cmd.get<int>("device");
|
||||||
|
|
||||||
int flag = type == "cpu" ? cv::ocl::CVCL_DEVICE_TYPE_CPU :
|
int flag = type == "cpu" ? cv::ocl::CVCL_DEVICE_TYPE_CPU :
|
||||||
cv::ocl::CVCL_DEVICE_TYPE_GPU;
|
cv::ocl::CVCL_DEVICE_TYPE_GPU;
|
||||||
|
|
||||||
std::vector<cv::ocl::Info> oclinfo;
|
cv::ocl::PlatformsInfo platformsInfo;
|
||||||
int devnums = cv::ocl::getDevice(oclinfo, flag);
|
cv::ocl::getOpenCLPlatforms(platformsInfo);
|
||||||
if (devnums <= device || device < 0)
|
if (pid >= (int)platformsInfo.size())
|
||||||
{
|
{
|
||||||
std::cout << "device invalid\n";
|
std::cout << "platform is invalid\n";
|
||||||
return -1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pid >= oclinfo.size())
|
cv::ocl::DevicesInfo devicesInfo;
|
||||||
|
int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]);
|
||||||
|
if (device < 0 || device >= devnums)
|
||||||
{
|
{
|
||||||
std::cout << "platform invalid\n";
|
std::cout << "device/platform invalid\n";
|
||||||
return -1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
cv::ocl::setDevice(oclinfo[pid], device);
|
cv::ocl::setDevice(devicesInfo[device]);
|
||||||
cv::ocl::setBinaryDiskCache(cv::ocl::CACHE_UPDATE);
|
cv::ocl::setBinaryDiskCache(cv::ocl::CACHE_UPDATE);
|
||||||
|
|
||||||
|
cout << "Device type:" << type << endl
|
||||||
|
<< "Platform name:" << devicesInfo[device]->platform->platformName << endl
|
||||||
|
<< "Device name:" << devicesInfo[device]->deviceName << endl;
|
||||||
|
|
||||||
CV_PERF_TEST_MAIN_INTERNALS(ocl, impls)
|
CV_PERF_TEST_MAIN_INTERNALS(ocl, impls)
|
||||||
}
|
}
|
||||||
|
@ -51,50 +51,10 @@
|
|||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
#include <iomanip>
|
#include "opencl_kernels.hpp"
|
||||||
|
|
||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
using namespace cv::ocl;
|
using namespace cv::ocl;
|
||||||
using namespace std;
|
|
||||||
|
|
||||||
namespace cv
|
|
||||||
{
|
|
||||||
namespace ocl
|
|
||||||
{
|
|
||||||
//////////////////////////////// OpenCL kernel strings /////////////////////
|
|
||||||
|
|
||||||
extern const char *arithm_absdiff_nonsaturate;
|
|
||||||
extern const char *arithm_nonzero;
|
|
||||||
extern const char *arithm_sum;
|
|
||||||
extern const char *arithm_minMax;
|
|
||||||
extern const char *arithm_minMaxLoc;
|
|
||||||
extern const char *arithm_minMaxLoc_mask;
|
|
||||||
extern const char *arithm_LUT;
|
|
||||||
extern const char *arithm_add;
|
|
||||||
extern const char *arithm_add_mask;
|
|
||||||
extern const char *arithm_add_scalar;
|
|
||||||
extern const char *arithm_add_scalar_mask;
|
|
||||||
extern const char *arithm_bitwise_binary;
|
|
||||||
extern const char *arithm_bitwise_binary_mask;
|
|
||||||
extern const char *arithm_bitwise_binary_scalar;
|
|
||||||
extern const char *arithm_bitwise_binary_scalar_mask;
|
|
||||||
extern const char *arithm_bitwise_not;
|
|
||||||
extern const char *arithm_compare;
|
|
||||||
extern const char *arithm_transpose;
|
|
||||||
extern const char *arithm_flip;
|
|
||||||
extern const char *arithm_flip_rc;
|
|
||||||
extern const char *arithm_magnitude;
|
|
||||||
extern const char *arithm_cartToPolar;
|
|
||||||
extern const char *arithm_polarToCart;
|
|
||||||
extern const char *arithm_exp;
|
|
||||||
extern const char *arithm_log;
|
|
||||||
extern const char *arithm_addWeighted;
|
|
||||||
extern const char *arithm_phase;
|
|
||||||
extern const char *arithm_pow;
|
|
||||||
extern const char *arithm_setidentity;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
/////////////////////// add subtract multiply divide /////////////////////////
|
/////////////////////// add subtract multiply divide /////////////////////////
|
||||||
@ -106,7 +66,7 @@ static void arithmetic_run_generic(const oclMat &src1, const oclMat &src2, const
|
|||||||
oclMat &dst, int op_type, bool use_scalar = false)
|
oclMat &dst, int op_type, bool use_scalar = false)
|
||||||
{
|
{
|
||||||
Context *clCxt = src1.clCxt;
|
Context *clCxt = src1.clCxt;
|
||||||
bool hasDouble = clCxt->supportsFeature(Context::CL_DOUBLE);
|
bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE);
|
||||||
if (!hasDouble && (src1.depth() == CV_64F || src2.depth() == CV_64F || dst.depth() == CV_64F))
|
if (!hasDouble && (src1.depth() == CV_64F || src2.depth() == CV_64F || dst.depth() == CV_64F))
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
||||||
@ -264,7 +224,7 @@ void cv::ocl::absdiff(const oclMat &src1, const Scalar &src2, oclMat &dst)
|
|||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpOp,
|
static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int cmpOp,
|
||||||
string kernelName, const char **kernelString)
|
string kernelName, const cv::ocl::ProgramEntry* source)
|
||||||
{
|
{
|
||||||
CV_Assert(src1.type() == src2.type());
|
CV_Assert(src1.type() == src2.type());
|
||||||
dst.create(src1.size(), CV_8UC1);
|
dst.create(src1.size(), CV_8UC1);
|
||||||
@ -295,13 +255,13 @@ static void compare_run(const oclMat &src1, const oclMat &src2, oclMat &dst, int
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.cols ));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads,
|
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads,
|
||||||
args, -1, -1, buildOptions.c_str());
|
args, -1, -1, buildOptions.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int cmpOp)
|
void cv::ocl::compare(const oclMat &src1, const oclMat &src2, oclMat &dst , int cmpOp)
|
||||||
{
|
{
|
||||||
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F)
|
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
cout << "Selected device do not support double" << endl;
|
cout << "Selected device do not support double" << endl;
|
||||||
return;
|
return;
|
||||||
@ -358,7 +318,7 @@ Scalar arithmetic_sum(const oclMat &src, int type, int ddepth)
|
|||||||
{
|
{
|
||||||
CV_Assert(src.step % src.elemSize() == 0);
|
CV_Assert(src.step % src.elemSize() == 0);
|
||||||
|
|
||||||
size_t groupnum = src.clCxt->computeUnits();
|
size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
|
||||||
CV_Assert(groupnum != 0);
|
CV_Assert(groupnum != 0);
|
||||||
|
|
||||||
int dbsize = groupnum * src.oclchannels();
|
int dbsize = groupnum * src.oclchannels();
|
||||||
@ -385,7 +345,7 @@ typedef Scalar (*sumFunc)(const oclMat &src, int type, int ddepth);
|
|||||||
|
|
||||||
Scalar cv::ocl::sum(const oclMat &src)
|
Scalar cv::ocl::sum(const oclMat &src)
|
||||||
{
|
{
|
||||||
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
||||||
}
|
}
|
||||||
@ -396,7 +356,7 @@ Scalar cv::ocl::sum(const oclMat &src)
|
|||||||
arithmetic_sum<double>
|
arithmetic_sum<double>
|
||||||
};
|
};
|
||||||
|
|
||||||
bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
|
bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE);
|
||||||
int ddepth = std::max(src.depth(), CV_32S);
|
int ddepth = std::max(src.depth(), CV_32S);
|
||||||
if (!hasDouble && ddepth == CV_64F)
|
if (!hasDouble && ddepth == CV_64F)
|
||||||
ddepth = CV_32F;
|
ddepth = CV_32F;
|
||||||
@ -407,7 +367,7 @@ Scalar cv::ocl::sum(const oclMat &src)
|
|||||||
|
|
||||||
Scalar cv::ocl::absSum(const oclMat &src)
|
Scalar cv::ocl::absSum(const oclMat &src)
|
||||||
{
|
{
|
||||||
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
||||||
}
|
}
|
||||||
@ -418,7 +378,7 @@ Scalar cv::ocl::absSum(const oclMat &src)
|
|||||||
arithmetic_sum<double>
|
arithmetic_sum<double>
|
||||||
};
|
};
|
||||||
|
|
||||||
bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
|
bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE);
|
||||||
int ddepth = std::max(src.depth(), CV_32S);
|
int ddepth = std::max(src.depth(), CV_32S);
|
||||||
if (!hasDouble && ddepth == CV_64F)
|
if (!hasDouble && ddepth == CV_64F)
|
||||||
ddepth = CV_32F;
|
ddepth = CV_32F;
|
||||||
@ -429,7 +389,7 @@ Scalar cv::ocl::absSum(const oclMat &src)
|
|||||||
|
|
||||||
Scalar cv::ocl::sqrSum(const oclMat &src)
|
Scalar cv::ocl::sqrSum(const oclMat &src)
|
||||||
{
|
{
|
||||||
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
||||||
}
|
}
|
||||||
@ -440,7 +400,7 @@ Scalar cv::ocl::sqrSum(const oclMat &src)
|
|||||||
arithmetic_sum<double>
|
arithmetic_sum<double>
|
||||||
};
|
};
|
||||||
|
|
||||||
bool hasDouble = src.clCxt->supportsFeature(Context::CL_DOUBLE);
|
bool hasDouble = src.clCxt->supportsFeature(FEATURE_CL_DOUBLE);
|
||||||
int ddepth = src.depth() <= CV_32S ? CV_32S : (hasDouble ? CV_64F : CV_32F);
|
int ddepth = src.depth() <= CV_32S ? CV_32S : (hasDouble ? CV_64F : CV_32F);
|
||||||
|
|
||||||
sumFunc func = functab[ddepth - CV_32S];
|
sumFunc func = functab[ddepth - CV_32S];
|
||||||
@ -524,7 +484,7 @@ template <typename T, typename WT>
|
|||||||
void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
|
void arithmetic_minMax(const oclMat &src, double *minVal, double *maxVal,
|
||||||
const oclMat &mask, oclMat &buf)
|
const oclMat &mask, oclMat &buf)
|
||||||
{
|
{
|
||||||
size_t groupnum = src.clCxt->computeUnits();
|
size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
|
||||||
CV_Assert(groupnum != 0);
|
CV_Assert(groupnum != 0);
|
||||||
|
|
||||||
int dbsize = groupnum * 2 * src.elemSize();
|
int dbsize = groupnum * 2 * src.elemSize();
|
||||||
@ -566,7 +526,7 @@ void cv::ocl::minMax_buf(const oclMat &src, double *minVal, double *maxVal, cons
|
|||||||
if (minVal == NULL && maxVal == NULL)
|
if (minVal == NULL && maxVal == NULL)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
||||||
}
|
}
|
||||||
@ -699,7 +659,7 @@ double cv::ocl::norm(const oclMat &src1, const oclMat &src2, int normType)
|
|||||||
|
|
||||||
static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName)
|
static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kernelName)
|
||||||
{
|
{
|
||||||
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
|
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
||||||
return;
|
return;
|
||||||
@ -746,7 +706,7 @@ static void arithmetic_flip_rows_run(const oclMat &src, oclMat &dst, string kern
|
|||||||
|
|
||||||
static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical)
|
static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kernelName, bool isVertical)
|
||||||
{
|
{
|
||||||
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
|
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
||||||
return;
|
return;
|
||||||
@ -792,9 +752,9 @@ static void arithmetic_flip_cols_run(const oclMat &src, oclMat &dst, string kern
|
|||||||
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
|
||||||
|
|
||||||
const char **kernelString = isVertical ? &arithm_flip_rc : &arithm_flip;
|
const cv::ocl::ProgramEntry* source = isVertical ? &arithm_flip_rc : &arithm_flip;
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth);
|
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, src.oclchannels(), depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
|
void cv::ocl::flip(const oclMat &src, oclMat &dst, int flipCode)
|
||||||
@ -860,10 +820,10 @@ void cv::ocl::LUT(const oclMat &src, const oclMat &lut, oclMat &dst)
|
|||||||
//////////////////////////////// exp log /////////////////////////////////////
|
//////////////////////////////// exp log /////////////////////////////////////
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, const char **kernelString)
|
static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
|
||||||
{
|
{
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
||||||
return;
|
return;
|
||||||
@ -893,7 +853,7 @@ static void arithmetic_exp_log_run(const oclMat &src, oclMat &dst, string kernel
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&srcstep1 ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&srcstep1 ));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dststep1 ));
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads,
|
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads,
|
||||||
args, src.oclchannels(), -1, buildOptions.c_str());
|
args, src.oclchannels(), -1, buildOptions.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -913,7 +873,7 @@ void cv::ocl::log(const oclMat &src, oclMat &dst)
|
|||||||
|
|
||||||
static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName)
|
static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName)
|
||||||
{
|
{
|
||||||
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
|
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
||||||
return;
|
return;
|
||||||
@ -955,9 +915,9 @@ void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst)
|
|||||||
arithmetic_magnitude_phase_run(src1, src2, dst, "arithm_magnitude");
|
arithmetic_magnitude_phase_run(src1, src2, dst, "arithm_magnitude");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const char **kernelString)
|
static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
|
||||||
{
|
{
|
||||||
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
|
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
||||||
return;
|
return;
|
||||||
@ -985,7 +945,7 @@ static void arithmetic_phase_run(const oclMat &src1, const oclMat &src2, oclMat
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cols1 ));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
|
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees)
|
void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleInDegrees)
|
||||||
@ -1004,7 +964,7 @@ void cv::ocl::phase(const oclMat &x, const oclMat &y, oclMat &Angle, bool angleI
|
|||||||
static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart,
|
static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, oclMat &dst_mag, oclMat &dst_cart,
|
||||||
string kernelName, bool angleInDegrees)
|
string kernelName, bool angleInDegrees)
|
||||||
{
|
{
|
||||||
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
|
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
||||||
return;
|
return;
|
||||||
@ -1057,7 +1017,7 @@ void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat
|
|||||||
static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees,
|
static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees,
|
||||||
string kernelName)
|
string kernelName)
|
||||||
{
|
{
|
||||||
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE) && src1.type() == CV_64F)
|
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.type() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
||||||
return;
|
return;
|
||||||
@ -1176,7 +1136,7 @@ void arithmetic_minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
|
|||||||
Point *minLoc, Point *maxLoc, const oclMat &mask)
|
Point *minLoc, Point *maxLoc, const oclMat &mask)
|
||||||
{
|
{
|
||||||
CV_Assert(src.oclchannels() == 1);
|
CV_Assert(src.oclchannels() == 1);
|
||||||
size_t groupnum = src.clCxt->computeUnits();
|
size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
|
||||||
CV_Assert(groupnum != 0);
|
CV_Assert(groupnum != 0);
|
||||||
int minloc = -1 , maxloc = -1;
|
int minloc = -1 , maxloc = -1;
|
||||||
int vlen = 4, dbsize = groupnum * vlen * 4 * sizeof(T) ;
|
int vlen = 4, dbsize = groupnum * vlen * 4 * sizeof(T) ;
|
||||||
@ -1238,7 +1198,7 @@ typedef void (*minMaxLocFunc)(const oclMat &src, double *minVal, double *maxVal,
|
|||||||
void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
|
void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
|
||||||
Point *minLoc, Point *maxLoc, const oclMat &mask)
|
Point *minLoc, Point *maxLoc, const oclMat &mask)
|
||||||
{
|
{
|
||||||
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double");
|
||||||
return;
|
return;
|
||||||
@ -1251,7 +1211,7 @@ void cv::ocl::minMaxLoc(const oclMat &src, double *minVal, double *maxVal,
|
|||||||
};
|
};
|
||||||
|
|
||||||
minMaxLocFunc func;
|
minMaxLocFunc func;
|
||||||
func = functab[(int)src.clCxt->supportsFeature(Context::CL_DOUBLE)];
|
func = functab[(int)src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)];
|
||||||
func(src, minVal, maxVal, minLoc, maxLoc, mask);
|
func(src, minVal, maxVal, minLoc, maxLoc, mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1296,7 +1256,7 @@ int cv::ocl::countNonZero(const oclMat &src)
|
|||||||
CV_Assert(src.channels() == 1);
|
CV_Assert(src.channels() == 1);
|
||||||
|
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "selected device doesn't support double");
|
CV_Error(CV_GpuNotSupported, "selected device doesn't support double");
|
||||||
}
|
}
|
||||||
@ -1327,7 +1287,7 @@ int cv::ocl::countNonZero(const oclMat &src)
|
|||||||
////////////////////////////////bitwise_op////////////////////////////////////
|
////////////////////////////////bitwise_op////////////////////////////////////
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName, const char **kernelString)
|
static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
|
||||||
{
|
{
|
||||||
dst.create(src1.size(), src1.type());
|
dst.create(src1.size(), src1.type());
|
||||||
|
|
||||||
@ -1361,7 +1321,7 @@ static void bitwise_unary_run(const oclMat &src1, oclMat &dst, string kernelName
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
|
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
enum { AND = 0, OR, XOR };
|
enum { AND = 0, OR, XOR };
|
||||||
@ -1370,7 +1330,7 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
|
|||||||
oclMat &dst, int operationType)
|
oclMat &dst, int operationType)
|
||||||
{
|
{
|
||||||
Context *clCxt = src1.clCxt;
|
Context *clCxt = src1.clCxt;
|
||||||
if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src1.depth() == CV_64F)
|
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src1.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
cout << "Selected device does not support double" << endl;
|
cout << "Selected device does not support double" << endl;
|
||||||
return;
|
return;
|
||||||
@ -1442,7 +1402,7 @@ static void bitwise_binary_run(const oclMat &src1, const oclMat &src2, const Sca
|
|||||||
|
|
||||||
void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
|
void cv::ocl::bitwise_not(const oclMat &src, oclMat &dst)
|
||||||
{
|
{
|
||||||
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
|
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
|
||||||
{
|
{
|
||||||
cout << "Selected device does not support double" << endl;
|
cout << "Selected device does not support double" << endl;
|
||||||
return;
|
return;
|
||||||
@ -1571,7 +1531,7 @@ oclMatExpr::operator oclMat() const
|
|||||||
static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false)
|
static void transpose_run(const oclMat &src, oclMat &dst, string kernelName, bool inplace = false)
|
||||||
{
|
{
|
||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
if (!clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
||||||
return;
|
return;
|
||||||
@ -1623,7 +1583,7 @@ void cv::ocl::transpose(const oclMat &src, oclMat &dst)
|
|||||||
void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, double beta, double gama, oclMat &dst)
|
void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, double beta, double gama, oclMat &dst)
|
||||||
{
|
{
|
||||||
Context *clCxt = src1.clCxt;
|
Context *clCxt = src1.clCxt;
|
||||||
bool hasDouble = clCxt->supportsFeature(Context::CL_DOUBLE);
|
bool hasDouble = clCxt->supportsFeature(FEATURE_CL_DOUBLE);
|
||||||
if (!hasDouble && src1.depth() == CV_64F)
|
if (!hasDouble && src1.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device doesn't support double\r\n");
|
||||||
@ -1688,7 +1648,7 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
|
|||||||
/////////////////////////////////// Pow //////////////////////////////////////
|
/////////////////////////////////// Pow //////////////////////////////////////
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const char **kernelString)
|
static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
|
||||||
{
|
{
|
||||||
CV_Assert(src1.cols == dst.cols && src1.rows == dst.rows);
|
CV_Assert(src1.cols == dst.cols && src1.rows == dst.rows);
|
||||||
CV_Assert(src1.type() == dst.type());
|
CV_Assert(src1.type() == dst.type());
|
||||||
@ -1718,17 +1678,17 @@ static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
|
||||||
|
|
||||||
float pf = static_cast<float>(p);
|
float pf = static_cast<float>(p);
|
||||||
if (!src1.clCxt->supportsFeature(Context::CL_DOUBLE))
|
if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
|
||||||
args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
|
args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
|
||||||
else
|
else
|
||||||
args.push_back( make_pair( sizeof(cl_double), (void *)&p ));
|
args.push_back( make_pair( sizeof(cl_double), (void *)&p ));
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, kernelString, kernelName, globalThreads, localThreads, args, -1, depth);
|
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
|
void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
|
||||||
{
|
{
|
||||||
if (!x.clCxt->supportsFeature(Context::CL_DOUBLE) && x.type() == CV_64F)
|
if (!x.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && x.type() == CV_64F)
|
||||||
{
|
{
|
||||||
cout << "Selected device do not support double" << endl;
|
cout << "Selected device do not support double" << endl;
|
||||||
return;
|
return;
|
||||||
|
@ -392,7 +392,7 @@ void cv::ocl::device::mog::loadConstants(float Tb, float TB, float Tg, float var
|
|||||||
constants->c_tau = tau;
|
constants->c_tau = tau;
|
||||||
constants->c_shadowVal = shadowVal;
|
constants->c_shadowVal = shadowVal;
|
||||||
|
|
||||||
cl_constants = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()),
|
cl_constants = load_constant(*((cl_context*)getClContextPtr()), *((cl_command_queue*)getClCommandQueuePtr()),
|
||||||
(void *)constants, sizeof(_contant_struct));
|
(void *)constants, sizeof(_contant_struct));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -635,4 +635,4 @@ void cv::ocl::MOG2::release()
|
|||||||
mean_.release();
|
mean_.release();
|
||||||
|
|
||||||
bgmodelUsedModes_.release();
|
bgmodelUsedModes_.release();
|
||||||
}
|
}
|
||||||
|
@ -50,41 +50,36 @@ using namespace std;
|
|||||||
using std::cout;
|
using std::cout;
|
||||||
using std::endl;
|
using std::endl;
|
||||||
|
|
||||||
namespace cv
|
namespace cv { namespace ocl {
|
||||||
|
|
||||||
|
class ProgramCache
|
||||||
{
|
{
|
||||||
namespace ocl
|
protected:
|
||||||
{
|
ProgramCache();
|
||||||
class ProgramCache
|
~ProgramCache();
|
||||||
{
|
friend class std::auto_ptr<ProgramCache>;
|
||||||
protected:
|
public:
|
||||||
ProgramCache();
|
static ProgramCache *getProgramCache();
|
||||||
friend class auto_ptr<ProgramCache>;
|
|
||||||
static auto_ptr<ProgramCache> programCache;
|
|
||||||
|
|
||||||
public:
|
cl_program getProgram(const Context *ctx, const char **source, string kernelName,
|
||||||
~ProgramCache();
|
const char *build_options);
|
||||||
static ProgramCache *getProgramCache()
|
|
||||||
{
|
|
||||||
if( NULL == programCache.get())
|
|
||||||
programCache.reset(new ProgramCache());
|
|
||||||
return programCache.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
//lookup the binary given the file name
|
void releaseProgram();
|
||||||
cl_program progLookup(string srcsign);
|
protected:
|
||||||
|
//lookup the binary given the file name
|
||||||
|
cl_program progLookup(string srcsign);
|
||||||
|
|
||||||
//add program to the cache
|
//add program to the cache
|
||||||
void addProgram(string srcsign, cl_program program);
|
void addProgram(string srcsign, cl_program program);
|
||||||
void releaseProgram();
|
|
||||||
|
|
||||||
map <string, cl_program> codeCache;
|
map <string, cl_program> codeCache;
|
||||||
unsigned int cacheSize;
|
unsigned int cacheSize;
|
||||||
//The presumed watermark for the cache volume (256MB). Is it enough?
|
|
||||||
//We may need more delicate algorithms when necessary later.
|
|
||||||
//Right now, let's just leave it along.
|
|
||||||
static const unsigned MAX_PROG_CACHE_SIZE = 1024;
|
|
||||||
};
|
|
||||||
|
|
||||||
}//namespace ocl
|
//The presumed watermark for the cache volume (256MB). Is it enough?
|
||||||
|
//We may need more delicate algorithms when necessary later.
|
||||||
|
//Right now, let's just leave it along.
|
||||||
|
static const unsigned MAX_PROG_CACHE_SIZE = 1024;
|
||||||
|
};
|
||||||
|
|
||||||
|
}//namespace ocl
|
||||||
}//namespace cv
|
}//namespace cv
|
||||||
|
@ -245,7 +245,7 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, const oclM
|
|||||||
{
|
{
|
||||||
const oclMat zeroMask;
|
const oclMat zeroMask;
|
||||||
const oclMat &tempMask = mask.data ? mask : zeroMask;
|
const oclMat &tempMask = mask.data ? mask : zeroMask;
|
||||||
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
|
bool is_cpu = isCpuDevice();
|
||||||
if (query.cols <= 64)
|
if (query.cols <= 64)
|
||||||
{
|
{
|
||||||
matchUnrolledCached<16, 64>(query, train, tempMask, trainIdx, distance, distType);
|
matchUnrolledCached<16, 64>(query, train, tempMask, trainIdx, distance, distType);
|
||||||
@ -265,7 +265,7 @@ static void matchDispatcher(const oclMat &query, const oclMat *trains, int n, co
|
|||||||
{
|
{
|
||||||
const oclMat zeroMask;
|
const oclMat zeroMask;
|
||||||
const oclMat &tempMask = mask.data ? mask : zeroMask;
|
const oclMat &tempMask = mask.data ? mask : zeroMask;
|
||||||
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
|
bool is_cpu = isCpuDevice();
|
||||||
if (query.cols <= 64)
|
if (query.cols <= 64)
|
||||||
{
|
{
|
||||||
matchUnrolledCached<16, 64>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
|
matchUnrolledCached<16, 64>(query, trains, n, tempMask, trainIdx, imgIdx, distance, distType);
|
||||||
@ -286,7 +286,7 @@ static void matchDispatcher(const oclMat &query, const oclMat &train, float maxD
|
|||||||
{
|
{
|
||||||
const oclMat zeroMask;
|
const oclMat zeroMask;
|
||||||
const oclMat &tempMask = mask.data ? mask : zeroMask;
|
const oclMat &tempMask = mask.data ? mask : zeroMask;
|
||||||
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
|
bool is_cpu = isCpuDevice();
|
||||||
if (query.cols <= 64)
|
if (query.cols <= 64)
|
||||||
{
|
{
|
||||||
matchUnrolledCached<16, 64>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
|
matchUnrolledCached<16, 64>(query, train, maxDistance, tempMask, trainIdx, distance, nMatches, distType);
|
||||||
@ -469,7 +469,7 @@ static void calcDistanceDispatcher(const oclMat &query, const oclMat &train, con
|
|||||||
static void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
|
static void match2Dispatcher(const oclMat &query, const oclMat &train, const oclMat &mask,
|
||||||
const oclMat &trainIdx, const oclMat &distance, int distType)
|
const oclMat &trainIdx, const oclMat &distance, int distType)
|
||||||
{
|
{
|
||||||
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
|
bool is_cpu = isCpuDevice();
|
||||||
if (query.cols <= 64)
|
if (query.cols <= 64)
|
||||||
{
|
{
|
||||||
knn_matchUnrolledCached<16, 64>(query, train, mask, trainIdx, distance, distType);
|
knn_matchUnrolledCached<16, 64>(query, train, mask, trainIdx, distance, distType);
|
||||||
|
@ -98,7 +98,7 @@ void cv::ocl::CannyBuf::create(const Size &image_size, int apperture_size)
|
|||||||
{
|
{
|
||||||
openCLFree(counter);
|
openCLFree(counter);
|
||||||
}
|
}
|
||||||
counter = clCreateBuffer( *((cl_context*)getoclContext()), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
|
counter = clCreateBuffer( *((cl_context*)getClContextPtr()), CL_MEM_COPY_HOST_PTR, sizeof(int), counter_i, &err );
|
||||||
openCLSafeCall(err);
|
openCLSafeCall(err);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -354,7 +354,7 @@ void canny::edgesHysteresisLocal_gpu(oclMat &map, oclMat &st1, void *counter, in
|
|||||||
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
|
void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, void *counter, int rows, int cols)
|
||||||
{
|
{
|
||||||
unsigned int count;
|
unsigned int count;
|
||||||
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
|
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(float), &count, 0, NULL, NULL));
|
||||||
Context *clCxt = map.clCxt;
|
Context *clCxt = map.clCxt;
|
||||||
string kernelName = "edgesHysteresisGlobal";
|
string kernelName = "edgesHysteresisGlobal";
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
@ -363,7 +363,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
|
|||||||
int count_i[1] = {0};
|
int count_i[1] = {0};
|
||||||
while(count > 0)
|
while(count > 0)
|
||||||
{
|
{
|
||||||
openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
|
openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(int), &count_i, 0, NULL, NULL));
|
||||||
|
|
||||||
args.clear();
|
args.clear();
|
||||||
size_t globalThreads[3] = {std::min(count, 65535u) * 128, divUp(count, 65535), 1};
|
size_t globalThreads[3] = {std::min(count, 65535u) * 128, divUp(count, 65535), 1};
|
||||||
@ -378,7 +378,7 @@ void canny::edgesHysteresisGlobal_gpu(oclMat &map, oclMat &st1, oclMat &st2, voi
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||||
|
|
||||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getoclCommandQueue(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
|
openCLSafeCall(clEnqueueReadBuffer(*(cl_command_queue*)getClCommandQueuePtr(), (cl_mem)counter, 1, 0, sizeof(int), &count, 0, NULL, NULL));
|
||||||
std::swap(st1, st2);
|
std::swap(st1, st2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
507
modules/ocl/src/cl_context.cpp
Normal file
507
modules/ocl/src/cl_context.cpp
Normal file
@ -0,0 +1,507 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Guoping Long, longguoping@gmail.com
|
||||||
|
// Niko Li, newlife20080214@gmail.com
|
||||||
|
// Yao Wang, bitwangyaoyao@gmail.com
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
#include <iomanip>
|
||||||
|
#include <fstream>
|
||||||
|
#include "binarycaching.hpp"
|
||||||
|
|
||||||
|
#undef __CL_ENABLE_EXCEPTIONS
|
||||||
|
#include <CL/cl.hpp>
|
||||||
|
|
||||||
|
namespace cv { namespace ocl {
|
||||||
|
|
||||||
|
extern void fft_teardown();
|
||||||
|
extern void clBlasTeardown();
|
||||||
|
|
||||||
|
struct PlatformInfoImpl
|
||||||
|
{
|
||||||
|
cl_platform_id platform_id;
|
||||||
|
|
||||||
|
std::vector<int> deviceIDs;
|
||||||
|
|
||||||
|
PlatformInfo info;
|
||||||
|
|
||||||
|
PlatformInfoImpl()
|
||||||
|
: platform_id(NULL)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct DeviceInfoImpl
|
||||||
|
{
|
||||||
|
cl_platform_id platform_id;
|
||||||
|
cl_device_id device_id;
|
||||||
|
|
||||||
|
DeviceInfo info;
|
||||||
|
|
||||||
|
DeviceInfoImpl()
|
||||||
|
: platform_id(NULL), device_id(NULL)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static std::vector<PlatformInfoImpl> global_platforms;
|
||||||
|
static std::vector<DeviceInfoImpl> global_devices;
|
||||||
|
|
||||||
|
static bool parseOpenCLVersion(const std::string& versionStr, int& major, int& minor)
|
||||||
|
{
|
||||||
|
size_t p0 = versionStr.find(' ');
|
||||||
|
while (true)
|
||||||
|
{
|
||||||
|
if (p0 == std::string::npos)
|
||||||
|
break;
|
||||||
|
if (p0 + 1 >= versionStr.length())
|
||||||
|
break;
|
||||||
|
char c = versionStr[p0 + 1];
|
||||||
|
if (isdigit(c))
|
||||||
|
break;
|
||||||
|
p0 = versionStr.find(' ', p0 + 1);
|
||||||
|
}
|
||||||
|
size_t p1 = versionStr.find('.', p0);
|
||||||
|
size_t p2 = versionStr.find(' ', p1);
|
||||||
|
if (p0 == std::string::npos || p1 == std::string::npos || p2 == std::string::npos)
|
||||||
|
{
|
||||||
|
major = 0;
|
||||||
|
minor = 0;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
std::string majorStr = versionStr.substr(p0 + 1, p1 - p0 - 1);
|
||||||
|
std::string minorStr = versionStr.substr(p1 + 1, p2 - p1 - 1);
|
||||||
|
major = atoi(majorStr.c_str());
|
||||||
|
minor = atoi(minorStr.c_str());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int initializeOpenCLDevices()
|
||||||
|
{
|
||||||
|
assert(global_devices.size() == 0);
|
||||||
|
|
||||||
|
std::vector<cl::Platform> platforms;
|
||||||
|
try
|
||||||
|
{
|
||||||
|
openCLSafeCall(cl::Platform::get(&platforms));
|
||||||
|
}
|
||||||
|
catch (cv::Exception& e)
|
||||||
|
{
|
||||||
|
return 0; // OpenCL not found
|
||||||
|
}
|
||||||
|
|
||||||
|
global_platforms.resize(platforms.size());
|
||||||
|
|
||||||
|
for (size_t i = 0; i < platforms.size(); ++i)
|
||||||
|
{
|
||||||
|
PlatformInfoImpl& platformInfo = global_platforms[i];
|
||||||
|
platformInfo.info._id = i;
|
||||||
|
|
||||||
|
cl::Platform& platform = platforms[i];
|
||||||
|
|
||||||
|
platformInfo.platform_id = platform();
|
||||||
|
openCLSafeCall(platform.getInfo(CL_PLATFORM_PROFILE, &platformInfo.info.platformProfile));
|
||||||
|
openCLSafeCall(platform.getInfo(CL_PLATFORM_VERSION, &platformInfo.info.platformVersion));
|
||||||
|
openCLSafeCall(platform.getInfo(CL_PLATFORM_NAME, &platformInfo.info.platformName));
|
||||||
|
openCLSafeCall(platform.getInfo(CL_PLATFORM_VENDOR, &platformInfo.info.platformVendor));
|
||||||
|
openCLSafeCall(platform.getInfo(CL_PLATFORM_EXTENSIONS, &platformInfo.info.platformExtensons));
|
||||||
|
|
||||||
|
parseOpenCLVersion(platformInfo.info.platformVersion,
|
||||||
|
platformInfo.info.platformVersionMajor, platformInfo.info.platformVersionMinor);
|
||||||
|
|
||||||
|
std::vector<cl::Device> devices;
|
||||||
|
cl_int status = platform.getDevices(CL_DEVICE_TYPE_ALL, &devices);
|
||||||
|
if(status != CL_DEVICE_NOT_FOUND)
|
||||||
|
openCLVerifyCall(status);
|
||||||
|
|
||||||
|
if(devices.size() > 0)
|
||||||
|
{
|
||||||
|
int baseIndx = global_devices.size();
|
||||||
|
global_devices.resize(baseIndx + devices.size());
|
||||||
|
platformInfo.deviceIDs.resize(devices.size());
|
||||||
|
platformInfo.info.devices.resize(devices.size());
|
||||||
|
|
||||||
|
for(size_t j = 0; j < devices.size(); ++j)
|
||||||
|
{
|
||||||
|
cl::Device& device = devices[j];
|
||||||
|
|
||||||
|
DeviceInfoImpl& deviceInfo = global_devices[baseIndx + j];
|
||||||
|
deviceInfo.info._id = baseIndx + j;
|
||||||
|
deviceInfo.platform_id = platform();
|
||||||
|
deviceInfo.device_id = device();
|
||||||
|
|
||||||
|
deviceInfo.info.platform = &platformInfo.info;
|
||||||
|
platformInfo.deviceIDs[j] = deviceInfo.info._id;
|
||||||
|
|
||||||
|
cl_device_type type = -1;
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_TYPE, &type));
|
||||||
|
deviceInfo.info.deviceType = DeviceType(type);
|
||||||
|
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_PROFILE, &deviceInfo.info.deviceProfile));
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_VERSION, &deviceInfo.info.deviceVersion));
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_NAME, &deviceInfo.info.deviceName));
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR, &deviceInfo.info.deviceVendor));
|
||||||
|
cl_uint vendorID = -1;
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_VENDOR_ID, &vendorID));
|
||||||
|
deviceInfo.info.deviceVendorId = vendorID;
|
||||||
|
openCLSafeCall(device.getInfo(CL_DRIVER_VERSION, &deviceInfo.info.deviceDriverVersion));
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions));
|
||||||
|
|
||||||
|
parseOpenCLVersion(deviceInfo.info.deviceVersion,
|
||||||
|
deviceInfo.info.deviceVersionMajor, deviceInfo.info.deviceVersionMinor);
|
||||||
|
|
||||||
|
size_t maxWorkGroupSize = 0;
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE, &maxWorkGroupSize));
|
||||||
|
deviceInfo.info.maxWorkGroupSize = maxWorkGroupSize;
|
||||||
|
|
||||||
|
cl_uint maxDimensions = 0;
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, &maxDimensions));
|
||||||
|
std::vector<size_t> maxWorkItemSizes(maxDimensions);
|
||||||
|
openCLSafeCall(clGetDeviceInfo(device(), CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxDimensions,
|
||||||
|
(void *)&maxWorkItemSizes[0], 0));
|
||||||
|
deviceInfo.info.maxWorkItemSizes = maxWorkItemSizes;
|
||||||
|
|
||||||
|
cl_uint maxComputeUnits = 0;
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_MAX_COMPUTE_UNITS, &maxComputeUnits));
|
||||||
|
deviceInfo.info.maxComputeUnits = maxComputeUnits;
|
||||||
|
|
||||||
|
cl_ulong localMemorySize = 0;
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_LOCAL_MEM_SIZE, &localMemorySize));
|
||||||
|
deviceInfo.info.localMemorySize = (size_t)localMemorySize;
|
||||||
|
|
||||||
|
|
||||||
|
cl_bool unifiedMemory = false;
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_HOST_UNIFIED_MEMORY, &unifiedMemory));
|
||||||
|
deviceInfo.info.isUnifiedMemory = unifiedMemory != 0;
|
||||||
|
|
||||||
|
//initialize extra options for compilation. Currently only fp64 is included.
|
||||||
|
//Assume 4KB is enough to store all possible extensions.
|
||||||
|
openCLSafeCall(device.getInfo(CL_DEVICE_EXTENSIONS, &deviceInfo.info.deviceExtensions));
|
||||||
|
|
||||||
|
size_t fp64_khr = deviceInfo.info.deviceExtensions.find("cl_khr_fp64");
|
||||||
|
if(fp64_khr != std::string::npos)
|
||||||
|
{
|
||||||
|
deviceInfo.info.compilationExtraOptions += "-D DOUBLE_SUPPORT";
|
||||||
|
deviceInfo.info.haveDoubleSupport = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
deviceInfo.info.haveDoubleSupport = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < platforms.size(); ++i)
|
||||||
|
{
|
||||||
|
PlatformInfoImpl& platformInfo = global_platforms[i];
|
||||||
|
for(size_t j = 0; j < platformInfo.deviceIDs.size(); ++j)
|
||||||
|
{
|
||||||
|
DeviceInfoImpl& deviceInfo = global_devices[platformInfo.deviceIDs[j]];
|
||||||
|
platformInfo.info.devices[j] = &deviceInfo.info;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return global_devices.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
DeviceInfo::DeviceInfo()
|
||||||
|
: _id(-1), deviceType(DeviceType(0)),
|
||||||
|
deviceVendorId(-1),
|
||||||
|
maxWorkGroupSize(0), maxComputeUnits(0), localMemorySize(0),
|
||||||
|
deviceVersionMajor(0), deviceVersionMinor(0),
|
||||||
|
haveDoubleSupport(false), isUnifiedMemory(false),
|
||||||
|
platform(NULL)
|
||||||
|
{
|
||||||
|
// nothing
|
||||||
|
}
|
||||||
|
|
||||||
|
PlatformInfo::PlatformInfo()
|
||||||
|
: _id(-1),
|
||||||
|
platformVersionMajor(0), platformVersionMinor(0)
|
||||||
|
{
|
||||||
|
// nothing
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////// OpenCL context ////////////////////////
|
||||||
|
//This is a global singleton class used to represent a OpenCL context.
|
||||||
|
class ContextImpl : public Context
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
const cl_device_id clDeviceID;
|
||||||
|
cl_context clContext;
|
||||||
|
cl_command_queue clCmdQueue;
|
||||||
|
const DeviceInfo& deviceInfo;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
ContextImpl(const DeviceInfo& deviceInfo, cl_device_id clDeviceID)
|
||||||
|
: clDeviceID(clDeviceID), clContext(NULL), clCmdQueue(NULL), deviceInfo(deviceInfo)
|
||||||
|
{
|
||||||
|
// nothing
|
||||||
|
}
|
||||||
|
~ContextImpl();
|
||||||
|
public:
|
||||||
|
|
||||||
|
static ContextImpl* getContext();
|
||||||
|
static void setContext(const DeviceInfo* deviceInfo);
|
||||||
|
|
||||||
|
bool supportsFeature(FEATURE_TYPE featureType) const;
|
||||||
|
|
||||||
|
static void cleanupContext(void);
|
||||||
|
};
|
||||||
|
|
||||||
|
static cv::Mutex currentContextMutex;
|
||||||
|
static ContextImpl* currentContext = NULL;
|
||||||
|
|
||||||
|
Context* Context::getContext()
|
||||||
|
{
|
||||||
|
return currentContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Context::supportsFeature(FEATURE_TYPE featureType) const
|
||||||
|
{
|
||||||
|
return ((ContextImpl*)this)->supportsFeature(featureType);
|
||||||
|
}
|
||||||
|
|
||||||
|
const DeviceInfo& Context::getDeviceInfo() const
|
||||||
|
{
|
||||||
|
return ((ContextImpl*)this)->deviceInfo;
|
||||||
|
}
|
||||||
|
|
||||||
|
const void* Context::getOpenCLContextPtr() const
|
||||||
|
{
|
||||||
|
return &(((ContextImpl*)this)->clContext);
|
||||||
|
}
|
||||||
|
|
||||||
|
const void* Context::getOpenCLCommandQueuePtr() const
|
||||||
|
{
|
||||||
|
return &(((ContextImpl*)this)->clCmdQueue);
|
||||||
|
}
|
||||||
|
|
||||||
|
const void* Context::getOpenCLDeviceIDPtr() const
|
||||||
|
{
|
||||||
|
return &(((ContextImpl*)this)->clDeviceID);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool ContextImpl::supportsFeature(FEATURE_TYPE featureType) const
|
||||||
|
{
|
||||||
|
switch (featureType)
|
||||||
|
{
|
||||||
|
case FEATURE_CL_DOUBLE:
|
||||||
|
return deviceInfo.haveDoubleSupport;
|
||||||
|
case FEATURE_CL_UNIFIED_MEM:
|
||||||
|
return deviceInfo.isUnifiedMemory;
|
||||||
|
case FEATURE_CL_VER_1_2:
|
||||||
|
return deviceInfo.deviceVersionMajor > 1 || (deviceInfo.deviceVersionMajor == 1 && deviceInfo.deviceVersionMinor >= 2);
|
||||||
|
}
|
||||||
|
CV_Error(CV_StsBadArg, "Invalid feature type");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(WIN32)
|
||||||
|
static bool __termination = false;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ContextImpl::~ContextImpl()
|
||||||
|
{
|
||||||
|
fft_teardown();
|
||||||
|
clBlasTeardown();
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
// if process is on termination stage (ExitProcess was called and other threads were terminated)
|
||||||
|
// then disable command queue release because it may cause program hang
|
||||||
|
if (!__termination)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
if(clCmdQueue)
|
||||||
|
{
|
||||||
|
openCLSafeCall(clReleaseCommandQueue(clCmdQueue)); // some cleanup problems are here
|
||||||
|
}
|
||||||
|
|
||||||
|
if(clContext)
|
||||||
|
{
|
||||||
|
openCLSafeCall(clReleaseContext(clContext));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
clCmdQueue = NULL;
|
||||||
|
clContext = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ContextImpl::cleanupContext(void)
|
||||||
|
{
|
||||||
|
cv::AutoLock lock(currentContextMutex);
|
||||||
|
if (currentContext)
|
||||||
|
delete currentContext;
|
||||||
|
currentContext = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ContextImpl::setContext(const DeviceInfo* deviceInfo)
|
||||||
|
{
|
||||||
|
CV_Assert(deviceInfo->_id >= 0 && deviceInfo->_id < (int)global_devices.size());
|
||||||
|
|
||||||
|
DeviceInfoImpl& infoImpl = global_devices[deviceInfo->_id];
|
||||||
|
CV_Assert(deviceInfo == &infoImpl.info);
|
||||||
|
|
||||||
|
cl_int status = 0;
|
||||||
|
cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)(infoImpl.platform_id), 0 };
|
||||||
|
cl_context clContext = clCreateContext(cps, 1, &infoImpl.device_id, NULL, NULL, &status);
|
||||||
|
openCLVerifyCall(status);
|
||||||
|
// TODO add CL_QUEUE_PROFILING_ENABLE
|
||||||
|
cl_command_queue clCmdQueue = clCreateCommandQueue(clContext, infoImpl.device_id, 0, &status);
|
||||||
|
openCLVerifyCall(status);
|
||||||
|
|
||||||
|
ContextImpl* ctx = new ContextImpl(infoImpl.info, infoImpl.device_id);
|
||||||
|
ctx->clCmdQueue = clCmdQueue;
|
||||||
|
ctx->clContext = clContext;
|
||||||
|
|
||||||
|
ContextImpl* old = NULL;
|
||||||
|
{
|
||||||
|
cv::AutoLock lock(currentContextMutex);
|
||||||
|
old = currentContext;
|
||||||
|
currentContext = ctx;
|
||||||
|
}
|
||||||
|
if (old != NULL)
|
||||||
|
{
|
||||||
|
delete old;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ContextImpl* ContextImpl::getContext()
|
||||||
|
{
|
||||||
|
return currentContext;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getOpenCLPlatforms(PlatformsInfo& platforms)
|
||||||
|
{
|
||||||
|
platforms.clear();
|
||||||
|
|
||||||
|
for (size_t id = 0; id < global_platforms.size(); ++id)
|
||||||
|
{
|
||||||
|
PlatformInfoImpl& impl = global_platforms[id];
|
||||||
|
platforms.push_back(&impl.info);
|
||||||
|
}
|
||||||
|
|
||||||
|
return platforms.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
int getOpenCLDevices(std::vector<const DeviceInfo*> &devices, int deviceType, const PlatformInfo* platform)
|
||||||
|
{
|
||||||
|
devices.clear();
|
||||||
|
|
||||||
|
switch(deviceType)
|
||||||
|
{
|
||||||
|
case CVCL_DEVICE_TYPE_DEFAULT:
|
||||||
|
case CVCL_DEVICE_TYPE_CPU:
|
||||||
|
case CVCL_DEVICE_TYPE_GPU:
|
||||||
|
case CVCL_DEVICE_TYPE_ACCELERATOR:
|
||||||
|
case CVCL_DEVICE_TYPE_ALL:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (platform == NULL)
|
||||||
|
{
|
||||||
|
for (size_t id = 0; id < global_devices.size(); ++id)
|
||||||
|
{
|
||||||
|
DeviceInfoImpl& deviceInfo = global_devices[id];
|
||||||
|
if (((int)deviceInfo.info.deviceType & deviceType) == deviceType)
|
||||||
|
{
|
||||||
|
devices.push_back(&deviceInfo.info);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (size_t id = 0; id < platform->devices.size(); ++id)
|
||||||
|
{
|
||||||
|
const DeviceInfo* deviceInfo = platform->devices[id];
|
||||||
|
if (((int)deviceInfo->deviceType & deviceType) == deviceType)
|
||||||
|
{
|
||||||
|
devices.push_back(deviceInfo);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return (int)devices.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
void setDevice(const DeviceInfo* info)
|
||||||
|
{
|
||||||
|
ContextImpl::setContext(info);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool supportsFeature(FEATURE_TYPE featureType)
|
||||||
|
{
|
||||||
|
return Context::getContext()->supportsFeature(featureType);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct __Module
|
||||||
|
{
|
||||||
|
__Module() { initializeOpenCLDevices(); }
|
||||||
|
~__Module() { ContextImpl::cleanupContext(); }
|
||||||
|
};
|
||||||
|
static __Module __module;
|
||||||
|
|
||||||
|
|
||||||
|
}//namespace ocl
|
||||||
|
}//namespace cv
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(WIN32) && defined(CVAPI_EXPORTS)
|
||||||
|
|
||||||
|
extern "C"
|
||||||
|
BOOL WINAPI DllMain(HINSTANCE /*hInst*/, DWORD fdwReason, LPVOID lpReserved)
|
||||||
|
{
|
||||||
|
if (fdwReason == DLL_PROCESS_DETACH)
|
||||||
|
{
|
||||||
|
if (lpReserved != NULL) // called after ExitProcess() call
|
||||||
|
cv::ocl::__termination = true;
|
||||||
|
}
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
434
modules/ocl/src/cl_operations.cpp
Normal file
434
modules/ocl/src/cl_operations.cpp
Normal file
@ -0,0 +1,434 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Guoping Long, longguoping@gmail.com
|
||||||
|
// Niko Li, newlife20080214@gmail.com
|
||||||
|
// Yao Wang, bitwangyaoyao@gmail.com
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
#include <iomanip>
|
||||||
|
#include <fstream>
|
||||||
|
#include "binarycaching.hpp"
|
||||||
|
|
||||||
|
#undef __CL_ENABLE_EXCEPTIONS
|
||||||
|
#include <CL/cl.hpp>
|
||||||
|
|
||||||
|
//#define PRINT_KERNEL_RUN_TIME
|
||||||
|
#define RUN_TIMES 100
|
||||||
|
#ifndef CL_MEM_USE_PERSISTENT_MEM_AMD
|
||||||
|
#define CL_MEM_USE_PERSISTENT_MEM_AMD 0
|
||||||
|
#endif
|
||||||
|
//#define AMD_DOUBLE_DIFFER
|
||||||
|
|
||||||
|
namespace cv { namespace ocl {
|
||||||
|
|
||||||
|
DevMemType gDeviceMemType = DEVICE_MEM_DEFAULT;
|
||||||
|
DevMemRW gDeviceMemRW = DEVICE_MEM_R_W;
|
||||||
|
int gDevMemTypeValueMap[5] = {0,
|
||||||
|
CL_MEM_ALLOC_HOST_PTR,
|
||||||
|
CL_MEM_USE_HOST_PTR,
|
||||||
|
CL_MEM_COPY_HOST_PTR,
|
||||||
|
CL_MEM_USE_PERSISTENT_MEM_AMD};
|
||||||
|
int gDevMemRWValueMap[3] = {CL_MEM_READ_WRITE, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY};
|
||||||
|
|
||||||
|
void finish()
|
||||||
|
{
|
||||||
|
clFinish(getClCommandQueue(Context::getContext()));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool isCpuDevice()
|
||||||
|
{
|
||||||
|
const DeviceInfo& info = Context::getContext()->getDeviceInfo();
|
||||||
|
return (info.deviceType == CVCL_DEVICE_TYPE_CPU);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t queryWaveFrontSize(cl_kernel kernel)
|
||||||
|
{
|
||||||
|
const DeviceInfo& info = Context::getContext()->getDeviceInfo();
|
||||||
|
if (info.deviceType == CVCL_DEVICE_TYPE_CPU)
|
||||||
|
return 1;
|
||||||
|
size_t wavefront = 0;
|
||||||
|
CV_Assert(kernel != NULL);
|
||||||
|
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(Context::getContext()),
|
||||||
|
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &wavefront, NULL));
|
||||||
|
return wavefront;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void openCLReadBuffer(Context *ctx, cl_mem dst_buffer, void *host_buffer, size_t size)
|
||||||
|
{
|
||||||
|
cl_int status;
|
||||||
|
status = clEnqueueReadBuffer(getClCommandQueue(ctx), dst_buffer, CL_TRUE, 0,
|
||||||
|
size, host_buffer, 0, NULL, NULL);
|
||||||
|
openCLVerifyCall(status);
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_mem openCLCreateBuffer(Context *ctx, size_t flag , size_t size)
|
||||||
|
{
|
||||||
|
cl_int status;
|
||||||
|
cl_mem buffer = clCreateBuffer(getClContext(ctx), (cl_mem_flags)flag, size, NULL, &status);
|
||||||
|
openCLVerifyCall(status);
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
void openCLMallocPitch(Context *ctx, void **dev_ptr, size_t *pitch,
|
||||||
|
size_t widthInBytes, size_t height)
|
||||||
|
{
|
||||||
|
openCLMallocPitchEx(ctx, dev_ptr, pitch, widthInBytes, height, gDeviceMemRW, gDeviceMemType);
|
||||||
|
}
|
||||||
|
|
||||||
|
void openCLMallocPitchEx(Context *ctx, void **dev_ptr, size_t *pitch,
|
||||||
|
size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type)
|
||||||
|
{
|
||||||
|
cl_int status;
|
||||||
|
*dev_ptr = clCreateBuffer(getClContext(ctx), gDevMemRWValueMap[rw_type]|gDevMemTypeValueMap[mem_type],
|
||||||
|
widthInBytes * height, 0, &status);
|
||||||
|
openCLVerifyCall(status);
|
||||||
|
*pitch = widthInBytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
void openCLMemcpy2D(Context *ctx, void *dst, size_t dpitch,
|
||||||
|
const void *src, size_t spitch,
|
||||||
|
size_t width, size_t height, openCLMemcpyKind kind, int channels)
|
||||||
|
{
|
||||||
|
size_t buffer_origin[3] = {0, 0, 0};
|
||||||
|
size_t host_origin[3] = {0, 0, 0};
|
||||||
|
size_t region[3] = {width, height, 1};
|
||||||
|
if(kind == clMemcpyHostToDevice)
|
||||||
|
{
|
||||||
|
if(dpitch == width || channels == 3 || height == 1)
|
||||||
|
{
|
||||||
|
openCLSafeCall(clEnqueueWriteBuffer(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE,
|
||||||
|
0, width * height, src, 0, NULL, NULL));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
openCLSafeCall(clEnqueueWriteBufferRect(getClCommandQueue(ctx), (cl_mem)dst, CL_TRUE,
|
||||||
|
buffer_origin, host_origin, region, dpitch, 0, spitch, 0, src, 0, 0, 0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if(kind == clMemcpyDeviceToHost)
|
||||||
|
{
|
||||||
|
if(spitch == width || channels == 3 || height == 1)
|
||||||
|
{
|
||||||
|
openCLSafeCall(clEnqueueReadBuffer(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE,
|
||||||
|
0, width * height, dst, 0, NULL, NULL));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
openCLSafeCall(clEnqueueReadBufferRect(getClCommandQueue(ctx), (cl_mem)src, CL_TRUE,
|
||||||
|
buffer_origin, host_origin, region, spitch, 0, dpitch, 0, dst, 0, 0, 0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void openCLCopyBuffer2D(Context *ctx, void *dst, size_t dpitch, int dst_offset,
|
||||||
|
const void *src, size_t spitch,
|
||||||
|
size_t width, size_t height, int src_offset)
|
||||||
|
{
|
||||||
|
size_t src_origin[3] = {src_offset % spitch, src_offset / spitch, 0};
|
||||||
|
size_t dst_origin[3] = {dst_offset % dpitch, dst_offset / dpitch, 0};
|
||||||
|
size_t region[3] = {width, height, 1};
|
||||||
|
|
||||||
|
openCLSafeCall(clEnqueueCopyBufferRect(getClCommandQueue(ctx), (cl_mem)src, (cl_mem)dst, src_origin, dst_origin,
|
||||||
|
region, spitch, 0, dpitch, 0, 0, 0, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
void openCLFree(void *devPtr)
|
||||||
|
{
|
||||||
|
openCLSafeCall(clReleaseMemObject((cl_mem)devPtr));
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName)
|
||||||
|
{
|
||||||
|
return openCLGetKernelFromSource(ctx, source, kernelName, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_kernel openCLGetKernelFromSource(const Context *ctx, const char **source, string kernelName,
|
||||||
|
const char *build_options)
|
||||||
|
{
|
||||||
|
cl_kernel kernel;
|
||||||
|
cl_int status = 0;
|
||||||
|
CV_Assert(ProgramCache::getProgramCache() != NULL);
|
||||||
|
cl_program program = ProgramCache::getProgramCache()->getProgram(ctx, source, kernelName, build_options);
|
||||||
|
CV_Assert(program != NULL);
|
||||||
|
kernel = clCreateKernel(program, kernelName.c_str(), &status);
|
||||||
|
openCLVerifyCall(status);
|
||||||
|
return kernel;
|
||||||
|
}
|
||||||
|
|
||||||
|
void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThreads)
|
||||||
|
{
|
||||||
|
size_t kernelWorkGroupSize;
|
||||||
|
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, getClDeviceID(ctx),
|
||||||
|
CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &kernelWorkGroupSize, 0));
|
||||||
|
CV_Assert( localThreads[0] <= ctx->getDeviceInfo().maxWorkItemSizes[0] );
|
||||||
|
CV_Assert( localThreads[1] <= ctx->getDeviceInfo().maxWorkItemSizes[1] );
|
||||||
|
CV_Assert( localThreads[2] <= ctx->getDeviceInfo().maxWorkItemSizes[2] );
|
||||||
|
CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= kernelWorkGroupSize );
|
||||||
|
CV_Assert( localThreads[0] * localThreads[1] * localThreads[2] <= ctx->getDeviceInfo().maxWorkGroupSize );
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef PRINT_KERNEL_RUN_TIME
|
||||||
|
static double total_execute_time = 0;
|
||||||
|
static double total_kernel_time = 0;
|
||||||
|
#endif
|
||||||
|
void openCLExecuteKernel_(Context *ctx , const char **source, string kernelName, size_t globalThreads[3],
|
||||||
|
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
|
||||||
|
int depth, const char *build_options)
|
||||||
|
{
|
||||||
|
//construct kernel name
|
||||||
|
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
|
||||||
|
//for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char)
|
||||||
|
stringstream idxStr;
|
||||||
|
if(channels != -1)
|
||||||
|
idxStr << "_C" << channels;
|
||||||
|
if(depth != -1)
|
||||||
|
idxStr << "_D" << depth;
|
||||||
|
kernelName += idxStr.str();
|
||||||
|
|
||||||
|
cl_kernel kernel;
|
||||||
|
kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options);
|
||||||
|
|
||||||
|
if ( localThreads != NULL)
|
||||||
|
{
|
||||||
|
globalThreads[0] = roundUp(globalThreads[0], localThreads[0]);
|
||||||
|
globalThreads[1] = roundUp(globalThreads[1], localThreads[1]);
|
||||||
|
globalThreads[2] = roundUp(globalThreads[2], localThreads[2]);
|
||||||
|
|
||||||
|
cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
|
||||||
|
}
|
||||||
|
for(size_t i = 0; i < args.size(); i ++)
|
||||||
|
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
|
||||||
|
|
||||||
|
#ifndef PRINT_KERNEL_RUN_TIME
|
||||||
|
openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
|
||||||
|
localThreads, 0, NULL, NULL));
|
||||||
|
#else
|
||||||
|
cl_event event = NULL;
|
||||||
|
openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
|
||||||
|
localThreads, 0, NULL, &event));
|
||||||
|
|
||||||
|
cl_ulong start_time, end_time, queue_time;
|
||||||
|
double execute_time = 0;
|
||||||
|
double total_time = 0;
|
||||||
|
|
||||||
|
openCLSafeCall(clWaitForEvents(1, &event));
|
||||||
|
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START,
|
||||||
|
sizeof(cl_ulong), &start_time, 0));
|
||||||
|
|
||||||
|
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
|
||||||
|
sizeof(cl_ulong), &end_time, 0));
|
||||||
|
|
||||||
|
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
|
||||||
|
sizeof(cl_ulong), &queue_time, 0));
|
||||||
|
|
||||||
|
execute_time = (double)(end_time - start_time) / (1000 * 1000);
|
||||||
|
total_time = (double)(end_time - queue_time) / (1000 * 1000);
|
||||||
|
|
||||||
|
total_execute_time += execute_time;
|
||||||
|
total_kernel_time += total_time;
|
||||||
|
clReleaseEvent(event);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
clFlush(getClCommandQueue(ctx));
|
||||||
|
openCLSafeCall(clReleaseKernel(kernel));
|
||||||
|
}
|
||||||
|
|
||||||
|
void openCLExecuteKernel(Context *ctx , const char **source, string kernelName,
|
||||||
|
size_t globalThreads[3], size_t localThreads[3],
|
||||||
|
vector< pair<size_t, const void *> > &args, int channels, int depth)
|
||||||
|
{
|
||||||
|
openCLExecuteKernel(ctx, source, kernelName, globalThreads, localThreads, args,
|
||||||
|
channels, depth, NULL);
|
||||||
|
}
|
||||||
|
void openCLExecuteKernel(Context *ctx , const char **source, string kernelName,
|
||||||
|
size_t globalThreads[3], size_t localThreads[3],
|
||||||
|
vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options)
|
||||||
|
|
||||||
|
{
|
||||||
|
#ifndef PRINT_KERNEL_RUN_TIME
|
||||||
|
openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth,
|
||||||
|
build_options);
|
||||||
|
#else
|
||||||
|
string data_type[] = { "uchar", "char", "ushort", "short", "int", "float", "double"};
|
||||||
|
cout << endl;
|
||||||
|
cout << "Function Name: " << kernelName;
|
||||||
|
if(depth >= 0)
|
||||||
|
cout << " |data type: " << data_type[depth];
|
||||||
|
cout << " |channels: " << channels;
|
||||||
|
cout << " |Time Unit: " << "ms" << endl;
|
||||||
|
|
||||||
|
total_execute_time = 0;
|
||||||
|
total_kernel_time = 0;
|
||||||
|
cout << "-------------------------------------" << endl;
|
||||||
|
|
||||||
|
cout << setiosflags(ios::left) << setw(15) << "excute time";
|
||||||
|
cout << setiosflags(ios::left) << setw(15) << "lauch time";
|
||||||
|
cout << setiosflags(ios::left) << setw(15) << "kernel time" << endl;
|
||||||
|
int i = 0;
|
||||||
|
for(i = 0; i < RUN_TIMES; i++)
|
||||||
|
openCLExecuteKernel_(ctx, source, kernelName, globalThreads, localThreads, args, channels, depth,
|
||||||
|
build_options);
|
||||||
|
|
||||||
|
cout << "average kernel excute time: " << total_execute_time / RUN_TIMES << endl; // "ms" << endl;
|
||||||
|
cout << "average kernel total time: " << total_kernel_time / RUN_TIMES << endl; // "ms" << endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
double openCLExecuteKernelInterop(Context *ctx , const char **source, string kernelName,
|
||||||
|
size_t globalThreads[3], size_t localThreads[3],
|
||||||
|
vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
|
||||||
|
bool finish, bool measureKernelTime, bool cleanUp)
|
||||||
|
|
||||||
|
{
|
||||||
|
//construct kernel name
|
||||||
|
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
|
||||||
|
//for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char)
|
||||||
|
stringstream idxStr;
|
||||||
|
if(channels != -1)
|
||||||
|
idxStr << "_C" << channels;
|
||||||
|
if(depth != -1)
|
||||||
|
idxStr << "_D" << depth;
|
||||||
|
kernelName += idxStr.str();
|
||||||
|
|
||||||
|
cl_kernel kernel;
|
||||||
|
kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options);
|
||||||
|
|
||||||
|
double kernelTime = 0.0;
|
||||||
|
|
||||||
|
if( globalThreads != NULL)
|
||||||
|
{
|
||||||
|
if ( localThreads != NULL)
|
||||||
|
{
|
||||||
|
globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
|
||||||
|
globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
|
||||||
|
globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
|
||||||
|
|
||||||
|
//size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
|
||||||
|
cv::ocl::openCLVerifyKernel(ctx, kernel, localThreads);
|
||||||
|
}
|
||||||
|
for(size_t i = 0; i < args.size(); i ++)
|
||||||
|
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
|
||||||
|
|
||||||
|
if(measureKernelTime == false)
|
||||||
|
{
|
||||||
|
openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
|
||||||
|
localThreads, 0, NULL, NULL));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cl_event event = NULL;
|
||||||
|
openCLSafeCall(clEnqueueNDRangeKernel(getClCommandQueue(ctx), kernel, 3, NULL, globalThreads,
|
||||||
|
localThreads, 0, NULL, &event));
|
||||||
|
|
||||||
|
cl_ulong end_time, queue_time;
|
||||||
|
|
||||||
|
openCLSafeCall(clWaitForEvents(1, &event));
|
||||||
|
|
||||||
|
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END,
|
||||||
|
sizeof(cl_ulong), &end_time, 0));
|
||||||
|
|
||||||
|
openCLSafeCall(clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED,
|
||||||
|
sizeof(cl_ulong), &queue_time, 0));
|
||||||
|
|
||||||
|
kernelTime = (double)(end_time - queue_time) / (1000 * 1000);
|
||||||
|
|
||||||
|
clReleaseEvent(event);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(finish)
|
||||||
|
{
|
||||||
|
clFinish(getClCommandQueue(ctx));
|
||||||
|
}
|
||||||
|
|
||||||
|
if(cleanUp)
|
||||||
|
{
|
||||||
|
openCLSafeCall(clReleaseKernel(kernel));
|
||||||
|
}
|
||||||
|
|
||||||
|
return kernelTime;
|
||||||
|
}
|
||||||
|
|
||||||
|
//double openCLExecuteKernelInterop(Context *ctx , const char **fileName, const int numFiles, string kernelName,
|
||||||
|
// size_t globalThreads[3], size_t localThreads[3],
|
||||||
|
// vector< pair<size_t, const void *> > &args, int channels, int depth, const char *build_options,
|
||||||
|
// bool finish, bool measureKernelTime, bool cleanUp)
|
||||||
|
//
|
||||||
|
//{
|
||||||
|
// std::vector<std::string> fsource;
|
||||||
|
// for (int i = 0 ; i < numFiles ; i++)
|
||||||
|
// {
|
||||||
|
// std::string str;
|
||||||
|
// if (convertToString(fileName[i], str) >= 0)
|
||||||
|
// fsource.push_back(str);
|
||||||
|
// }
|
||||||
|
// const char **source = new const char *[numFiles];
|
||||||
|
// for (int i = 0 ; i < numFiles ; i++)
|
||||||
|
// source[i] = fsource[i].c_str();
|
||||||
|
// double kernelTime = openCLExecuteKernelInterop(ctx ,source, kernelName, globalThreads, localThreads,
|
||||||
|
// args, channels, depth, build_options, finish, measureKernelTime, cleanUp);
|
||||||
|
// fsource.clear();
|
||||||
|
// delete []source;
|
||||||
|
// return kernelTime;
|
||||||
|
//}
|
||||||
|
|
||||||
|
cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value,
|
||||||
|
const size_t size)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
cl_mem con_struct;
|
||||||
|
|
||||||
|
con_struct = clCreateBuffer(context, CL_MEM_READ_ONLY, size, NULL, &status);
|
||||||
|
openCLSafeCall(status);
|
||||||
|
|
||||||
|
openCLSafeCall(clEnqueueWriteBuffer(command_queue, con_struct, 1, 0, size,
|
||||||
|
value, 0, 0, 0));
|
||||||
|
|
||||||
|
return con_struct;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}//namespace ocl
|
||||||
|
}//namespace cv
|
311
modules/ocl/src/cl_programcache.cpp
Normal file
311
modules/ocl/src/cl_programcache.cpp
Normal file
@ -0,0 +1,311 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// @Authors
|
||||||
|
// Guoping Long, longguoping@gmail.com
|
||||||
|
// Niko Li, newlife20080214@gmail.com
|
||||||
|
// Yao Wang, bitwangyaoyao@gmail.com
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other oclMaterials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
#include <iomanip>
|
||||||
|
#include <fstream>
|
||||||
|
#include "binarycaching.hpp"
|
||||||
|
|
||||||
|
#undef __CL_ENABLE_EXCEPTIONS
|
||||||
|
#include <CL/cl.hpp>
|
||||||
|
|
||||||
|
namespace cv { namespace ocl {
|
||||||
|
/*
|
||||||
|
* The binary caching system to eliminate redundant program source compilation.
|
||||||
|
* Strictly, this is not a cache because we do not implement evictions right now.
|
||||||
|
* We shall add such features to trade-off memory consumption and performance when necessary.
|
||||||
|
*/
|
||||||
|
|
||||||
|
std::auto_ptr<ProgramCache> _programCache;
|
||||||
|
ProgramCache* ProgramCache::getProgramCache()
|
||||||
|
{
|
||||||
|
if (NULL == _programCache.get())
|
||||||
|
_programCache.reset(new ProgramCache());
|
||||||
|
return _programCache.get();
|
||||||
|
}
|
||||||
|
|
||||||
|
ProgramCache::ProgramCache()
|
||||||
|
{
|
||||||
|
codeCache.clear();
|
||||||
|
cacheSize = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ProgramCache::~ProgramCache()
|
||||||
|
{
|
||||||
|
releaseProgram();
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_program ProgramCache::progLookup(string srcsign)
|
||||||
|
{
|
||||||
|
map<string, cl_program>::iterator iter;
|
||||||
|
iter = codeCache.find(srcsign);
|
||||||
|
if(iter != codeCache.end())
|
||||||
|
return iter->second;
|
||||||
|
else
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProgramCache::addProgram(string srcsign , cl_program program)
|
||||||
|
{
|
||||||
|
if(!progLookup(srcsign))
|
||||||
|
{
|
||||||
|
codeCache.insert(map<string, cl_program>::value_type(srcsign, program));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProgramCache::releaseProgram()
|
||||||
|
{
|
||||||
|
map<string, cl_program>::iterator iter;
|
||||||
|
for(iter = codeCache.begin(); iter != codeCache.end(); iter++)
|
||||||
|
{
|
||||||
|
openCLSafeCall(clReleaseProgram(iter->second));
|
||||||
|
}
|
||||||
|
codeCache.clear();
|
||||||
|
cacheSize = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int enable_disk_cache =
|
||||||
|
#ifdef _DEBUG
|
||||||
|
false;
|
||||||
|
#else
|
||||||
|
true;
|
||||||
|
#endif
|
||||||
|
static int update_disk_cache = false;
|
||||||
|
static String binpath = "";
|
||||||
|
|
||||||
|
void setBinaryDiskCache(int mode, String path)
|
||||||
|
{
|
||||||
|
if(mode == CACHE_NONE)
|
||||||
|
{
|
||||||
|
update_disk_cache = 0;
|
||||||
|
enable_disk_cache = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
update_disk_cache |= (mode & CACHE_UPDATE) == CACHE_UPDATE;
|
||||||
|
enable_disk_cache |=
|
||||||
|
#ifdef _DEBUG
|
||||||
|
(mode & CACHE_DEBUG) == CACHE_DEBUG;
|
||||||
|
#else
|
||||||
|
(mode & CACHE_RELEASE) == CACHE_RELEASE;
|
||||||
|
#endif
|
||||||
|
if(enable_disk_cache && !path.empty())
|
||||||
|
{
|
||||||
|
binpath = path;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void setBinpath(const char *path)
|
||||||
|
{
|
||||||
|
binpath = path;
|
||||||
|
}
|
||||||
|
|
||||||
|
int savetofile(const Context*, cl_program &program, const char *fileName)
|
||||||
|
{
|
||||||
|
size_t binarySize;
|
||||||
|
openCLSafeCall(clGetProgramInfo(program,
|
||||||
|
CL_PROGRAM_BINARY_SIZES,
|
||||||
|
sizeof(size_t),
|
||||||
|
&binarySize, NULL));
|
||||||
|
char* binary = (char*)malloc(binarySize);
|
||||||
|
if(binary == NULL)
|
||||||
|
{
|
||||||
|
CV_Error(CV_StsNoMem, "Failed to allocate host memory.");
|
||||||
|
}
|
||||||
|
openCLSafeCall(clGetProgramInfo(program,
|
||||||
|
CL_PROGRAM_BINARIES,
|
||||||
|
sizeof(char *),
|
||||||
|
&binary,
|
||||||
|
NULL));
|
||||||
|
|
||||||
|
FILE *fp = fopen(fileName, "wb+");
|
||||||
|
if(fp != NULL)
|
||||||
|
{
|
||||||
|
fwrite(binary, binarySize, 1, fp);
|
||||||
|
free(binary);
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_program ProgramCache::getProgram(const Context *ctx, const char **source, string kernelName,
|
||||||
|
const char *build_options)
|
||||||
|
{
|
||||||
|
cl_program program;
|
||||||
|
cl_int status = 0;
|
||||||
|
stringstream src_sign;
|
||||||
|
string srcsign;
|
||||||
|
string filename;
|
||||||
|
|
||||||
|
if (NULL != build_options)
|
||||||
|
{
|
||||||
|
src_sign << (int64)(*source) << getClContext(ctx) << "_" << build_options;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
src_sign << (int64)(*source) << getClContext(ctx);
|
||||||
|
}
|
||||||
|
srcsign = src_sign.str();
|
||||||
|
|
||||||
|
program = NULL;
|
||||||
|
program = ProgramCache::getProgramCache()->progLookup(srcsign);
|
||||||
|
|
||||||
|
if (!program)
|
||||||
|
{
|
||||||
|
//config build programs
|
||||||
|
std::string all_build_options;
|
||||||
|
if (!ctx->getDeviceInfo().compilationExtraOptions.empty())
|
||||||
|
all_build_options += ctx->getDeviceInfo().compilationExtraOptions;
|
||||||
|
if (build_options != NULL)
|
||||||
|
{
|
||||||
|
all_build_options += " ";
|
||||||
|
all_build_options += build_options;
|
||||||
|
}
|
||||||
|
filename = binpath + kernelName + "_" + ctx->getDeviceInfo().deviceName + all_build_options + ".clb";
|
||||||
|
|
||||||
|
FILE *fp = enable_disk_cache ? fopen(filename.c_str(), "rb") : NULL;
|
||||||
|
if(fp == NULL || update_disk_cache)
|
||||||
|
{
|
||||||
|
if(fp != NULL)
|
||||||
|
fclose(fp);
|
||||||
|
|
||||||
|
program = clCreateProgramWithSource(
|
||||||
|
getClContext(ctx), 1, source, NULL, &status);
|
||||||
|
openCLVerifyCall(status);
|
||||||
|
cl_device_id device = getClDeviceID(ctx);
|
||||||
|
status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL);
|
||||||
|
if(status == CL_SUCCESS && enable_disk_cache)
|
||||||
|
savetofile(ctx, program, filename.c_str());
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
fseek(fp, 0, SEEK_END);
|
||||||
|
size_t binarySize = ftell(fp);
|
||||||
|
fseek(fp, 0, SEEK_SET);
|
||||||
|
char *binary = new char[binarySize];
|
||||||
|
CV_Assert(1 == fread(binary, binarySize, 1, fp));
|
||||||
|
fclose(fp);
|
||||||
|
cl_int status = 0;
|
||||||
|
cl_device_id device = getClDeviceID(ctx);
|
||||||
|
program = clCreateProgramWithBinary(getClContext(ctx),
|
||||||
|
1,
|
||||||
|
&device,
|
||||||
|
(const size_t *)&binarySize,
|
||||||
|
(const unsigned char **)&binary,
|
||||||
|
NULL,
|
||||||
|
&status);
|
||||||
|
openCLVerifyCall(status);
|
||||||
|
status = clBuildProgram(program, 1, &device, all_build_options.c_str(), NULL, NULL);
|
||||||
|
delete[] binary;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(status != CL_SUCCESS)
|
||||||
|
{
|
||||||
|
if(status == CL_BUILD_PROGRAM_FAILURE)
|
||||||
|
{
|
||||||
|
cl_int logStatus;
|
||||||
|
char *buildLog = NULL;
|
||||||
|
size_t buildLogSize = 0;
|
||||||
|
logStatus = clGetProgramBuildInfo(program,
|
||||||
|
getClDeviceID(ctx), CL_PROGRAM_BUILD_LOG, buildLogSize,
|
||||||
|
buildLog, &buildLogSize);
|
||||||
|
if(logStatus != CL_SUCCESS)
|
||||||
|
std::cout << "Failed to build the program and get the build info." << endl;
|
||||||
|
buildLog = new char[buildLogSize];
|
||||||
|
CV_DbgAssert(!!buildLog);
|
||||||
|
memset(buildLog, 0, buildLogSize);
|
||||||
|
openCLSafeCall(clGetProgramBuildInfo(program, getClDeviceID(ctx),
|
||||||
|
CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL));
|
||||||
|
std::cout << "\n\t\t\tBUILD LOG\n";
|
||||||
|
std::cout << buildLog << endl;
|
||||||
|
delete [] buildLog;
|
||||||
|
}
|
||||||
|
openCLVerifyCall(status);
|
||||||
|
}
|
||||||
|
//Cache the binary for future use if build_options is null
|
||||||
|
if( (this->cacheSize += 1) < MAX_PROG_CACHE_SIZE)
|
||||||
|
this->addProgram(srcsign, program);
|
||||||
|
else
|
||||||
|
cout << "Warning: code cache has been full.\n";
|
||||||
|
}
|
||||||
|
return program;
|
||||||
|
}
|
||||||
|
|
||||||
|
//// Converts the contents of a file into a string
|
||||||
|
//static int convertToString(const char *filename, std::string& s)
|
||||||
|
//{
|
||||||
|
// size_t size;
|
||||||
|
// char* str;
|
||||||
|
//
|
||||||
|
// std::fstream f(filename, (std::fstream::in | std::fstream::binary));
|
||||||
|
// if(f.is_open())
|
||||||
|
// {
|
||||||
|
// size_t fileSize;
|
||||||
|
// f.seekg(0, std::fstream::end);
|
||||||
|
// size = fileSize = (size_t)f.tellg();
|
||||||
|
// f.seekg(0, std::fstream::beg);
|
||||||
|
//
|
||||||
|
// str = new char[size+1];
|
||||||
|
// if(!str)
|
||||||
|
// {
|
||||||
|
// f.close();
|
||||||
|
// return -1;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// f.read(str, fileSize);
|
||||||
|
// f.close();
|
||||||
|
// str[size] = '\0';
|
||||||
|
//
|
||||||
|
// s = str;
|
||||||
|
// delete[] str;
|
||||||
|
// return 0;
|
||||||
|
// }
|
||||||
|
// printf("Error: Failed to open file %s\n", filename);
|
||||||
|
// return -1;
|
||||||
|
//}
|
||||||
|
|
||||||
|
} // namespace ocl
|
||||||
|
} // namespace cv
|
@ -152,19 +152,19 @@ namespace cv
|
|||||||
case CL_INVALID_GLOBAL_WORK_SIZE:
|
case CL_INVALID_GLOBAL_WORK_SIZE:
|
||||||
return "CL_INVALID_GLOBAL_WORK_SIZE";
|
return "CL_INVALID_GLOBAL_WORK_SIZE";
|
||||||
//case CL_INVALID_PROPERTY:
|
//case CL_INVALID_PROPERTY:
|
||||||
// return "CL_INVALID_PROPERTY";
|
// return "CL_INVALID_PROPERTY";
|
||||||
//case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR:
|
//case CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR:
|
||||||
// return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
|
// return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
|
||||||
//case CL_PLATFORM_NOT_FOUND_KHR:
|
//case CL_PLATFORM_NOT_FOUND_KHR:
|
||||||
// return "CL_PLATFORM_NOT_FOUND_KHR";
|
// return "CL_PLATFORM_NOT_FOUND_KHR";
|
||||||
// //case CL_INVALID_PROPERTY_EXT:
|
// //case CL_INVALID_PROPERTY_EXT:
|
||||||
// // return "CL_INVALID_PROPERTY_EXT";
|
// // return "CL_INVALID_PROPERTY_EXT";
|
||||||
//case CL_DEVICE_PARTITION_FAILED_EXT:
|
//case CL_DEVICE_PARTITION_FAILED_EXT:
|
||||||
// return "CL_DEVICE_PARTITION_FAILED_EXT";
|
// return "CL_DEVICE_PARTITION_FAILED_EXT";
|
||||||
//case CL_INVALID_PARTITION_COUNT_EXT:
|
//case CL_INVALID_PARTITION_COUNT_EXT:
|
||||||
// return "CL_INVALID_PARTITION_COUNT_EXT";
|
// return "CL_INVALID_PARTITION_COUNT_EXT";
|
||||||
//default:
|
//default:
|
||||||
// return "unknown error code";
|
// return "unknown error code";
|
||||||
}
|
}
|
||||||
static char buf[256];
|
static char buf[256];
|
||||||
sprintf(buf, "%d", err);
|
sprintf(buf, "%d", err);
|
||||||
|
@ -156,25 +156,25 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
|
|||||||
{
|
{
|
||||||
fft_setup();
|
fft_setup();
|
||||||
|
|
||||||
bool is_1d_input = (_dft_size.height == 1);
|
bool is_1d_input = (_dft_size.height == 1);
|
||||||
int is_row_dft = flags & DFT_ROWS;
|
int is_row_dft = flags & DFT_ROWS;
|
||||||
int is_scaled_dft = flags & DFT_SCALE;
|
int is_scaled_dft = flags & DFT_SCALE;
|
||||||
int is_inverse = flags & DFT_INVERSE;
|
int is_inverse = flags & DFT_INVERSE;
|
||||||
|
|
||||||
//clAmdFftResultLocation place;
|
//clAmdFftResultLocation place;
|
||||||
clAmdFftLayout inLayout;
|
clAmdFftLayout inLayout;
|
||||||
clAmdFftLayout outLayout;
|
clAmdFftLayout outLayout;
|
||||||
clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D;
|
clAmdFftDim dim = is_1d_input || is_row_dft ? CLFFT_1D : CLFFT_2D;
|
||||||
|
|
||||||
size_t batchSize = is_row_dft ? dft_size.height : 1;
|
size_t batchSize = is_row_dft ? dft_size.height : 1;
|
||||||
size_t clLengthsIn[ 3 ] = {1, 1, 1};
|
size_t clLengthsIn[ 3 ] = {1, 1, 1};
|
||||||
size_t clStridesIn[ 3 ] = {1, 1, 1};
|
size_t clStridesIn[ 3 ] = {1, 1, 1};
|
||||||
//size_t clLengthsOut[ 3 ] = {1, 1, 1};
|
//size_t clLengthsOut[ 3 ] = {1, 1, 1};
|
||||||
size_t clStridesOut[ 3 ] = {1, 1, 1};
|
size_t clStridesOut[ 3 ] = {1, 1, 1};
|
||||||
clLengthsIn[0] = dft_size.width;
|
clLengthsIn[0] = dft_size.width;
|
||||||
clLengthsIn[1] = is_row_dft ? 1 : dft_size.height;
|
clLengthsIn[1] = is_row_dft ? 1 : dft_size.height;
|
||||||
clStridesIn[0] = 1;
|
clStridesIn[0] = 1;
|
||||||
clStridesOut[0] = 1;
|
clStridesOut[0] = 1;
|
||||||
|
|
||||||
switch(_type)
|
switch(_type)
|
||||||
{
|
{
|
||||||
@ -206,7 +206,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
|
|||||||
clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1];
|
clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1];
|
||||||
clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];
|
clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1];
|
||||||
|
|
||||||
openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getoclContext(), dim, clLengthsIn ) );
|
openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getClContextPtr(), dim, clLengthsIn ) );
|
||||||
|
|
||||||
openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
|
openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) );
|
||||||
openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
|
openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) );
|
||||||
@ -220,7 +220,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla
|
|||||||
openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) );
|
openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) );
|
||||||
|
|
||||||
//ready to bake
|
//ready to bake
|
||||||
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getoclCommandQueue(), NULL, NULL ) );
|
openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getClCommandQueuePtr(), NULL, NULL ) );
|
||||||
}
|
}
|
||||||
cv::ocl::FftPlan::~FftPlan()
|
cv::ocl::FftPlan::~FftPlan()
|
||||||
{
|
{
|
||||||
@ -296,12 +296,12 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
|
|||||||
// similar assertions with cuda module
|
// similar assertions with cuda module
|
||||||
CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
|
CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2);
|
||||||
|
|
||||||
//bool is_1d_input = (src.rows == 1);
|
//bool is_1d_input = (src.rows == 1);
|
||||||
//int is_row_dft = flags & DFT_ROWS;
|
//int is_row_dft = flags & DFT_ROWS;
|
||||||
//int is_scaled_dft = flags & DFT_SCALE;
|
//int is_scaled_dft = flags & DFT_SCALE;
|
||||||
int is_inverse = flags & DFT_INVERSE;
|
int is_inverse = flags & DFT_INVERSE;
|
||||||
bool is_complex_input = src.channels() == 2;
|
bool is_complex_input = src.channels() == 2;
|
||||||
bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
|
bool is_complex_output = !(flags & DFT_REAL_OUTPUT);
|
||||||
|
|
||||||
|
|
||||||
// We don't support real-to-real transform
|
// We don't support real-to-real transform
|
||||||
@ -338,10 +338,10 @@ void cv::ocl::dft(const oclMat &src, oclMat &dst, Size dft_size, int flags)
|
|||||||
if (buffersize)
|
if (buffersize)
|
||||||
{
|
{
|
||||||
cl_int medstatus;
|
cl_int medstatus;
|
||||||
clMedBuffer = clCreateBuffer ( (cl_context)src.clCxt->oclContext(), CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
|
clMedBuffer = clCreateBuffer ( *(cl_context*)(src.clCxt->getOpenCLContextPtr()), CL_MEM_READ_WRITE, buffersize, 0, &medstatus);
|
||||||
openCLSafeCall( medstatus );
|
openCLSafeCall( medstatus );
|
||||||
}
|
}
|
||||||
cl_command_queue clq = (cl_command_queue)src.clCxt->oclCommandQueue();
|
cl_command_queue clq = *(cl_command_queue*)(src.clCxt->getOpenCLCommandQueuePtr());
|
||||||
openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
|
openCLSafeCall( clAmdFftEnqueueTransform( plHandle,
|
||||||
is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
|
is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
|
||||||
1,
|
1,
|
||||||
|
@ -1430,7 +1430,7 @@ void cv::ocl::Scharr(const oclMat &src, oclMat &dst, int ddepth, int dx, int dy,
|
|||||||
|
|
||||||
void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale)
|
void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, double scale)
|
||||||
{
|
{
|
||||||
if (!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.type() == CV_64F)
|
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.type() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
|
||||||
return;
|
return;
|
||||||
|
@ -134,7 +134,7 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha,
|
|||||||
int offb = src2.offset;
|
int offb = src2.offset;
|
||||||
int offc = dst.offset;
|
int offc = dst.offset;
|
||||||
|
|
||||||
cl_command_queue clq = (cl_command_queue)src1.clCxt->oclCommandQueue();
|
cl_command_queue clq = *(cl_command_queue*)src1.clCxt->getOpenCLCommandQueuePtr();
|
||||||
switch(src1.type())
|
switch(src1.type())
|
||||||
{
|
{
|
||||||
case CV_32FC1:
|
case CV_32FC1:
|
||||||
|
@ -338,7 +338,7 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &poin
|
|||||||
CV_DbgAssert(points.type() == CV_32FC2);
|
CV_DbgAssert(points.type() == CV_32FC2);
|
||||||
points_v.resize(points.cols);
|
points_v.resize(points.cols);
|
||||||
openCLSafeCall(clEnqueueReadBuffer(
|
openCLSafeCall(clEnqueueReadBuffer(
|
||||||
*reinterpret_cast<cl_command_queue*>(getoclCommandQueue()),
|
*(cl_command_queue*)getClCommandQueuePtr(),
|
||||||
reinterpret_cast<cl_mem>(points.data),
|
reinterpret_cast<cl_mem>(points.data),
|
||||||
CL_TRUE,
|
CL_TRUE,
|
||||||
0,
|
0,
|
||||||
|
@ -745,7 +745,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
if( gimg.cols < minSize.width || gimg.rows < minSize.height )
|
if( gimg.cols < minSize.width || gimg.rows < minSize.height )
|
||||||
CV_Error(CV_StsError, "Image too small");
|
CV_Error(CV_StsError, "Image too small");
|
||||||
|
|
||||||
cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
|
cl_command_queue qu = getClCommandQueue(Context::getContext());
|
||||||
if( (flags & CV_HAAR_SCALE_IMAGE) )
|
if( (flags & CV_HAAR_SCALE_IMAGE) )
|
||||||
{
|
{
|
||||||
CvSize winSize0 = cascade->orig_window_size;
|
CvSize winSize0 = cascade->orig_window_size;
|
||||||
@ -788,7 +788,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
|
|
||||||
size_t blocksize = 8;
|
size_t blocksize = 8;
|
||||||
size_t localThreads[3] = { blocksize, blocksize , 1 };
|
size_t localThreads[3] = { blocksize, blocksize , 1 };
|
||||||
size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->computeUnits()) *localThreads[0],
|
size_t globalThreads[3] = { grp_per_CU *(gsum.clCxt->getDeviceInfo().maxComputeUnits) *localThreads[0],
|
||||||
localThreads[1], 1
|
localThreads[1], 1
|
||||||
};
|
};
|
||||||
int outputsz = 256 * globalThreads[0] / localThreads[0];
|
int outputsz = 256 * globalThreads[0] / localThreads[0];
|
||||||
@ -949,7 +949,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
|
|||||||
int grp_per_CU = 12;
|
int grp_per_CU = 12;
|
||||||
size_t blocksize = 8;
|
size_t blocksize = 8;
|
||||||
size_t localThreads[3] = { blocksize, blocksize , 1 };
|
size_t localThreads[3] = { blocksize, blocksize , 1 };
|
||||||
size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->computeUnits() *localThreads[0],
|
size_t globalThreads[3] = { grp_per_CU *gsum.clCxt->getDeviceInfo().maxComputeUnits *localThreads[0],
|
||||||
localThreads[1], 1 };
|
localThreads[1], 1 };
|
||||||
int outputsz = 256 * globalThreads[0] / localThreads[0];
|
int outputsz = 256 * globalThreads[0] / localThreads[0];
|
||||||
int nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) -
|
int nodenum = (datasize - sizeof(GpuHidHaarClassifierCascade) -
|
||||||
@ -1120,7 +1120,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
|
|||||||
int blocksize = 8;
|
int blocksize = 8;
|
||||||
int grp_per_CU = 12;
|
int grp_per_CU = 12;
|
||||||
size_t localThreads[3] = { blocksize, blocksize, 1 };
|
size_t localThreads[3] = { blocksize, blocksize, 1 };
|
||||||
size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->computeUnits() *localThreads[0],
|
size_t globalThreads[3] = { grp_per_CU * cv::ocl::Context::getContext()->getDeviceInfo().maxComputeUnits *localThreads[0],
|
||||||
localThreads[1],
|
localThreads[1],
|
||||||
1 };
|
1 };
|
||||||
int outputsz = 256 * globalThreads[0] / localThreads[0];
|
int outputsz = 256 * globalThreads[0] / localThreads[0];
|
||||||
@ -1148,7 +1148,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
|
|||||||
}
|
}
|
||||||
|
|
||||||
int *candidate;
|
int *candidate;
|
||||||
cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
|
cl_command_queue qu = getClCommandQueue(Context::getContext());
|
||||||
if( (flags & CV_HAAR_SCALE_IMAGE) )
|
if( (flags & CV_HAAR_SCALE_IMAGE) )
|
||||||
{
|
{
|
||||||
int indexy = 0;
|
int indexy = 0;
|
||||||
@ -1340,7 +1340,7 @@ void cv::ocl::OclCascadeClassifierBuf::Init(const int rows, const int cols,
|
|||||||
GpuHidHaarStageClassifier *stage;
|
GpuHidHaarStageClassifier *stage;
|
||||||
GpuHidHaarClassifier *classifier;
|
GpuHidHaarClassifier *classifier;
|
||||||
GpuHidHaarTreeNode *node;
|
GpuHidHaarTreeNode *node;
|
||||||
cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
|
cl_command_queue qu = getClCommandQueue(Context::getContext());
|
||||||
if( (flags & CV_HAAR_SCALE_IMAGE) )
|
if( (flags & CV_HAAR_SCALE_IMAGE) )
|
||||||
{
|
{
|
||||||
gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
|
gcascade = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
|
||||||
@ -1505,7 +1505,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
|
|||||||
CvSize sz;
|
CvSize sz;
|
||||||
CvSize winSize0 = oldCascade->orig_window_size;
|
CvSize winSize0 = oldCascade->orig_window_size;
|
||||||
detect_piramid_info *scaleinfo;
|
detect_piramid_info *scaleinfo;
|
||||||
cl_command_queue qu = reinterpret_cast<cl_command_queue>(Context::getContext()->oclCommandQueue());
|
cl_command_queue qu = getClCommandQueue(Context::getContext());
|
||||||
if (flags & CV_HAAR_SCALE_IMAGE)
|
if (flags & CV_HAAR_SCALE_IMAGE)
|
||||||
{
|
{
|
||||||
for(factor = 1.f;; factor *= scaleFactor)
|
for(factor = 1.f;; factor *= scaleFactor)
|
||||||
|
@ -157,7 +157,7 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
|
|||||||
|
|
||||||
effect_size = Size(0, 0);
|
effect_size = Size(0, 0);
|
||||||
|
|
||||||
if (queryDeviceInfo<IS_CPU_DEVICE, bool>())
|
if (isCpuDevice())
|
||||||
hog_device_cpu = true;
|
hog_device_cpu = true;
|
||||||
else
|
else
|
||||||
hog_device_cpu = false;
|
hog_device_cpu = false;
|
||||||
@ -1670,9 +1670,9 @@ void cv::ocl::device::hog::compute_hists(int nbins,
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
|
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
|
||||||
int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
|
size_t wave_size = queryWaveFrontSize(kernel);
|
||||||
char opt[32] = {0};
|
char opt[32] = {0};
|
||||||
sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
|
sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size);
|
||||||
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
||||||
localThreads, args, -1, -1, opt);
|
localThreads, args, -1, -1, opt);
|
||||||
}
|
}
|
||||||
@ -1734,9 +1734,9 @@ void cv::ocl::device::hog::normalize_hists(int nbins,
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
|
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
|
||||||
int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
|
size_t wave_size = queryWaveFrontSize(kernel);
|
||||||
char opt[32] = {0};
|
char opt[32] = {0};
|
||||||
sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
|
sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size);
|
||||||
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
||||||
localThreads, args, -1, -1, opt);
|
localThreads, args, -1, -1, opt);
|
||||||
}
|
}
|
||||||
@ -1803,9 +1803,9 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width,
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
|
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
|
||||||
int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
|
size_t wave_size = queryWaveFrontSize(kernel);
|
||||||
char opt[32] = {0};
|
char opt[32] = {0};
|
||||||
sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
|
sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size);
|
||||||
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
||||||
localThreads, args, -1, -1, opt);
|
localThreads, args, -1, -1, opt);
|
||||||
}
|
}
|
||||||
|
@ -289,7 +289,7 @@ namespace cv
|
|||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
|
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
|
||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
|
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
|
||||||
|
|
||||||
if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
|
if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
|
||||||
{
|
{
|
||||||
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
|
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
|
||||||
}
|
}
|
||||||
@ -317,7 +317,7 @@ namespace cv
|
|||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
|
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.cols));
|
||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
|
args.push_back( make_pair(sizeof(cl_int), (void *)&map1.rows));
|
||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
|
args.push_back( make_pair(sizeof(cl_int), (void *)&cols));
|
||||||
if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
|
if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
|
||||||
{
|
{
|
||||||
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
|
args.push_back( make_pair(sizeof(cl_double4), (void *)&borderValue));
|
||||||
}
|
}
|
||||||
@ -380,7 +380,7 @@ namespace cv
|
|||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
|
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
|
||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
|
||||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
|
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
|
||||||
if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
|
if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
|
||||||
{
|
{
|
||||||
args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d));
|
args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d));
|
||||||
args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d));
|
args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d));
|
||||||
@ -802,12 +802,12 @@ namespace cv
|
|||||||
string kernelName = "warpAffine" + s[interpolation];
|
string kernelName = "warpAffine" + s[interpolation];
|
||||||
|
|
||||||
|
|
||||||
if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
|
if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
|
||||||
{
|
{
|
||||||
cl_int st;
|
cl_int st;
|
||||||
coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
|
coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(F) * 2 * 3, NULL, &st );
|
||||||
openCLVerifyCall(st);
|
openCLVerifyCall(st);
|
||||||
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
|
openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(F) * 2 * 3, coeffs, 0, 0, 0));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -817,8 +817,8 @@ namespace cv
|
|||||||
{
|
{
|
||||||
float_coeffs[m][n] = coeffs[m][n];
|
float_coeffs[m][n] = coeffs[m][n];
|
||||||
}
|
}
|
||||||
coeffs_cm = clCreateBuffer( (cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st );
|
coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(float) * 2 * 3, NULL, &st );
|
||||||
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0));
|
openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 2 * 3, float_coeffs, 0, 0, 0));
|
||||||
|
|
||||||
}
|
}
|
||||||
//TODO: improve this kernel
|
//TODO: improve this kernel
|
||||||
@ -872,12 +872,12 @@ namespace cv
|
|||||||
string s[3] = {"NN", "Linear", "Cubic"};
|
string s[3] = {"NN", "Linear", "Cubic"};
|
||||||
string kernelName = "warpPerspective" + s[interpolation];
|
string kernelName = "warpPerspective" + s[interpolation];
|
||||||
|
|
||||||
if(src.clCxt->supportsFeature(Context::CL_DOUBLE))
|
if(src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
|
||||||
{
|
{
|
||||||
cl_int st;
|
cl_int st;
|
||||||
coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
|
coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(double) * 3 * 3, NULL, &st );
|
||||||
openCLVerifyCall(st);
|
openCLVerifyCall(st);
|
||||||
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
|
openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(double) * 3 * 3, coeffs, 0, 0, 0));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -886,9 +886,9 @@ namespace cv
|
|||||||
for(int n = 0; n < 3; n++)
|
for(int n = 0; n < 3; n++)
|
||||||
float_coeffs[m][n] = coeffs[m][n];
|
float_coeffs[m][n] = coeffs[m][n];
|
||||||
|
|
||||||
coeffs_cm = clCreateBuffer((cl_context) clCxt->oclContext(), CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st );
|
coeffs_cm = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, sizeof(float) * 3 * 3, NULL, &st );
|
||||||
openCLVerifyCall(st);
|
openCLVerifyCall(st);
|
||||||
openCLSafeCall(clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0));
|
openCLSafeCall(clEnqueueWriteBuffer(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), (cl_mem)coeffs_cm, 1, 0, sizeof(float) * 3 * 3, float_coeffs, 0, 0, 0));
|
||||||
}
|
}
|
||||||
//TODO: improve this kernel
|
//TODO: improve this kernel
|
||||||
size_t blkSizeX = 16, blkSizeY = 16;
|
size_t blkSizeX = 16, blkSizeY = 16;
|
||||||
@ -994,7 +994,7 @@ namespace cv
|
|||||||
void integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
|
void integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
|
||||||
{
|
{
|
||||||
CV_Assert(src.type() == CV_8UC1);
|
CV_Assert(src.type() == CV_8UC1);
|
||||||
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "select device don't support double");
|
CV_Error(CV_GpuNotSupported, "select device don't support double");
|
||||||
}
|
}
|
||||||
@ -1192,7 +1192,7 @@ namespace cv
|
|||||||
void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize,
|
void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize,
|
||||||
double k, int borderType)
|
double k, int borderType)
|
||||||
{
|
{
|
||||||
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "select device don't support double");
|
CV_Error(CV_GpuNotSupported, "select device don't support double");
|
||||||
}
|
}
|
||||||
@ -1211,7 +1211,7 @@ namespace cv
|
|||||||
|
|
||||||
void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType)
|
void cornerMinEigenVal_dxdy(const oclMat &src, oclMat &dst, oclMat &dx, oclMat &dy, int blockSize, int ksize, int borderType)
|
||||||
{
|
{
|
||||||
if(!src.clCxt->supportsFeature(Context::CL_DOUBLE) && src.depth() == CV_64F)
|
if(!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "select device don't support double");
|
CV_Error(CV_GpuNotSupported, "select device don't support double");
|
||||||
}
|
}
|
||||||
@ -1512,17 +1512,17 @@ namespace cv
|
|||||||
String kernelName = "calcLut";
|
String kernelName = "calcLut";
|
||||||
size_t localThreads[3] = { 32, 8, 1 };
|
size_t localThreads[3] = { 32, 8, 1 };
|
||||||
size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 };
|
size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 };
|
||||||
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
|
bool is_cpu = isCpuDevice();
|
||||||
if (is_cpu)
|
if (is_cpu)
|
||||||
openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, (char*)" -D CPU");
|
openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, (char*)" -D CPU");
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &imgproc_clahe, kernelName);
|
cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &imgproc_clahe, kernelName);
|
||||||
int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
|
size_t wave_size = queryWaveFrontSize(kernel);
|
||||||
openCLSafeCall(clReleaseKernel(kernel));
|
openCLSafeCall(clReleaseKernel(kernel));
|
||||||
|
|
||||||
static char opt[20] = {0};
|
static char opt[20] = {0};
|
||||||
sprintf(opt, " -D WAVE_SIZE=%d", wave_size);
|
sprintf(opt, " -D WAVE_SIZE=%d", (int)wave_size);
|
||||||
openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt);
|
openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -44,17 +44,11 @@
|
|||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
|
#include "opencl_kernels.hpp"
|
||||||
|
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
using namespace cv::ocl;
|
using namespace cv::ocl;
|
||||||
|
|
||||||
namespace cv
|
|
||||||
{
|
|
||||||
namespace ocl
|
|
||||||
{
|
|
||||||
extern const char* knearest;//knearest
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
KNearestNeighbour::KNearestNeighbour()
|
KNearestNeighbour::KNearestNeighbour()
|
||||||
{
|
{
|
||||||
clear();
|
clear();
|
||||||
@ -112,7 +106,7 @@ void KNearestNeighbour::find_nearest(const oclMat& samples, int k, oclMat& lable
|
|||||||
k1 = MIN( k1, k );
|
k1 = MIN( k1, k );
|
||||||
|
|
||||||
String kernel_name = "knn_find_nearest";
|
String kernel_name = "knn_find_nearest";
|
||||||
cl_ulong local_memory_size = queryLocalMemInfo();
|
cl_ulong local_memory_size = (cl_ulong)Context::getContext()->getDeviceInfo().localMemorySize;
|
||||||
int nThreads = local_memory_size / (2 * k * 4);
|
int nThreads = local_memory_size / (2 * k * 4);
|
||||||
if(nThreads >= 256)
|
if(nThreads >= 256)
|
||||||
nThreads = 256;
|
nThreads = 256;
|
||||||
@ -122,7 +116,7 @@ void KNearestNeighbour::find_nearest(const oclMat& samples, int k, oclMat& lable
|
|||||||
size_t global_thread[] = {1, samples.rows, 1};
|
size_t global_thread[] = {1, samples.rows, 1};
|
||||||
|
|
||||||
char build_option[50];
|
char build_option[50];
|
||||||
if(!Context::getContext()->supportsFeature(Context::CL_DOUBLE))
|
if(!Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
|
||||||
{
|
{
|
||||||
sprintf(build_option, " ");
|
sprintf(build_option, " ");
|
||||||
}else
|
}else
|
||||||
|
@ -134,7 +134,6 @@ void cv::ocl::oclMat::upload(const Mat &m)
|
|||||||
Size wholeSize;
|
Size wholeSize;
|
||||||
Point ofs;
|
Point ofs;
|
||||||
m.locateROI(wholeSize, ofs);
|
m.locateROI(wholeSize, ofs);
|
||||||
|
|
||||||
create(wholeSize, m.type());
|
create(wholeSize, m.type());
|
||||||
|
|
||||||
if (m.channels() == 3)
|
if (m.channels() == 3)
|
||||||
@ -142,13 +141,12 @@ void cv::ocl::oclMat::upload(const Mat &m)
|
|||||||
int pitch = wholeSize.width * 3 * m.elemSize1();
|
int pitch = wholeSize.width * 3 * m.elemSize1();
|
||||||
int tail_padding = m.elemSize1() * 3072;
|
int tail_padding = m.elemSize1() * 3072;
|
||||||
int err;
|
int err;
|
||||||
cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
|
cl_mem temp = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE,
|
||||||
(pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
|
(pitch * wholeSize.height + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
|
||||||
openCLVerifyCall(err);
|
openCLVerifyCall(err);
|
||||||
|
|
||||||
openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3);
|
openCLMemcpy2D(clCxt, temp, pitch, m.datastart, m.step, wholeSize.width * m.elemSize(), wholeSize.height, clMemcpyHostToDevice, 3);
|
||||||
convert_C3C4(temp, *this);
|
convert_C3C4(temp, *this);
|
||||||
|
|
||||||
openCLSafeCall(clReleaseMemObject(temp));
|
openCLSafeCall(clReleaseMemObject(temp));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -197,13 +195,12 @@ void cv::ocl::oclMat::download(cv::Mat &m) const
|
|||||||
int pitch = wholecols * 3 * m.elemSize1();
|
int pitch = wholecols * 3 * m.elemSize1();
|
||||||
int tail_padding = m.elemSize1() * 3072;
|
int tail_padding = m.elemSize1() * 3072;
|
||||||
int err;
|
int err;
|
||||||
cl_mem temp = clCreateBuffer((cl_context)clCxt->oclContext(), CL_MEM_READ_WRITE,
|
cl_mem temp = clCreateBuffer(*(cl_context*)clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE,
|
||||||
(pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
|
(pitch * wholerows + tail_padding - 1) / tail_padding * tail_padding, 0, &err);
|
||||||
openCLVerifyCall(err);
|
openCLVerifyCall(err);
|
||||||
|
|
||||||
convert_C4C3(*this, temp);
|
convert_C4C3(*this, temp);
|
||||||
openCLMemcpy2D(clCxt, m.data, m.step, temp, pitch, wholecols * m.elemSize(), wholerows, clMemcpyDeviceToHost, 3);
|
openCLMemcpy2D(clCxt, m.data, m.step, temp, pitch, wholecols * m.elemSize(), wholerows, clMemcpyDeviceToHost, 3);
|
||||||
|
|
||||||
openCLSafeCall(clReleaseMemObject(temp));
|
openCLSafeCall(clReleaseMemObject(temp));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -319,7 +316,7 @@ static void convert_run(const oclMat &src, oclMat &dst, double alpha, double bet
|
|||||||
|
|
||||||
void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double beta ) const
|
void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double beta ) const
|
||||||
{
|
{
|
||||||
if (!clCxt->supportsFeature(Context::CL_DOUBLE) &&
|
if (!clCxt->supportsFeature(FEATURE_CL_DOUBLE) &&
|
||||||
(depth() == CV_64F || dst.depth() == CV_64F))
|
(depth() == CV_64F || dst.depth() == CV_64F))
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
|
||||||
@ -380,7 +377,7 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, stri
|
|||||||
#ifdef CL_VERSION_1_2
|
#ifdef CL_VERSION_1_2
|
||||||
// this enables backwards portability to
|
// this enables backwards portability to
|
||||||
// run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
|
// run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
|
||||||
if (Context::getContext()->supportsFeature(Context::CL_VER_1_2) &&
|
if (Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2) &&
|
||||||
dst.offset == 0 && dst.cols == dst.wholecols)
|
dst.offset == 0 && dst.cols == dst.wholecols)
|
||||||
{
|
{
|
||||||
const int sizeofMap[][7] =
|
const int sizeofMap[][7] =
|
||||||
@ -392,7 +389,7 @@ static void set_to_withoutmask_run(const oclMat &dst, const Scalar &scalar, stri
|
|||||||
};
|
};
|
||||||
int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()];
|
int sizeofGeneric = sizeofMap[dst.oclchannels() - 1][dst.depth()];
|
||||||
|
|
||||||
clEnqueueFillBuffer((cl_command_queue)dst.clCxt->oclCommandQueue(),
|
clEnqueueFillBuffer(getClCommandQueue(dst.clCxt),
|
||||||
(cl_mem)dst.data, (void*)mat.data, sizeofGeneric,
|
(cl_mem)dst.data, (void*)mat.data, sizeofGeneric,
|
||||||
0, dst.step * dst.rows, 0, NULL, NULL);
|
0, dst.step * dst.rows, 0, NULL, NULL);
|
||||||
}
|
}
|
||||||
|
@ -101,15 +101,15 @@ namespace cv
|
|||||||
for(size_t i = 0; i < args.size(); i ++)
|
for(size_t i = 0; i < args.size(); i ++)
|
||||||
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
|
openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
|
||||||
|
|
||||||
openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL, globalThreads,
|
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr(), kernel, 3, NULL, globalThreads,
|
||||||
localThreads, 0, NULL, NULL));
|
localThreads, 0, NULL, NULL));
|
||||||
|
|
||||||
switch(finish_mode)
|
switch(finish_mode)
|
||||||
{
|
{
|
||||||
case CLFINISH:
|
case CLFINISH:
|
||||||
clFinish((cl_command_queue)clCxt->oclCommandQueue());
|
clFinish(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr());
|
||||||
case CLFLUSH:
|
case CLFLUSH:
|
||||||
clFlush((cl_command_queue)clCxt->oclCommandQueue());
|
clFlush(*(cl_command_queue*)clCxt->getOpenCLCommandQueuePtr());
|
||||||
break;
|
break;
|
||||||
case DISABLE:
|
case DISABLE:
|
||||||
default:
|
default:
|
||||||
@ -178,7 +178,7 @@ namespace cv
|
|||||||
#ifdef CL_VERSION_1_2
|
#ifdef CL_VERSION_1_2
|
||||||
//this enables backwards portability to
|
//this enables backwards portability to
|
||||||
//run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
|
//run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
|
||||||
if(Context::getContext()->supportsFeature(Context::CL_VER_1_2))
|
if(Context::getContext()->supportsFeature(FEATURE_CL_VER_1_2))
|
||||||
{
|
{
|
||||||
cl_image_desc desc;
|
cl_image_desc desc;
|
||||||
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||||
@ -191,13 +191,13 @@ namespace cv
|
|||||||
desc.buffer = NULL;
|
desc.buffer = NULL;
|
||||||
desc.num_mip_levels = 0;
|
desc.num_mip_levels = 0;
|
||||||
desc.num_samples = 0;
|
desc.num_samples = 0;
|
||||||
texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
|
texture = clCreateImage(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
{
|
{
|
||||||
texture = clCreateImage2D(
|
texture = clCreateImage2D(
|
||||||
(cl_context)mat.clCxt->oclContext(),
|
*(cl_context*)mat.clCxt->getOpenCLContextPtr(),
|
||||||
CL_MEM_READ_WRITE,
|
CL_MEM_READ_WRITE,
|
||||||
&format,
|
&format,
|
||||||
mat.cols,
|
mat.cols,
|
||||||
@ -212,22 +212,22 @@ namespace cv
|
|||||||
cl_mem devData;
|
cl_mem devData;
|
||||||
if (mat.cols * mat.elemSize() != mat.step)
|
if (mat.cols * mat.elemSize() != mat.step)
|
||||||
{
|
{
|
||||||
devData = clCreateBuffer((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_ONLY, mat.cols * mat.rows
|
devData = clCreateBuffer(*(cl_context*)mat.clCxt->getOpenCLContextPtr(), CL_MEM_READ_ONLY, mat.cols * mat.rows
|
||||||
* mat.elemSize(), NULL, NULL);
|
* mat.elemSize(), NULL, NULL);
|
||||||
const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
|
const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
|
||||||
clEnqueueCopyBufferRect((cl_command_queue)mat.clCxt->oclCommandQueue(), (cl_mem)mat.data, devData, origin, origin,
|
clEnqueueCopyBufferRect(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), (cl_mem)mat.data, devData, origin, origin,
|
||||||
regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
|
regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
|
||||||
clFlush((cl_command_queue)mat.clCxt->oclCommandQueue());
|
clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
devData = (cl_mem)mat.data;
|
devData = (cl_mem)mat.data;
|
||||||
}
|
}
|
||||||
|
|
||||||
clEnqueueCopyBufferToImage((cl_command_queue)mat.clCxt->oclCommandQueue(), devData, texture, 0, origin, region, 0, NULL, 0);
|
clEnqueueCopyBufferToImage(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr(), devData, texture, 0, origin, region, 0, NULL, 0);
|
||||||
if ((mat.cols * mat.elemSize() != mat.step))
|
if ((mat.cols * mat.elemSize() != mat.step))
|
||||||
{
|
{
|
||||||
clFlush((cl_command_queue)mat.clCxt->oclCommandQueue());
|
clFlush(*(cl_command_queue*)mat.clCxt->getOpenCLCommandQueuePtr());
|
||||||
clReleaseMemObject(devData);
|
clReleaseMemObject(devData);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -259,7 +259,7 @@ namespace cv
|
|||||||
try
|
try
|
||||||
{
|
{
|
||||||
cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func");
|
cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func");
|
||||||
finish();
|
cv::ocl::finish();
|
||||||
_support = true;
|
_support = true;
|
||||||
}
|
}
|
||||||
catch (const cv::Exception& e)
|
catch (const cv::Exception& e)
|
||||||
|
@ -106,7 +106,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
|
|||||||
|
|
||||||
bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2;
|
bool is_float = CV_SEQ_ELTYPE(contour) == CV_32FC2;
|
||||||
|
|
||||||
if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE) && is_float)
|
if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE) && is_float)
|
||||||
{
|
{
|
||||||
CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
|
CV_Error(CV_StsUnsupportedFormat, "Moments - double is not supported by your GPU!");
|
||||||
}
|
}
|
||||||
@ -146,7 +146,7 @@ static void icvContourMoments( CvSeq* contour, CvMoments* mom )
|
|||||||
|
|
||||||
cv::Mat dst(dst_a);
|
cv::Mat dst(dst_a);
|
||||||
a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
|
a00 = a10 = a01 = a20 = a11 = a02 = a30 = a21 = a12 = a03 = 0.0;
|
||||||
if (!cv::ocl::Context::getContext()->supportsFeature(Context::CL_DOUBLE))
|
if (!cv::ocl::Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
|
||||||
{
|
{
|
||||||
for (int i = 0; i < contour->total; ++i)
|
for (int i = 0; i < contour->total; ++i)
|
||||||
{
|
{
|
||||||
|
@ -15,8 +15,8 @@
|
|||||||
// Third party copyrights are property of their respective owners.
|
// Third party copyrights are property of their respective owners.
|
||||||
//
|
//
|
||||||
// @Authors
|
// @Authors
|
||||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||||
// Yao Wang, yao@multicorewareinc.com
|
// Yao Wang, yao@multicorewareinc.com
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
@ -125,7 +125,7 @@ static void lkSparse_run(oclMat &I, oclMat &J,
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
|
||||||
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
|
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
|
||||||
|
|
||||||
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
|
bool is_cpu = isCpuDevice();
|
||||||
if (is_cpu)
|
if (is_cpu)
|
||||||
{
|
{
|
||||||
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU");
|
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU");
|
||||||
@ -139,7 +139,7 @@ static void lkSparse_run(oclMat &I, oclMat &J,
|
|||||||
stringstream idxStr;
|
stringstream idxStr;
|
||||||
idxStr << kernelName << "_C" << I.oclchannels() << "_D" << I.depth();
|
idxStr << kernelName << "_C" << I.oclchannels() << "_D" << I.depth();
|
||||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str());
|
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &pyrlk, idxStr.str());
|
||||||
int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
|
int wave_size = (int)queryWaveFrontSize(kernel);
|
||||||
openCLSafeCall(clReleaseKernel(kernel));
|
openCLSafeCall(clReleaseKernel(kernel));
|
||||||
|
|
||||||
static char opt[32] = {0};
|
static char opt[32] = {0};
|
||||||
|
@ -15,8 +15,8 @@
|
|||||||
// Third party copyrights are property of their respective owners.
|
// Third party copyrights are property of their respective owners.
|
||||||
//
|
//
|
||||||
// @Authors
|
// @Authors
|
||||||
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
||||||
// Yao Wang, yao@multicorewareinc.com
|
// Yao Wang, yao@multicorewareinc.com
|
||||||
//
|
//
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
@ -75,7 +75,7 @@ namespace cv
|
|||||||
{
|
{
|
||||||
static void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst)
|
static void merge_vector_run(const oclMat *mat_src, size_t n, oclMat &mat_dst)
|
||||||
{
|
{
|
||||||
if(!mat_dst.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_dst.type() == CV_64F)
|
if(!mat_dst.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_dst.type() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
|
||||||
return;
|
return;
|
||||||
@ -170,7 +170,7 @@ namespace cv
|
|||||||
static void split_vector_run(const oclMat &mat_src, oclMat *mat_dst)
|
static void split_vector_run(const oclMat &mat_src, oclMat *mat_dst)
|
||||||
{
|
{
|
||||||
|
|
||||||
if(!mat_src.clCxt->supportsFeature(Context::CL_DOUBLE) && mat_src.type() == CV_64F)
|
if(!mat_src.clCxt->supportsFeature(FEATURE_CL_DOUBLE) && mat_src.type() == CV_64F)
|
||||||
{
|
{
|
||||||
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
|
CV_Error(CV_GpuNotSupported, "Selected device don't support double\r\n");
|
||||||
return;
|
return;
|
||||||
|
@ -150,10 +150,10 @@ namespace cv
|
|||||||
openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&rthis.min_disp_th));
|
openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&rthis.min_disp_th));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&left.step));
|
openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&left.step));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&rthis.ndisp));
|
openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&rthis.ndisp));
|
||||||
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
|
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
|
||||||
globalThreads, localThreads, 0, NULL, NULL));
|
globalThreads, localThreads, 0, NULL, NULL));
|
||||||
|
|
||||||
clFinish(*(cl_command_queue*)getoclCommandQueue());
|
clFinish(*(cl_command_queue*)getClCommandQueuePtr());
|
||||||
openCLSafeCall(clReleaseKernel(kernel));
|
openCLSafeCall(clReleaseKernel(kernel));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -200,9 +200,9 @@ namespace cv
|
|||||||
openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int), (void *)&rthis.min_disp_th));
|
openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_int), (void *)&rthis.min_disp_th));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&cdisp_step1));
|
openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&cdisp_step1));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&msg_step));
|
openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&msg_step));
|
||||||
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 3, NULL,
|
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 3, NULL,
|
||||||
globalThreads, localThreads, 0, NULL, NULL));
|
globalThreads, localThreads, 0, NULL, NULL));
|
||||||
clFinish(*(cl_command_queue*)getoclCommandQueue());
|
clFinish(*(cl_command_queue*)getClCommandQueuePtr());
|
||||||
openCLSafeCall(clReleaseKernel(kernel));
|
openCLSafeCall(clReleaseKernel(kernel));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -235,10 +235,10 @@ namespace cv
|
|||||||
openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step));
|
openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step));
|
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp));
|
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp));
|
||||||
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
|
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
|
||||||
globalThreads, localThreads, 0, NULL, NULL));
|
globalThreads, localThreads, 0, NULL, NULL));
|
||||||
|
|
||||||
clFinish(*(cl_command_queue*)getoclCommandQueue());
|
clFinish(*(cl_command_queue*)getClCommandQueuePtr());
|
||||||
openCLSafeCall(clReleaseKernel(kernel));
|
openCLSafeCall(clReleaseKernel(kernel));
|
||||||
}
|
}
|
||||||
static void get_first_initial_global_caller(uchar *data_cost_selected, uchar *disp_selected_pyr,
|
static void get_first_initial_global_caller(uchar *data_cost_selected, uchar *disp_selected_pyr,
|
||||||
@ -270,10 +270,10 @@ namespace cv
|
|||||||
openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step));
|
openCLSafeCall(clSetKernelArg(kernel, 6, sizeof(cl_int), (void *)&msg_step));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step));
|
openCLSafeCall(clSetKernelArg(kernel, 7, sizeof(cl_int), (void *)&disp_step));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp));
|
openCLSafeCall(clSetKernelArg(kernel, 8, sizeof(cl_int), (void *)&rthis.ndisp));
|
||||||
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
|
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
|
||||||
globalThreads, localThreads, 0, NULL, NULL));
|
globalThreads, localThreads, 0, NULL, NULL));
|
||||||
|
|
||||||
clFinish(*(cl_command_queue*)getoclCommandQueue());
|
clFinish(*(cl_command_queue*)getClCommandQueuePtr());
|
||||||
openCLSafeCall(clReleaseKernel(kernel));
|
openCLSafeCall(clReleaseKernel(kernel));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -340,10 +340,10 @@ namespace cv
|
|||||||
openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.max_data_term));
|
openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.max_data_term));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&left.step));
|
openCLSafeCall(clSetKernelArg(kernel, 15, sizeof(cl_int), (void *)&left.step));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&rthis.min_disp_th));
|
openCLSafeCall(clSetKernelArg(kernel, 16, sizeof(cl_int), (void *)&rthis.min_disp_th));
|
||||||
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
|
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
|
||||||
globalThreads, localThreads, 0, NULL, NULL));
|
globalThreads, localThreads, 0, NULL, NULL));
|
||||||
|
|
||||||
clFinish(*(cl_command_queue*)getoclCommandQueue());
|
clFinish(*(cl_command_queue*)getClCommandQueuePtr());
|
||||||
openCLSafeCall(clReleaseKernel(kernel));
|
openCLSafeCall(clReleaseKernel(kernel));
|
||||||
}
|
}
|
||||||
static void compute_data_cost_reduce_caller(uchar *disp_selected_pyr, uchar *data_cost,
|
static void compute_data_cost_reduce_caller(uchar *disp_selected_pyr, uchar *data_cost,
|
||||||
@ -391,10 +391,10 @@ namespace cv
|
|||||||
openCLSafeCall(clSetKernelArg(kernel, 17, sizeof(cl_float), (void *)&rthis.max_data_term));
|
openCLSafeCall(clSetKernelArg(kernel, 17, sizeof(cl_float), (void *)&rthis.max_data_term));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 18, sizeof(cl_int), (void *)&left.step));
|
openCLSafeCall(clSetKernelArg(kernel, 18, sizeof(cl_int), (void *)&left.step));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 19, sizeof(cl_int), (void *)&rthis.min_disp_th));
|
openCLSafeCall(clSetKernelArg(kernel, 19, sizeof(cl_int), (void *)&rthis.min_disp_th));
|
||||||
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 3, NULL,
|
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 3, NULL,
|
||||||
globalThreads, localThreads, 0, NULL, NULL));
|
globalThreads, localThreads, 0, NULL, NULL));
|
||||||
|
|
||||||
clFinish(*(cl_command_queue*)getoclCommandQueue());
|
clFinish(*(cl_command_queue*)getClCommandQueuePtr());
|
||||||
openCLSafeCall(clReleaseKernel(kernel));
|
openCLSafeCall(clReleaseKernel(kernel));
|
||||||
}
|
}
|
||||||
static void compute_data_cost(uchar *disp_selected_pyr, uchar *data_cost, StereoConstantSpaceBP &rthis,
|
static void compute_data_cost(uchar *disp_selected_pyr, uchar *data_cost, StereoConstantSpaceBP &rthis,
|
||||||
@ -458,10 +458,10 @@ namespace cv
|
|||||||
openCLSafeCall(clSetKernelArg(kernel, 20, sizeof(cl_int), (void *)&disp_step2));
|
openCLSafeCall(clSetKernelArg(kernel, 20, sizeof(cl_int), (void *)&disp_step2));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 21, sizeof(cl_int), (void *)&msg_step1));
|
openCLSafeCall(clSetKernelArg(kernel, 21, sizeof(cl_int), (void *)&msg_step1));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 22, sizeof(cl_int), (void *)&msg_step2));
|
openCLSafeCall(clSetKernelArg(kernel, 22, sizeof(cl_int), (void *)&msg_step2));
|
||||||
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
|
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
|
||||||
globalThreads, localThreads, 0, NULL, NULL));
|
globalThreads, localThreads, 0, NULL, NULL));
|
||||||
|
|
||||||
clFinish(*(cl_command_queue*)getoclCommandQueue());
|
clFinish(*(cl_command_queue*)getClCommandQueuePtr());
|
||||||
openCLSafeCall(clReleaseKernel(kernel));
|
openCLSafeCall(clReleaseKernel(kernel));
|
||||||
}
|
}
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
@ -500,10 +500,10 @@ namespace cv
|
|||||||
openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step));
|
openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&msg_step));
|
openCLSafeCall(clSetKernelArg(kernel, 13, sizeof(cl_int), (void *)&msg_step));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.disc_single_jump));
|
openCLSafeCall(clSetKernelArg(kernel, 14, sizeof(cl_float), (void *)&rthis.disc_single_jump));
|
||||||
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
|
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
|
||||||
globalThreads, localThreads, 0, NULL, NULL));
|
globalThreads, localThreads, 0, NULL, NULL));
|
||||||
|
|
||||||
clFinish(*(cl_command_queue*)getoclCommandQueue());
|
clFinish(*(cl_command_queue*)getClCommandQueuePtr());
|
||||||
openCLSafeCall(clReleaseKernel(kernel));
|
openCLSafeCall(clReleaseKernel(kernel));
|
||||||
}
|
}
|
||||||
static void calc_all_iterations(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected,
|
static void calc_all_iterations(uchar *u, uchar *d, uchar *l, uchar *r, uchar *data_cost_selected,
|
||||||
@ -552,10 +552,10 @@ namespace cv
|
|||||||
openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&nr_plane));
|
openCLSafeCall(clSetKernelArg(kernel, 10, sizeof(cl_int), (void *)&nr_plane));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&msg_step));
|
openCLSafeCall(clSetKernelArg(kernel, 11, sizeof(cl_int), (void *)&msg_step));
|
||||||
openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step));
|
openCLSafeCall(clSetKernelArg(kernel, 12, sizeof(cl_int), (void *)&disp_step));
|
||||||
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getoclCommandQueue(), kernel, 2, NULL,
|
openCLSafeCall(clEnqueueNDRangeKernel(*(cl_command_queue*)getClCommandQueuePtr(), kernel, 2, NULL,
|
||||||
globalThreads, localThreads, 0, NULL, NULL));
|
globalThreads, localThreads, 0, NULL, NULL));
|
||||||
|
|
||||||
clFinish(*(cl_command_queue*)getoclCommandQueue());
|
clFinish(*(cl_command_queue*)getClCommandQueuePtr());
|
||||||
openCLSafeCall(clReleaseKernel(kernel));
|
openCLSafeCall(clReleaseKernel(kernel));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -95,7 +95,10 @@ namespace cv
|
|||||||
con_struct -> cmax_disc_term = max_disc_term;
|
con_struct -> cmax_disc_term = max_disc_term;
|
||||||
con_struct -> cdisc_single_jump = disc_single_jump;
|
con_struct -> cdisc_single_jump = disc_single_jump;
|
||||||
|
|
||||||
cl_con_struct = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()), (void *)con_struct,
|
Context* clCtx = Context::getContext();
|
||||||
|
cl_context clContext = *(cl_context*)(clCtx->getOpenCLContextPtr());
|
||||||
|
cl_command_queue clCmdQueue = *(cl_command_queue*)(clCtx->getOpenCLCommandQueuePtr());
|
||||||
|
cl_con_struct = load_constant(clContext, clCmdQueue, (void *)con_struct,
|
||||||
sizeof(con_struct_t));
|
sizeof(con_struct_t));
|
||||||
|
|
||||||
delete con_struct;
|
delete con_struct;
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
// Third party copyrights are property of their respective owners.
|
// Third party copyrights are property of their respective owners.
|
||||||
//
|
//
|
||||||
// @Authors
|
// @Authors
|
||||||
// Jin Ma, jin@multicorewareinc.com
|
// Jin Ma, jin@multicorewareinc.com
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
// are permitted provided that the following conditions are met:
|
// are permitted provided that the following conditions are met:
|
||||||
//
|
//
|
||||||
|
@ -80,18 +80,18 @@ int main(int argc, char **argv)
|
|||||||
const char *keys =
|
const char *keys =
|
||||||
"{ h | help | false | print help message }"
|
"{ h | help | false | print help message }"
|
||||||
"{ t | type | gpu | set device type:cpu or gpu}"
|
"{ t | type | gpu | set device type:cpu or gpu}"
|
||||||
"{ p | platform | 0 | set platform id }"
|
"{ p | platform | -1 | set platform id }"
|
||||||
"{ d | device | 0 | set device id }";
|
"{ d | device | 0 | set device id }";
|
||||||
|
|
||||||
CommandLineParser cmd(argc, argv, keys);
|
CommandLineParser cmd(argc, argv, keys);
|
||||||
if (cmd.get<bool>("help"))
|
if (cmd.get<bool>("help"))
|
||||||
{
|
{
|
||||||
cout << "Avaible options besides goole test option:" << endl;
|
cout << "Available options besides google test option:" << endl;
|
||||||
cmd.printParams();
|
cmd.printParams();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
string type = cmd.get<string>("type");
|
string type = cmd.get<string>("type");
|
||||||
unsigned int pid = cmd.get<unsigned int>("platform");
|
int pid = cmd.get<int>("platform");
|
||||||
int device = cmd.get<int>("device");
|
int device = cmd.get<int>("device");
|
||||||
|
|
||||||
print_info();
|
print_info();
|
||||||
@ -100,24 +100,29 @@ int main(int argc, char **argv)
|
|||||||
{
|
{
|
||||||
flag = CVCL_DEVICE_TYPE_CPU;
|
flag = CVCL_DEVICE_TYPE_CPU;
|
||||||
}
|
}
|
||||||
std::vector<cv::ocl::Info> oclinfo;
|
|
||||||
int devnums = getDevice(oclinfo, flag);
|
cv::ocl::PlatformsInfo platformsInfo;
|
||||||
if(devnums <= device || device < 0)
|
cv::ocl::getOpenCLPlatforms(platformsInfo);
|
||||||
|
if (pid >= (int)platformsInfo.size())
|
||||||
{
|
{
|
||||||
std::cout << "device invalid\n";
|
std::cout << "platform is invalid\n";
|
||||||
return -1;
|
return 1;
|
||||||
}
|
|
||||||
if(pid >= oclinfo.size())
|
|
||||||
{
|
|
||||||
std::cout << "platform invalid\n";
|
|
||||||
return -1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
setDevice(oclinfo[pid], device);
|
cv::ocl::DevicesInfo devicesInfo;
|
||||||
|
int devnums = cv::ocl::getOpenCLDevices(devicesInfo, flag, (pid < 0) ? NULL : platformsInfo[pid]);
|
||||||
|
if (device < 0 || device >= devnums)
|
||||||
|
{
|
||||||
|
std::cout << "device/platform invalid\n";
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
cv::ocl::setDevice(devicesInfo[device]);
|
||||||
setBinaryDiskCache(CACHE_UPDATE);
|
setBinaryDiskCache(CACHE_UPDATE);
|
||||||
|
|
||||||
cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl;
|
cout << "Device type: " << type << endl
|
||||||
|
<< "Platform name: " << devicesInfo[device]->platform->platformName << endl
|
||||||
|
<< "Device name: " << devicesInfo[device]->deviceName << endl;
|
||||||
return RUN_ALL_TESTS();
|
return RUN_ALL_TESTS();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -107,9 +107,6 @@ PERF_TEST_P(Size_MatType, SuperResolution_BTVL1_OCL,
|
|||||||
Combine(Values(szSmall64, szSmall128),
|
Combine(Values(szSmall64, szSmall128),
|
||||||
Values(MatType(CV_8UC1), MatType(CV_8UC3))))
|
Values(MatType(CV_8UC1), MatType(CV_8UC3))))
|
||||||
{
|
{
|
||||||
std::vector<cv::ocl::Info>info;
|
|
||||||
cv::ocl::getDevice(info);
|
|
||||||
|
|
||||||
declare.time(5 * 60);
|
declare.time(5 * 60);
|
||||||
|
|
||||||
const Size size = std::tr1::get<0>(GetParam());
|
const Size size = std::tr1::get<0>(GetParam());
|
||||||
|
@ -232,7 +232,7 @@ void btv_l1_device_ocl::calcBtvRegularization(const oclMat& src, oclMat& dst, in
|
|||||||
cl_mem c_btvRegWeights;
|
cl_mem c_btvRegWeights;
|
||||||
size_t count = btvWeights_size * sizeof(float);
|
size_t count = btvWeights_size * sizeof(float);
|
||||||
c_btvRegWeights = openCLCreateBuffer(clCxt, CL_MEM_READ_ONLY, count);
|
c_btvRegWeights = openCLCreateBuffer(clCxt, CL_MEM_READ_ONLY, count);
|
||||||
int cl_safe_check = clEnqueueWriteBuffer((cl_command_queue)clCxt->oclCommandQueue(), c_btvRegWeights, 1, 0, count, btvWeights_, 0, NULL, NULL);
|
int cl_safe_check = clEnqueueWriteBuffer(getClCommandQueue(clCxt), c_btvRegWeights, 1, 0, count, btvWeights_, 0, NULL, NULL);
|
||||||
CV_Assert(cl_safe_check == CL_SUCCESS);
|
CV_Assert(cl_safe_check == CL_SUCCESS);
|
||||||
|
|
||||||
args.push_back(make_pair(sizeof(cl_mem), (void*)&src_.data));
|
args.push_back(make_pair(sizeof(cl_mem), (void*)&src_.data));
|
||||||
|
@ -278,8 +278,6 @@ TEST_F(SuperResolution, BTVL1_GPU)
|
|||||||
#if defined(HAVE_OPENCV_OCL) && defined(HAVE_OPENCL)
|
#if defined(HAVE_OPENCV_OCL) && defined(HAVE_OPENCL)
|
||||||
TEST_F(SuperResolution, BTVL1_OCL)
|
TEST_F(SuperResolution, BTVL1_OCL)
|
||||||
{
|
{
|
||||||
std::vector<cv::ocl::Info> infos;
|
|
||||||
cv::ocl::getDevice(infos);
|
|
||||||
RunTest(cv::superres::createSuperResolution_BTVL1_OCL());
|
RunTest(cv::superres::createSuperResolution_BTVL1_OCL());
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user