Merge pull request #874 from pengx17:master_queryDeviceInfo_rewrite

This commit is contained in:
Vadim Pisarevsky 2013-05-13 23:03:24 +04:00 committed by OpenCV Buildbot
commit 14c50d2fce
3 changed files with 60 additions and 80 deletions

View File

@ -59,32 +59,28 @@ namespace cv
const char noImage2dOption [] = "-D DISABLE_IMAGE2D"; const char noImage2dOption [] = "-D DISABLE_IMAGE2D";
static char SURF_OPTIONS [1024] = ""; static bool use_image2d = false;
static bool USE_IMAGE2d = false;
static void openCLExecuteKernelSURF(Context *clCxt , const char **source, String kernelName, size_t globalThreads[3], static void openCLExecuteKernelSURF(Context *clCxt , const char **source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth) size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
{ {
char * pSURF_OPTIONS = SURF_OPTIONS; char optBuf [100] = {0};
static bool OPTION_INIT = false; char * optBufPtr = optBuf;
if(!OPTION_INIT) if( !use_image2d )
{ {
if( !USE_IMAGE2d ) strcat(optBufPtr, noImage2dOption);
{ optBufPtr += strlen(noImage2dOption);
strcat(pSURF_OPTIONS, noImage2dOption);
pSURF_OPTIONS += strlen(noImage2dOption);
}
size_t wave_size = 0;
queryDeviceInfo(WAVEFRONT_SIZE, &wave_size);
std::sprintf(pSURF_OPTIONS, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
OPTION_INIT = true;
} }
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, SURF_OPTIONS); cl_kernel kernel;
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr);
size_t wave_size = queryDeviceInfo<WAVEFRONT_SIZE, size_t>(kernel);
CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr);
} }
} }
} }
static inline size_t divUp(size_t total, size_t grain) static inline size_t divUp(size_t total, size_t grain)
{ {
return (total + grain - 1) / grain; return (total + grain - 1) / grain;
@ -166,11 +162,11 @@ public:
{ {
bindImgTex(img, imgTex); bindImgTex(img, imgTex);
bindImgTex(surf_.sum, sumTex); bindImgTex(surf_.sum, sumTex);
USE_IMAGE2d = true; use_image2d = true;
} }
catch (const cv::Exception& e) catch (const cv::Exception& e)
{ {
USE_IMAGE2d = false; use_image2d = false;
if(e.code != CL_IMAGE_FORMAT_NOT_SUPPORTED && e.code != -217) if(e.code != CL_IMAGE_FORMAT_NOT_SUPPORTED && e.code != -217)
{ {
throw e; throw e;

View File

@ -125,16 +125,21 @@ namespace cv
bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext()); bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
// the enums are used to query device information // the enums are used to query device information
// currently only support wavefront size queries
enum DEVICE_INFO enum DEVICE_INFO
{ {
WAVEFRONT_SIZE, //in AMD speak WAVEFRONT_SIZE,
WARP_SIZE = WAVEFRONT_SIZE, //in nvidia speak IS_CPU_DEVICE
IS_CPU_DEVICE //check if the device is CPU
}; };
//info should have been pre-allocated template<DEVICE_INFO _it, typename _ty>
void CV_EXPORTS queryDeviceInfo(DEVICE_INFO info_type, void* info); _ty queryDeviceInfo(cl_kernel kernel = NULL);
//only these three specializations are implemented at the moment
template<>
int CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel);
template<>
size_t CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, size_t>(cl_kernel kernel);
template<>
bool CV_EXPORTS queryDeviceInfo<IS_CPU_DEVICE, bool>(cl_kernel kernel);
}//namespace ocl }//namespace ocl
}//namespace cv }//namespace cv

View File

@ -362,64 +362,43 @@ namespace cv
clFinish(Context::getContext()->impl->clCmdQueue); clFinish(Context::getContext()->impl->clCmdQueue);
} }
void queryDeviceInfo(DEVICE_INFO info_type, void* info) //template specializations of queryDeviceInfo
template<>
bool queryDeviceInfo<IS_CPU_DEVICE, bool>(cl_kernel)
{ {
static Info::Impl* impl = Context::getContext()->impl; Info::Impl* impl = Context::getContext()->impl;
switch(info_type) cl_device_type devicetype;
{ openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum],
case WAVEFRONT_SIZE: CL_DEVICE_TYPE, sizeof(cl_device_type),
{ &devicetype, NULL));
bool is_cpu = false; return (devicetype == CVCL_DEVICE_TYPE_CPU);
queryDeviceInfo(IS_CPU_DEVICE, &is_cpu); }
if(is_cpu)
{
*(int*)info = 1;
return;
}
#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD
try
{
openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0],
CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof(size_t), info, 0));
}
catch(const cv::Exception&)
#elif defined (CL_DEVICE_WARP_SIZE_NV)
const int EXT_LEN = 4096 + 1 ;
char extends_set[EXT_LEN];
size_t extends_size;
openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum], CL_DEVICE_EXTENSIONS, EXT_LEN, (void *)extends_set, &extends_size));
extends_set[EXT_LEN - 1] = 0;
if(std::string(extends_set).find("cl_nv_device_attribute_query") != std::string::npos)
{
openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0],
CL_DEVICE_WARP_SIZE_NV, sizeof(size_t), info, 0));
}
else
#endif
{
// if no way left for us to query the warp size, we can get it from kernel group info
static const char * _kernel_string = "__kernel void test_func() {}";
cl_kernel kernel;
kernel = openCLGetKernelFromSource(Context::getContext(), &_kernel_string, "test_func");
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, impl->devices[impl->devnum],
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), info, NULL));
}
} template<typename _ty>
break; static _ty queryWavesize(cl_kernel kernel)
case IS_CPU_DEVICE: {
{ size_t info = 0;
cl_device_type devicetype; Info::Impl* impl = Context::getContext()->impl;
openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum], bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
CL_DEVICE_TYPE, sizeof(cl_device_type), if(is_cpu)
&devicetype, NULL)); {
*(bool*)info = (devicetype == CVCL_DEVICE_TYPE_CPU); return 1;
}
break;
default:
CV_Error(-1, "Invalid device info type");
break;
} }
CV_Assert(kernel != NULL);
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, impl->devices[impl->devnum],
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &info, NULL));
return static_cast<_ty>(info);
}
template<>
size_t queryDeviceInfo<WAVEFRONT_SIZE, size_t>(cl_kernel kernel)
{
return queryWavesize<size_t>(kernel);
}
template<>
int queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel)
{
return queryWavesize<int>(kernel);
} }
void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size) void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size)