Merge pull request #882 from pengx17:2.4_rewrite_query_info
This commit is contained in:
commit
cbbc82a789
@ -60,27 +60,24 @@ namespace cv
|
||||
|
||||
const char noImage2dOption [] = "-D DISABLE_IMAGE2D";
|
||||
|
||||
static char SURF_OPTIONS [1024] = "";
|
||||
static bool USE_IMAGE2d = false;
|
||||
static bool use_image2d = false;
|
||||
|
||||
static void openCLExecuteKernelSURF(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
|
||||
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, int depth)
|
||||
{
|
||||
char * pSURF_OPTIONS = SURF_OPTIONS;
|
||||
static bool OPTION_INIT = false;
|
||||
if(!OPTION_INIT)
|
||||
char optBuf [100] = {0};
|
||||
char * optBufPtr = optBuf;
|
||||
if( !use_image2d )
|
||||
{
|
||||
if( !USE_IMAGE2d )
|
||||
{
|
||||
strcat(pSURF_OPTIONS, noImage2dOption);
|
||||
pSURF_OPTIONS += strlen(noImage2dOption);
|
||||
}
|
||||
|
||||
size_t wave_size = 0;
|
||||
queryDeviceInfo(WAVEFRONT_SIZE, &wave_size);
|
||||
std::sprintf(pSURF_OPTIONS, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
|
||||
OPTION_INIT = true;
|
||||
strcat(optBufPtr, noImage2dOption);
|
||||
optBufPtr += strlen(noImage2dOption);
|
||||
}
|
||||
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, SURF_OPTIONS);
|
||||
cl_kernel kernel;
|
||||
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr);
|
||||
size_t wave_size = queryDeviceInfo<WAVEFRONT_SIZE, size_t>(kernel);
|
||||
CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
|
||||
sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
|
||||
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -161,22 +158,12 @@ public:
|
||||
counters.setTo(Scalar::all(0));
|
||||
|
||||
integral(img, surf_.sum);
|
||||
if(support_image2d())
|
||||
use_image2d = support_image2d();
|
||||
if(use_image2d)
|
||||
{
|
||||
try
|
||||
{
|
||||
bindImgTex(img, imgTex);
|
||||
bindImgTex(surf_.sum, sumTex);
|
||||
USE_IMAGE2d = true;
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
{
|
||||
USE_IMAGE2d = false;
|
||||
if(e.code != CL_IMAGE_FORMAT_NOT_SUPPORTED && e.code != -217)
|
||||
{
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
bindImgTex(img, imgTex);
|
||||
bindImgTex(surf_.sum, sumTex);
|
||||
finish();
|
||||
}
|
||||
|
||||
maskSumTex = 0;
|
||||
|
@ -128,11 +128,17 @@ namespace cv
|
||||
enum DEVICE_INFO
|
||||
{
|
||||
WAVEFRONT_SIZE, //in AMD speak
|
||||
WARP_SIZE = WAVEFRONT_SIZE, //in nvidia speak
|
||||
IS_CPU_DEVICE //check if the device is CPU
|
||||
};
|
||||
template<DEVICE_INFO _it, typename _ty>
|
||||
_ty queryDeviceInfo(cl_kernel kernel = NULL);
|
||||
//info should have been pre-allocated
|
||||
void CV_EXPORTS queryDeviceInfo(DEVICE_INFO info_type, void* info);
|
||||
template<>
|
||||
int CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel);
|
||||
template<>
|
||||
size_t CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, size_t>(cl_kernel kernel);
|
||||
template<>
|
||||
bool CV_EXPORTS queryDeviceInfo<IS_CPU_DEVICE, bool>(cl_kernel kernel);
|
||||
|
||||
}//namespace ocl
|
||||
|
||||
|
@ -1578,8 +1578,7 @@ static void openCLExecuteKernel_hog(Context *clCxt , const char **source, string
|
||||
size_t globalThreads[3], size_t localThreads[3],
|
||||
vector< pair<size_t, const void *> > &args)
|
||||
{
|
||||
size_t wave_size = 0;
|
||||
queryDeviceInfo(WAVEFRONT_SIZE, &wave_size);
|
||||
size_t wave_size = queryDeviceInfo<WAVEFRONT_SIZE, size_t>();
|
||||
if (wave_size <= 16)
|
||||
{
|
||||
char build_options[64];
|
||||
|
@ -363,64 +363,43 @@ namespace cv
|
||||
clFinish(Context::getContext()->impl->clCmdQueue);
|
||||
}
|
||||
|
||||
void queryDeviceInfo(DEVICE_INFO info_type, void* info)
|
||||
//template specializations of queryDeviceInfo
|
||||
template<>
|
||||
bool queryDeviceInfo<IS_CPU_DEVICE, bool>(cl_kernel)
|
||||
{
|
||||
static Info::Impl* impl = Context::getContext()->impl;
|
||||
switch(info_type)
|
||||
{
|
||||
case WAVEFRONT_SIZE:
|
||||
{
|
||||
bool is_cpu = false;
|
||||
queryDeviceInfo(IS_CPU_DEVICE, &is_cpu);
|
||||
if(is_cpu)
|
||||
{
|
||||
*(int*)info = 1;
|
||||
return;
|
||||
}
|
||||
#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD
|
||||
try
|
||||
{
|
||||
openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0],
|
||||
CL_DEVICE_WAVEFRONT_WIDTH_AMD, sizeof(size_t), info, 0));
|
||||
}
|
||||
catch(const cv::Exception&)
|
||||
#elif defined (CL_DEVICE_WARP_SIZE_NV)
|
||||
const int EXT_LEN = 4096 + 1 ;
|
||||
char extends_set[EXT_LEN];
|
||||
size_t extends_size;
|
||||
openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum], CL_DEVICE_EXTENSIONS, EXT_LEN, (void *)extends_set, &extends_size));
|
||||
extends_set[EXT_LEN - 1] = 0;
|
||||
if(std::string(extends_set).find("cl_nv_device_attribute_query") != std::string::npos)
|
||||
{
|
||||
openCLSafeCall(clGetDeviceInfo(Context::getContext()->impl->devices[0],
|
||||
CL_DEVICE_WARP_SIZE_NV, sizeof(size_t), info, 0));
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
// if no way left for us to query the warp size, we can get it from kernel group info
|
||||
static const char * _kernel_string = "__kernel void test_func() {}";
|
||||
cl_kernel kernel;
|
||||
kernel = openCLGetKernelFromSource(Context::getContext(), &_kernel_string, "test_func");
|
||||
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, impl->devices[impl->devnum],
|
||||
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), info, NULL));
|
||||
}
|
||||
Info::Impl* impl = Context::getContext()->impl;
|
||||
cl_device_type devicetype;
|
||||
openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum],
|
||||
CL_DEVICE_TYPE, sizeof(cl_device_type),
|
||||
&devicetype, NULL));
|
||||
return (devicetype == CVCL_DEVICE_TYPE_CPU);
|
||||
}
|
||||
|
||||
}
|
||||
break;
|
||||
case IS_CPU_DEVICE:
|
||||
{
|
||||
cl_device_type devicetype;
|
||||
openCLSafeCall(clGetDeviceInfo(impl->devices[impl->devnum],
|
||||
CL_DEVICE_TYPE, sizeof(cl_device_type),
|
||||
&devicetype, NULL));
|
||||
*(bool*)info = (devicetype == CVCL_DEVICE_TYPE_CPU);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
CV_Error(-1, "Invalid device info type");
|
||||
break;
|
||||
template<typename _ty>
|
||||
static _ty queryWavesize(cl_kernel kernel)
|
||||
{
|
||||
size_t info = 0;
|
||||
Info::Impl* impl = Context::getContext()->impl;
|
||||
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
|
||||
if(is_cpu)
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
CV_Assert(kernel != NULL);
|
||||
openCLSafeCall(clGetKernelWorkGroupInfo(kernel, impl->devices[impl->devnum],
|
||||
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(size_t), &info, NULL));
|
||||
return static_cast<_ty>(info);
|
||||
}
|
||||
|
||||
template<>
|
||||
size_t queryDeviceInfo<WAVEFRONT_SIZE, size_t>(cl_kernel kernel)
|
||||
{
|
||||
return queryWavesize<size_t>(kernel);
|
||||
}
|
||||
template<>
|
||||
int queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel)
|
||||
{
|
||||
return queryWavesize<int>(kernel);
|
||||
}
|
||||
|
||||
void openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size)
|
||||
|
@ -187,8 +187,7 @@ static void lkSparse_run(oclMat &I, oclMat &J,
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&iters ));
|
||||
args.push_back( make_pair( sizeof(cl_char), (void *)&calcErr ));
|
||||
|
||||
bool is_cpu;
|
||||
queryDeviceInfo(IS_CPU_DEVICE, &is_cpu);
|
||||
bool is_cpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();
|
||||
if (is_cpu)
|
||||
{
|
||||
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, args, I.oclchannels(), I.depth(), (char*)" -D CPU");
|
||||
|
Loading…
x
Reference in New Issue
Block a user