experiments
This commit is contained in:
@@ -2640,19 +2640,19 @@ static cl_command_queue getQueue(const Queue& q)
|
||||
/////////////////////////////////////////// KernelArg /////////////////////////////////////////////
|
||||
|
||||
// Default constructor: flags, m, obj and sz are set to 0 and wscale to 1.
// The two initializer lists below are the before/after sides of this diff
// hunk; the second one additionally sets iwscale to 1.
KernelArg::KernelArg()
|
||||
: flags(0), m(0), obj(0), sz(0), wscale(1)
|
||||
: flags(0), m(0), obj(0), sz(0), wscale(1), iwscale(1)
|
||||
{
|
||||
}
|
||||
|
||||
// Member-wise constructor: each parameter is stored in the member of the same
// name (without the leading underscore). The two signature/initializer pairs
// below are the before/after sides of this diff hunk; the second pair adds
// _iwscale, which Kernel::set() uses as the divisor of cols*wscale.
KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, const void* _obj, size_t _sz)
|
||||
: flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale)
|
||||
KernelArg::KernelArg(int _flags, UMat* _m, int _wscale, int _iwscale, const void* _obj, size_t _sz)
|
||||
: flags(_flags), m(_m), obj(_obj), sz(_sz), wscale(_wscale), iwscale(_iwscale)
|
||||
{
|
||||
}
|
||||
|
||||
// Builds a CONSTANT kernel argument from a Mat: asserts that the matrix is
// continuous, then passes m.data as the object pointer and
// m.total()*m.elemSize() as its size, with no UMat attached.
// The two return statements are the before/after sides of this diff hunk; the
// second matches the constructor that takes both wscale and iwscale and passes
// 0 for each.
KernelArg KernelArg::Constant(const Mat& m)
|
||||
{
|
||||
CV_Assert(m.isContinuous());
|
||||
return KernelArg(CONSTANT, 0, 1, m.data, m.total()*m.elemSize());
|
||||
return KernelArg(CONSTANT, 0, 0, 0, m.data, m.total()*m.elemSize());
|
||||
}
|
||||
|
||||
/////////////////////////////////////////// Kernel /////////////////////////////////////////////
|
||||
@@ -2871,7 +2871,7 @@ int Kernel::set(int i, const KernelArg& arg)
|
||||
|
||||
if( !(arg.flags & KernelArg::NO_SIZE) )
|
||||
{
|
||||
int cols = u2d.cols*arg.wscale;
|
||||
int cols = u2d.cols*arg.wscale/arg.iwscale;
|
||||
CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(u2d.rows), &u2d.rows) == CL_SUCCESS);
|
||||
CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(cols), &cols) == CL_SUCCESS);
|
||||
i += 2;
|
||||
@@ -2887,7 +2887,7 @@ int Kernel::set(int i, const KernelArg& arg)
|
||||
i += 4;
|
||||
if( !(arg.flags & KernelArg::NO_SIZE) )
|
||||
{
|
||||
int cols = u3d.cols*arg.wscale;
|
||||
int cols = u3d.cols*arg.wscale/arg.iwscale;
|
||||
CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)i, sizeof(u3d.slices), &u3d.rows) == CL_SUCCESS);
|
||||
CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+1), sizeof(u3d.rows), &u3d.rows) == CL_SUCCESS);
|
||||
CV_OclDbgAssert(clSetKernelArg(p->handle, (cl_uint)(i+2), sizeof(u3d.cols), &cols) == CL_SUCCESS);
|
||||
@@ -2915,7 +2915,7 @@ bool Kernel::run(int dims, size_t _globalsize[], size_t _localsize[],
|
||||
for (int i = 0; i < dims; i++)
|
||||
{
|
||||
size_t val = _localsize ? _localsize[i] :
|
||||
dims == 1 ? 64 : dims == 2 ? (16>>i) : dims == 3 ? (8>>(int)(i>0)) : 1;
|
||||
dims == 1 ? 64 : dims == 2 ? (i == 0 ? 256 : 8) : dims == 3 ? (8>>(int)(i>0)) : 1;
|
||||
CV_Assert( val > 0 );
|
||||
total *= _globalsize[i];
|
||||
globalsize[i] = ((_globalsize[i] + val - 1)/val)*val;
|
||||
@@ -4219,34 +4219,34 @@ const char* typeToStr(int type)
|
||||
{
|
||||
static const char* tab[]=
|
||||
{
|
||||
"uchar", "uchar2", "uchar3", "uchar4",
|
||||
"char", "char2", "char3", "char4",
|
||||
"ushort", "ushort2", "ushort3", "ushort4",
|
||||
"short", "short2", "short3", "short4",
|
||||
"int", "int2", "int3", "int4",
|
||||
"float", "float2", "float3", "float4",
|
||||
"double", "double2", "double3", "double4",
|
||||
"?", "?", "?", "?"
|
||||
"uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
|
||||
"char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
|
||||
"ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
|
||||
"short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
|
||||
"int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
|
||||
"float", "float2", "float3", "float4", 0, 0, 0, "float8", 0, 0, 0, 0, 0, 0, 0, "float16",
|
||||
"double", "double2", "double3", "double4", 0, 0, 0, "double8", 0, 0, 0, 0, 0, 0, 0, "double16",
|
||||
"?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
|
||||
};
|
||||
int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
|
||||
return cn > 4 ? "?" : tab[depth*4 + cn-1];
|
||||
return cn > 16 ? "?" : tab[depth*16 + cn-1];
|
||||
}
|
||||
|
||||
// Returns a type-name string for `type`, looked up in a static table indexed
// by the depth and channel count extracted from `type`.
// The rows and return statements below are the before/after sides of this diff
// hunk: the first table has 4 names per depth (lookup depth*4 + cn-1, "?" for
// cn > 4), the second has 16 slots per depth (lookup depth*16 + cn-1, "?" for
// cn > 16).
// NOTE(review): in the 16-wide table the slots for channel counts 5-7 and 9-15
// hold 0, so the second return yields a null pointer for those counts rather
// than "?" - confirm callers handle that.
const char* memopTypeToStr(int type)
|
||||
{
|
||||
static const char* tab[] =
|
||||
{
|
||||
"uchar", "uchar2", "uchar3", "uchar4",
|
||||
"uchar", "uchar2", "uchar3", "uchar4",
|
||||
"ushort", "ushort2", "ushort3", "ushort4",
|
||||
"ushort", "ushort2", "ushort3", "ushort4",
|
||||
"int", "int2", "int3", "int4",
|
||||
"int", "int2", "int3", "int4",
|
||||
"ulong", "ulong2", "ulong3", "ulong4",
|
||||
"?", "?", "?", "?"
|
||||
"uchar", "uchar2", "uchar3", "uchar4", 0, 0, 0, "uchar8", 0, 0, 0, 0, 0, 0, 0, "uchar16",
|
||||
"char", "char2", "char3", "char4", 0, 0, 0, "char8", 0, 0, 0, 0, 0, 0, 0, "char16",
|
||||
"ushort", "ushort2", "ushort3", "ushort4",0, 0, 0, "ushort8", 0, 0, 0, 0, 0, 0, 0, "ushort16",
|
||||
"short", "short2", "short3", "short4", 0, 0, 0, "short8", 0, 0, 0, 0, 0, 0, 0, "short16",
|
||||
"int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
|
||||
"int", "int2", "int3", "int4", 0, 0, 0, "int8", 0, 0, 0, 0, 0, 0, 0, "int16",
|
||||
"ulong", "ulong2", "ulong3", "ulong4", 0, 0, 0, "ulong8", 0, 0, 0, 0, 0, 0, 0, "ulong16",
|
||||
"?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?", "?"
|
||||
};
|
||||
int cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
|
||||
return cn > 4 ? "?" : tab[depth*4 + cn-1];
|
||||
return cn > 16 ? "?" : tab[depth*16 + cn-1];
|
||||
}
|
||||
|
||||
const char* convertTypeStr(int sdepth, int ddepth, int cn, char* buf)
|
||||
@@ -4321,6 +4321,74 @@ String kernelToStr(InputArray _kernel, int ddepth)
|
||||
return cv::format(" -D COEFF=%s", func(kernel).c_str());
|
||||
}
|
||||
|
||||
// Helper for predictOptimalVectorWidth(): registers one optional source array.
// An empty `src` is ignored. Otherwise `src` must be a Mat or UMat (asserted);
// its row length in scalar elements (channels * width) is appended to `cols`,
// and, provided its type and size equal the enclosing function's `type` and
// `ssize`, its offset and step are appended to `offsets` and `steps`.
// On a type or size mismatch the ENCLOSING function returns 1.
// Relies on `type`, `ssize`, `cols`, `offsets` and `steps` being in scope.
#define PROCESS_SRC(src) \
    do \
    { \
        if (!src.empty()) \
        { \
            CV_Assert(src.isMat() || src.isUMat()); \
            const int srcType = src.type(); \
            const Size srcSize = src.size(); \
            cols.push_back(CV_MAT_CN(srcType) * srcSize.width); \
            if (srcType != type || srcSize != ssize) \
                return 1; \
            offsets.push_back(src.offset()); \
            steps.push_back(src.step()); \
        } \
    } \
    while ((void)0, 0)
|
||||
|
||||
/**
 * Chooses the vector width, in scalar elements, for a kernel that processes
 * up to nine arrays sharing one type and size.
 *
 * The candidate is the default OpenCL device's preferred vector width for the
 * depth of src1. It is kept only if, for every non-empty source, the offset,
 * the step and the row length in scalar elements (cols * channels) are all
 * multiples of it.
 *
 * @param src1        reference array; the other sources are compared to its
 *                    type and size
 * @param src2..src9  further arrays; empty ones are ignored, non-empty ones
 *                    must be Mat or UMat (asserted by PROCESS_SRC)
 * @return the device's preferred width when every source is aligned to it;
 *         1 when the device reports a width of 0, when a row holds fewer
 *         scalar elements than the width, when a source differs from src1 in
 *         type or size, or when any source is not aligned to the width
 */
int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
                              InputArray src4, InputArray src5, InputArray src6,
                              InputArray src7, InputArray src8, InputArray src9)
{
    int type = src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    Size ssize = src1.size();
    const ocl::Device & d = ocl::Device::getDefault();

    // Indexed by depth; the last slot is -1, so that depth trips the assertion below.
    int vectorWidths[] = { d.preferredVectorWidthChar(), d.preferredVectorWidthChar(),
                           d.preferredVectorWidthShort(), d.preferredVectorWidthShort(),
                           d.preferredVectorWidthInt(), d.preferredVectorWidthFloat(),
                           d.preferredVectorWidthDouble(), -1 };
    int width = vectorWidths[depth];
    CV_Assert(width >= 0);

    // A device may report a preferred width of 0 (e.g. for a type it does not
    // support). Using 0 as a divider in the alignment loop below would be a
    // modulo by zero, so fall back to scalar processing.
    if (width == 0 || ssize.width * cn < width)
        return 1;

    // PROCESS_SRC fills these vectors and makes this function return 1 on a
    // type or size mismatch with src1.
    std::vector<size_t> offsets, steps, cols;
    PROCESS_SRC(src1);
    PROCESS_SRC(src2);
    PROCESS_SRC(src3);
    PROCESS_SRC(src4);
    PROCESS_SRC(src5);
    PROCESS_SRC(src6);
    PROCESS_SRC(src7);
    PROCESS_SRC(src8);
    PROCESS_SRC(src9);

    size_t size = offsets.size();
    std::vector<int> dividers(size, width);

    // Halve each source's divider until it divides that source's offset, step
    // and row length; a divider of 1 always does, so the loop terminates.
    for (size_t i = 0; i < size; ++i)
        while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0)
            dividers[i] >>= 1;

    // default strategy: all-or-nothing, one misaligned source forces width 1
    for (size_t i = 0; i < size; ++i)
        if (dividers[i] != width)
        {
            width = 1;
            break;
        }

    // another strategy
    // width = *std::min_element(dividers.begin(), dividers.end());

    return width;
}
|
||||
|
||||
#undef PROCESS_SRC
|
||||
|
||||
/////////////////////////////////////////// Image2D ////////////////////////////////////////////////////
|
||||
|
||||
struct Image2D::Impl
|
||||
|
Reference in New Issue
Block a user