Used new strategy in cv::accumulate
This commit is contained in:
@@ -617,6 +617,11 @@ CV_EXPORTS int predictOptimalVectorWidth(InputArray src1, InputArray src2 = noAr
|
|||||||
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
|
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray(),
|
||||||
OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
|
OclVectorStrategy strat = OCL_VECTOR_DEFAULT);
|
||||||
|
|
||||||
|
// with OCL_VECTOR_MAX strategy
|
||||||
|
CV_EXPORTS int predictOptimalVectorWidthMax(InputArray src1, InputArray src2 = noArray(), InputArray src3 = noArray(),
|
||||||
|
InputArray src4 = noArray(), InputArray src5 = noArray(), InputArray src6 = noArray(),
|
||||||
|
InputArray src7 = noArray(), InputArray src8 = noArray(), InputArray src9 = noArray());
|
||||||
|
|
||||||
CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m);
|
CV_EXPORTS void buildOptionsAddMatrixDescription(String& buildOptions, const String& name, InputArray _m);
|
||||||
|
|
||||||
class CV_EXPORTS Image2D
|
class CV_EXPORTS Image2D
|
||||||
|
@@ -4462,6 +4462,7 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)
|
|||||||
offsets.push_back(src.offset()); \
|
offsets.push_back(src.offset()); \
|
||||||
steps.push_back(src.step()); \
|
steps.push_back(src.step()); \
|
||||||
dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
|
dividers.push_back(ckercn * CV_ELEM_SIZE1(ctype)); \
|
||||||
|
kercns.push_back(ckercn); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
while ((void)0, 0)
|
while ((void)0, 0)
|
||||||
@@ -4483,13 +4484,13 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
|
|||||||
if (vectorWidths[0] == 1)
|
if (vectorWidths[0] == 1)
|
||||||
{
|
{
|
||||||
// it's heuristic
|
// it's heuristic
|
||||||
vectorWidths[0] = vectorWidths[1] = 4;
|
vectorWidths[CV_8U] = vectorWidths[CV_8S] = 16;
|
||||||
vectorWidths[2] = vectorWidths[3] = 2;
|
vectorWidths[CV_16U] = vectorWidths[CV_16S] = 8;
|
||||||
vectorWidths[4] = vectorWidths[5] = vectorWidths[6] = 4;
|
vectorWidths[CV_32S] = vectorWidths[CV_32F] = vectorWidths[CV_64F] = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<size_t> offsets, steps, cols;
|
std::vector<size_t> offsets, steps, cols;
|
||||||
std::vector<int> dividers;
|
std::vector<int> dividers, kercns;
|
||||||
PROCESS_SRC(src1);
|
PROCESS_SRC(src1);
|
||||||
PROCESS_SRC(src2);
|
PROCESS_SRC(src2);
|
||||||
PROCESS_SRC(src3);
|
PROCESS_SRC(src3);
|
||||||
@@ -4503,23 +4504,22 @@ int predictOptimalVectorWidth(InputArray src1, InputArray src2, InputArray src3,
|
|||||||
size_t size = offsets.size();
|
size_t size = offsets.size();
|
||||||
|
|
||||||
for (size_t i = 0; i < size; ++i)
|
for (size_t i = 0; i < size; ++i)
|
||||||
while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % dividers[i] != 0)
|
while (offsets[i] % dividers[i] != 0 || steps[i] % dividers[i] != 0 || cols[i] % kercns[i] != 0)
|
||||||
dividers[i] >>= 1;
|
dividers[i] >>= 1, kercns[i] >>= 1;
|
||||||
|
|
||||||
// default strategy
|
// default strategy
|
||||||
int kercn = *std::min_element(dividers.begin(), dividers.end());
|
int kercn = *std::min_element(kercns.begin(), kercns.end());
|
||||||
|
|
||||||
// another strategy
|
|
||||||
// for (size_t i = 0; i < size; ++i)
|
|
||||||
// if (dividers[i] != wsz)
|
|
||||||
// {
|
|
||||||
// kercn = 1;
|
|
||||||
// break;
|
|
||||||
// }
|
|
||||||
|
|
||||||
return kercn;
|
return kercn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convenience wrapper around predictOptimalVectorWidth(): forwards all nine
// inputs unchanged and passes OCL_VECTOR_MAX as the strategy argument,
// returning whatever vector width that call reports.
int predictOptimalVectorWidthMax(InputArray src1, InputArray src2, InputArray src3,
|
||||||
|
InputArray src4, InputArray src5, InputArray src6,
|
||||||
|
InputArray src7, InputArray src8, InputArray src9)
|
||||||
|
{
|
||||||
|
return predictOptimalVectorWidth(src1, src2, src3, src4, src5, src6, src7, src8, src9, OCL_VECTOR_MAX);
|
||||||
|
}
|
||||||
|
|
||||||
#undef PROCESS_SRC
|
#undef PROCESS_SRC
|
||||||
|
|
||||||
|
|
||||||
|
@@ -370,16 +370,9 @@ static bool ocl_accumulate( InputArray _src, InputArray _src2, InputOutputArray
|
|||||||
op_type == ACCUMULATE_PRODUCT || op_type == ACCUMULATE_WEIGHTED);
|
op_type == ACCUMULATE_PRODUCT || op_type == ACCUMULATE_WEIGHTED);
|
||||||
|
|
||||||
const ocl::Device & dev = ocl::Device::getDefault();
|
const ocl::Device & dev = ocl::Device::getDefault();
|
||||||
int vectorWidths[] = { 4, 4, 2, 2, 1, 1, 1, -1 };
|
bool haveMask = !_mask.empty(), doubleSupport = dev.doubleFPConfig() > 0;
|
||||||
int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), ddepth = _dst.depth();
|
int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), ddepth = _dst.depth();
|
||||||
int pcn = std::max(vectorWidths[sdepth], vectorWidths[ddepth]), sesz = CV_ELEM_SIZE(sdepth) * pcn,
|
int kercn = haveMask ? cn : ocl::predictOptimalVectorWidthMax(_src, _src2, _dst), rowsPerWI = dev.isIntel() ? 4 : 1;
|
||||||
desz = CV_ELEM_SIZE(ddepth) * pcn, rowsPerWI = dev.isIntel() ? 4 : 1;
|
|
||||||
|
|
||||||
bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(),
|
|
||||||
usepcn = _src.offset() % sesz == 0 && _src.step() % sesz == 0 && (_src.cols() * cn) % pcn == 0 &&
|
|
||||||
_src2.offset() % desz == 0 && _src2.step() % desz == 0 &&
|
|
||||||
_dst.offset() % pcn == 0 && _dst.step() % desz == 0 && !haveMask;
|
|
||||||
int kercn = usepcn ? pcn : haveMask ? cn : 1;
|
|
||||||
|
|
||||||
if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
|
if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
|
||||||
return false;
|
return false;
|
||||||
|
Reference in New Issue
Block a user