Use preprocessor definitions for constant values in the OpenCL kernels instead of passing them as kernel arguments.
This can improve the performance of OCL_Cascade_Image_MinSize_CascadeClassifier.CascadeClassifier/*. In particular, OCL_Cascade_Image_MinSize_CascadeClassifier.CascadeClassifier/15 and OCL_Cascade_Image_MinSize_CascadeClassifier.CascadeClassifier/16 improve by about 2% on Intel platforms. Signed-off-by: Yan Wang <yan.wang@linux.intel.com>
This commit is contained in:
@@ -1060,6 +1060,7 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
}
|
||||
|
||||
int nstages = (int)data.stages.size();
|
||||
int splitstage_ocl = 1;
|
||||
|
||||
if( featureType == FeatureEvaluator::HAAR )
|
||||
{
|
||||
@@ -1071,11 +1072,11 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
{
|
||||
String opts;
|
||||
if (lbufSize.area())
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D NODE_COUNT=%d",
|
||||
localsz.width, localsz.height, lbufSize.area(), lbufSize.width, data.maxNodesPerTree);
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D NODE_COUNT=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
|
||||
localsz.width, localsz.height, lbufSize.area(), lbufSize.width, data.maxNodesPerTree, splitstage_ocl, nstages, MAX_FACES);
|
||||
else
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D NODE_COUNT=%d",
|
||||
localsz.width, localsz.height, data.maxNodesPerTree);
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D NODE_COUNT=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
|
||||
localsz.width, localsz.height, data.maxNodesPerTree, splitstage_ocl, nstages, MAX_FACES);
|
||||
haarKernel.create("runHaarClassifier", ocl::objdetect::cascadedetect_oclsrc, opts);
|
||||
if( haarKernel.empty() )
|
||||
return false;
|
||||
@@ -1083,7 +1084,6 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
|
||||
Rect normrect = haar->getNormRect();
|
||||
int sqofs = haar->getSquaresOffset();
|
||||
int splitstage_ocl = 1;
|
||||
|
||||
haarKernel.args((int)scales.size(),
|
||||
ocl::KernelArg::PtrReadOnly(bufs[0]), // scaleData
|
||||
@@ -1091,13 +1091,12 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
|
||||
|
||||
// cascade classifier
|
||||
splitstage_ocl, nstages,
|
||||
ocl::KernelArg::PtrReadOnly(ustages),
|
||||
ocl::KernelArg::PtrReadOnly(unodes),
|
||||
ocl::KernelArg::PtrReadOnly(uleaves),
|
||||
|
||||
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
|
||||
normrect, sqofs, data.origWinSize, (int)MAX_FACES);
|
||||
normrect, sqofs, data.origWinSize);
|
||||
ok = haarKernel.run(2, globalsize, localsize, true);
|
||||
}
|
||||
else if( featureType == FeatureEvaluator::LBP )
|
||||
@@ -1113,16 +1112,16 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
{
|
||||
String opts;
|
||||
if (lbufSize.area())
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d",
|
||||
localsz.width, localsz.height, lbufSize.area(), lbufSize.width);
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
|
||||
localsz.width, localsz.height, lbufSize.area(), lbufSize.width, splitstage_ocl, nstages, MAX_FACES);
|
||||
else
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d", localsz.width, localsz.height);
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
|
||||
localsz.width, localsz.height, splitstage_ocl, nstages, MAX_FACES);
|
||||
lbpKernel.create("runLBPClassifierStumpSimple", ocl::objdetect::cascadedetect_oclsrc, opts);
|
||||
if( lbpKernel.empty() )
|
||||
return false;
|
||||
}
|
||||
|
||||
int splitstage_ocl = 1;
|
||||
int subsetSize = (data.ncategories + 31)/32;
|
||||
lbpKernel.args((int)scales.size(),
|
||||
ocl::KernelArg::PtrReadOnly(bufs[0]), // scaleData
|
||||
@@ -1130,14 +1129,13 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
|
||||
|
||||
// cascade classifier
|
||||
splitstage_ocl, nstages,
|
||||
ocl::KernelArg::PtrReadOnly(ustages),
|
||||
ocl::KernelArg::PtrReadOnly(unodes),
|
||||
ocl::KernelArg::PtrReadOnly(usubsets),
|
||||
subsetSize,
|
||||
|
||||
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
|
||||
data.origWinSize, (int)MAX_FACES);
|
||||
data.origWinSize);
|
||||
|
||||
ok = lbpKernel.run(2, globalsize, localsize, true);
|
||||
}
|
||||
|
Reference in New Issue
Block a user