added OpenCL optimization for LBP-based face detector
This commit is contained in:
parent
1540910542
commit
d084d19779
@ -743,6 +743,14 @@ bool LBPEvaluator::setWindow( Point pt )
|
|||||||
pwin = &sum.at<int>(pt);
|
pwin = &sum.at<int>(pt);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void LBPEvaluator::getUMats(std::vector<UMat>& bufs)
|
||||||
|
{
|
||||||
|
bufs.clear();
|
||||||
|
bufs.push_back(usum);
|
||||||
|
bufs.push_back(ufbuf);
|
||||||
|
}
|
||||||
|
|
||||||
//---------------------------------------------- HOGEvaluator ---------------------------------------
|
//---------------------------------------------- HOGEvaluator ---------------------------------------
|
||||||
bool HOGEvaluator::Feature :: read( const FileNode& node )
|
bool HOGEvaluator::Feature :: read( const FileNode& node )
|
||||||
@ -1162,50 +1170,84 @@ bool CascadeClassifierImpl::detectSingleScale( InputArray _image, Size processin
|
|||||||
bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size processingRectSize,
|
bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size processingRectSize,
|
||||||
int yStep, double factor, Size sumSize0 )
|
int yStep, double factor, Size sumSize0 )
|
||||||
{
|
{
|
||||||
const int VECTOR_SIZE = 1;
|
int featureType = getFeatureType();
|
||||||
Ptr<HaarEvaluator> haar = featureEvaluator.dynamicCast<HaarEvaluator>();
|
std::vector<UMat> bufs;
|
||||||
if( haar.empty() )
|
size_t globalsize[] = { processingRectSize.width/yStep, processingRectSize.height/yStep };
|
||||||
return false;
|
bool ok = false;
|
||||||
|
|
||||||
haar->setImage(_image, data.origWinSize, sumSize0);
|
|
||||||
|
|
||||||
if( cascadeKernel.empty() )
|
|
||||||
{
|
|
||||||
cascadeKernel.create("runHaarClassifierStump", ocl::objdetect::cascadedetect_oclsrc,
|
|
||||||
format("-D VECTOR_SIZE=%d", VECTOR_SIZE));
|
|
||||||
if( cascadeKernel.empty() )
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( ustages.empty() )
|
if( ustages.empty() )
|
||||||
{
|
{
|
||||||
copyVectorToUMat(data.stages, ustages);
|
copyVectorToUMat(data.stages, ustages);
|
||||||
copyVectorToUMat(data.stumps, ustumps);
|
copyVectorToUMat(data.stumps, ustumps);
|
||||||
|
if( !data.subsets.empty() )
|
||||||
|
copyVectorToUMat(data.subsets, usubsets);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<UMat> bufs;
|
if( featureType == FeatureEvaluator::HAAR )
|
||||||
haar->getUMats(bufs);
|
{
|
||||||
CV_Assert(bufs.size() == 3);
|
Ptr<HaarEvaluator> haar = featureEvaluator.dynamicCast<HaarEvaluator>();
|
||||||
|
if( haar.empty() )
|
||||||
|
return false;
|
||||||
|
|
||||||
Rect normrect = haar->getNormRect();
|
haar->setImage(_image, data.origWinSize, sumSize0);
|
||||||
|
if( haarKernel.empty() )
|
||||||
|
{
|
||||||
|
haarKernel.create("runHaarClassifierStump", ocl::objdetect::cascadedetect_oclsrc, "");
|
||||||
|
if( haarKernel.empty() )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
haar->getUMats(bufs);
|
||||||
|
Rect normrect = haar->getNormRect();
|
||||||
|
|
||||||
//processingRectSize = Size(yStep, yStep);
|
haarKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
|
||||||
size_t globalsize[] = { (processingRectSize.width/yStep + VECTOR_SIZE-1)/VECTOR_SIZE, processingRectSize.height/yStep };
|
ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sqsum
|
||||||
|
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
|
||||||
|
|
||||||
cascadeKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
|
// cascade classifier
|
||||||
ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sqsum
|
(int)data.stages.size(),
|
||||||
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
|
ocl::KernelArg::PtrReadOnly(ustages),
|
||||||
|
ocl::KernelArg::PtrReadOnly(ustumps),
|
||||||
|
|
||||||
// cascade classifier
|
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
|
||||||
(int)data.stages.size(),
|
processingRectSize,
|
||||||
ocl::KernelArg::PtrReadOnly(ustages),
|
yStep, (float)factor,
|
||||||
ocl::KernelArg::PtrReadOnly(ustumps),
|
normrect, data.origWinSize, MAX_FACES);
|
||||||
|
ok = haarKernel.run(2, globalsize, 0, true);
|
||||||
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
|
}
|
||||||
processingRectSize,
|
else if( featureType == FeatureEvaluator::LBP )
|
||||||
yStep, (float)factor,
|
{
|
||||||
normrect, data.origWinSize, MAX_FACES);
|
Ptr<LBPEvaluator> lbp = featureEvaluator.dynamicCast<LBPEvaluator>();
|
||||||
bool ok = cascadeKernel.run(2, globalsize, 0, true);
|
if( lbp.empty() )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
lbp->setImage(_image, data.origWinSize, sumSize0);
|
||||||
|
if( lbpKernel.empty() )
|
||||||
|
{
|
||||||
|
lbpKernel.create("runLBPClassifierStump", ocl::objdetect::cascadedetect_oclsrc, "");
|
||||||
|
if( lbpKernel.empty() )
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
lbp->getUMats(bufs);
|
||||||
|
|
||||||
|
int subsetSize = (data.ncategories + 31)/32;
|
||||||
|
lbpKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
|
||||||
|
ocl::KernelArg::PtrReadOnly(bufs[1]), // optfeatures
|
||||||
|
|
||||||
|
// cascade classifier
|
||||||
|
(int)data.stages.size(),
|
||||||
|
ocl::KernelArg::PtrReadOnly(ustages),
|
||||||
|
ocl::KernelArg::PtrReadOnly(ustumps),
|
||||||
|
ocl::KernelArg::PtrReadOnly(usubsets),
|
||||||
|
subsetSize,
|
||||||
|
|
||||||
|
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
|
||||||
|
processingRectSize,
|
||||||
|
yStep, (float)factor,
|
||||||
|
data.origWinSize, MAX_FACES);
|
||||||
|
ok = lbpKernel.run(2, globalsize, 0, true);
|
||||||
|
}
|
||||||
//CV_Assert(ok);
|
//CV_Assert(ok);
|
||||||
return ok;
|
return ok;
|
||||||
}
|
}
|
||||||
@ -1254,6 +1296,7 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
|
|||||||
double scaleFactor, Size minObjectSize, Size maxObjectSize,
|
double scaleFactor, Size minObjectSize, Size maxObjectSize,
|
||||||
bool outputRejectLevels )
|
bool outputRejectLevels )
|
||||||
{
|
{
|
||||||
|
int featureType = getFeatureType();
|
||||||
Size imgsz = _image.size();
|
Size imgsz = _image.size();
|
||||||
int imgtype = _image.type();
|
int imgtype = _image.type();
|
||||||
|
|
||||||
@ -1267,7 +1310,8 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
|
|||||||
maxObjectSize = imgsz;
|
maxObjectSize = imgsz;
|
||||||
|
|
||||||
bool use_ocl = ocl::useOpenCL() &&
|
bool use_ocl = ocl::useOpenCL() &&
|
||||||
getFeatureType() == FeatureEvaluator::HAAR &&
|
(featureType == FeatureEvaluator::HAAR ||
|
||||||
|
featureType == FeatureEvaluator::LBP) &&
|
||||||
!isOldFormatCascade() &&
|
!isOldFormatCascade() &&
|
||||||
data.isStumpBased() &&
|
data.isStumpBased() &&
|
||||||
maskGenerator.empty() &&
|
maskGenerator.empty() &&
|
||||||
@ -1593,7 +1637,8 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
|
|||||||
bool CascadeClassifierImpl::read_(const FileNode& root)
|
bool CascadeClassifierImpl::read_(const FileNode& root)
|
||||||
{
|
{
|
||||||
tryOpenCL = true;
|
tryOpenCL = true;
|
||||||
cascadeKernel = ocl::Kernel();
|
haarKernel = ocl::Kernel();
|
||||||
|
lbpKernel = ocl::Kernel();
|
||||||
ustages.release();
|
ustages.release();
|
||||||
ustumps.release();
|
ustumps.release();
|
||||||
if( !data.read(root) )
|
if( !data.read(root) )
|
||||||
|
@ -149,7 +149,7 @@ protected:
|
|||||||
Ptr<MaskGenerator> maskGenerator;
|
Ptr<MaskGenerator> maskGenerator;
|
||||||
UMat ugrayImage, uimageBuffer;
|
UMat ugrayImage, uimageBuffer;
|
||||||
UMat ufacepos, ustages, ustumps, usubsets;
|
UMat ufacepos, ustages, ustumps, usubsets;
|
||||||
ocl::Kernel cascadeKernel;
|
ocl::Kernel haarKernel, lbpKernel;
|
||||||
bool tryOpenCL;
|
bool tryOpenCL;
|
||||||
|
|
||||||
Mutex mtx;
|
Mutex mtx;
|
||||||
@ -392,6 +392,7 @@ public:
|
|||||||
|
|
||||||
virtual bool setImage(InputArray image, Size _origWinSize, Size);
|
virtual bool setImage(InputArray image, Size _origWinSize, Size);
|
||||||
virtual bool setWindow(Point pt);
|
virtual bool setWindow(Point pt);
|
||||||
|
virtual void getUMats(std::vector<UMat>& bufs);
|
||||||
|
|
||||||
int operator()(int featureIdx) const
|
int operator()(int featureIdx) const
|
||||||
{ return optfeaturesPtr[featureIdx].calc(pwin); }
|
{ return optfeaturesPtr[featureIdx].calc(pwin); }
|
||||||
|
@ -105,7 +105,7 @@ __kernel void runHaarClassifierStump(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
__kernel void runLBPClassifierStump(
|
__kernel void runLBPClassifierStump(
|
||||||
__global const int* sum,
|
__global const int* sum,
|
||||||
int sumstep, int sumoffset,
|
int sumstep, int sumoffset,
|
||||||
@ -119,45 +119,48 @@ __kernel void runLBPClassifierStump(
|
|||||||
|
|
||||||
volatile __global int* facepos,
|
volatile __global int* facepos,
|
||||||
int2 imgsize, int xyscale, float factor,
|
int2 imgsize, int xyscale, float factor,
|
||||||
int4 normrect, int2 windowsize, int maxFaces)
|
int2 windowsize, int maxFaces)
|
||||||
{
|
{
|
||||||
int ix = get_global_id(0)*xyscale;
|
int ix = get_global_id(0)*xyscale;
|
||||||
int iy = get_global_id(1)*xyscale;
|
int iy = get_global_id(1)*xyscale;
|
||||||
sumstep /= sizeof(int);
|
sumstep /= sizeof(int);
|
||||||
sqsumstep /= sizeof(int);
|
|
||||||
|
|
||||||
if( ix < imgsize.x && iy < imgsize.y )
|
if( ix < imgsize.x && iy < imgsize.y )
|
||||||
{
|
{
|
||||||
int stageIdx;
|
int stageIdx;
|
||||||
__global const Stump* stump = stumps;
|
__global const Stump* stump = stumps;
|
||||||
|
__global const int* p = sum + mad24(iy, sumstep, ix);
|
||||||
|
|
||||||
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
|
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
|
||||||
{
|
{
|
||||||
int i, ntrees = stages[stageIdx].ntrees;
|
int i, ntrees = stages[stageIdx].ntrees;
|
||||||
float s = 0.f;
|
float s = 0.f;
|
||||||
for( i = 0; i < ntrees; i++, stump++ )
|
for( i = 0; i < ntrees; i++, stump++, bitsets += bitsetSize )
|
||||||
{
|
{
|
||||||
float4 st = stump->st;
|
float4 st = stump->st;
|
||||||
__global const OptLBPFeature* f = optfeatures + as_int(st.x);
|
__global const OptLBPFeature* f = optfeatures + as_int(st.x);
|
||||||
int16 ofs = f->ofs;
|
int16 ofs = f->ofs;
|
||||||
|
|
||||||
|
#define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \
|
||||||
|
((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
|
||||||
|
|
||||||
|
int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p );
|
||||||
|
|
||||||
int4 ofs = f->ofs[0];
|
int mask, idx = (CALC_SUM_OFS_( ofs.s0, ofs.s1, ofs.s4, ofs.s5, p ) >= cval ? 4 : 0); // 0
|
||||||
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
|
idx |= (CALC_SUM_OFS_( ofs.s1, ofs.s2, ofs.s5, ofs.s6, p ) >= cval ? 2 : 0); // 1
|
||||||
ofs = f->ofs[1];
|
idx |= (CALC_SUM_OFS_( ofs.s2, ofs.s3, ofs.s6, ofs.s7, p ) >= cval ? 1 : 0); // 2
|
||||||
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
|
|
||||||
if( weight.z > 0 )
|
|
||||||
{
|
|
||||||
ofs = f->ofs[2];
|
|
||||||
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
|
|
||||||
}
|
|
||||||
|
|
||||||
s += (sval < st.y*nf) ? st.z : st.w;
|
mask = (CALC_SUM_OFS_( ofs.s6, ofs.s7, ofs.sa, ofs.sb, p ) >= cval ? 16 : 0); // 5
|
||||||
|
mask |= (CALC_SUM_OFS_( ofs.sa, ofs.sb, ofs.se, ofs.sf, p ) >= cval ? 8 : 0); // 8
|
||||||
|
mask |= (CALC_SUM_OFS_( ofs.s9, ofs.sa, ofs.sd, ofs.se, p ) >= cval ? 4 : 0); // 7
|
||||||
|
mask |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0); // 6
|
||||||
|
mask |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0); // 3
|
||||||
|
|
||||||
|
s += (bitsets[idx] & (1 << mask)) ? st.z : st.w;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( s < stages[stageIdx].threshold )
|
if( s < stages[stageIdx].threshold )
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( stageIdx == nstages )
|
if( stageIdx == nstages )
|
||||||
@ -174,4 +177,4 @@ __kernel void runLBPClassifierStump(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user