Merge pull request #3518 from wangyan42164:ocl_cascade_detect
This commit is contained in:
commit
060d67517a
@ -1060,6 +1060,7 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
}
|
||||
|
||||
int nstages = (int)data.stages.size();
|
||||
int splitstage_ocl = 1;
|
||||
|
||||
if( featureType == FeatureEvaluator::HAAR )
|
||||
{
|
||||
@ -1071,11 +1072,11 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
{
|
||||
String opts;
|
||||
if (lbufSize.area())
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D NODE_COUNT=%d",
|
||||
localsz.width, localsz.height, lbufSize.area(), lbufSize.width, data.maxNodesPerTree);
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D NODE_COUNT=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
|
||||
localsz.width, localsz.height, lbufSize.area(), lbufSize.width, data.maxNodesPerTree, splitstage_ocl, nstages, MAX_FACES);
|
||||
else
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D NODE_COUNT=%d",
|
||||
localsz.width, localsz.height, data.maxNodesPerTree);
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D NODE_COUNT=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
|
||||
localsz.width, localsz.height, data.maxNodesPerTree, splitstage_ocl, nstages, MAX_FACES);
|
||||
haarKernel.create("runHaarClassifier", ocl::objdetect::cascadedetect_oclsrc, opts);
|
||||
if( haarKernel.empty() )
|
||||
return false;
|
||||
@ -1083,7 +1084,6 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
|
||||
Rect normrect = haar->getNormRect();
|
||||
int sqofs = haar->getSquaresOffset();
|
||||
int splitstage_ocl = 1;
|
||||
|
||||
haarKernel.args((int)scales.size(),
|
||||
ocl::KernelArg::PtrReadOnly(bufs[0]), // scaleData
|
||||
@ -1091,13 +1091,12 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
|
||||
|
||||
// cascade classifier
|
||||
splitstage_ocl, nstages,
|
||||
ocl::KernelArg::PtrReadOnly(ustages),
|
||||
ocl::KernelArg::PtrReadOnly(unodes),
|
||||
ocl::KernelArg::PtrReadOnly(uleaves),
|
||||
|
||||
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
|
||||
normrect, sqofs, data.origWinSize, (int)MAX_FACES);
|
||||
normrect, sqofs, data.origWinSize);
|
||||
ok = haarKernel.run(2, globalsize, localsize, true);
|
||||
}
|
||||
else if( featureType == FeatureEvaluator::LBP )
|
||||
@ -1113,16 +1112,16 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
{
|
||||
String opts;
|
||||
if (lbufSize.area())
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d",
|
||||
localsz.width, localsz.height, lbufSize.area(), lbufSize.width);
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SUM_BUF_SIZE=%d -D SUM_BUF_STEP=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
|
||||
localsz.width, localsz.height, lbufSize.area(), lbufSize.width, splitstage_ocl, nstages, MAX_FACES);
|
||||
else
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d", localsz.width, localsz.height);
|
||||
opts = format("-D LOCAL_SIZE_X=%d -D LOCAL_SIZE_Y=%d -D SPLIT_STAGE=%d -D N_STAGES=%d -D MAX_FACES=%d",
|
||||
localsz.width, localsz.height, splitstage_ocl, nstages, MAX_FACES);
|
||||
lbpKernel.create("runLBPClassifierStumpSimple", ocl::objdetect::cascadedetect_oclsrc, opts);
|
||||
if( lbpKernel.empty() )
|
||||
return false;
|
||||
}
|
||||
|
||||
int splitstage_ocl = 1;
|
||||
int subsetSize = (data.ncategories + 31)/32;
|
||||
lbpKernel.args((int)scales.size(),
|
||||
ocl::KernelArg::PtrReadOnly(bufs[0]), // scaleData
|
||||
@ -1130,14 +1129,13 @@ bool CascadeClassifierImpl::ocl_detectMultiScaleNoGrouping( const std::vector<fl
|
||||
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
|
||||
|
||||
// cascade classifier
|
||||
splitstage_ocl, nstages,
|
||||
ocl::KernelArg::PtrReadOnly(ustages),
|
||||
ocl::KernelArg::PtrReadOnly(unodes),
|
||||
ocl::KernelArg::PtrReadOnly(usubsets),
|
||||
subsetSize,
|
||||
|
||||
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
|
||||
data.origWinSize, (int)MAX_FACES);
|
||||
data.origWinSize);
|
||||
|
||||
ok = lbpKernel.run(2, globalsize, localsize, true);
|
||||
}
|
||||
|
@ -70,14 +70,12 @@ void runHaarClassifier(
|
||||
__global const int* sum,
|
||||
int _sumstep, int sumoffset,
|
||||
__global const OptHaarFeature* optfeatures,
|
||||
|
||||
int splitstage, int nstages,
|
||||
__global const Stage* stages,
|
||||
__global const Node* nodes,
|
||||
__global const float* leaves0,
|
||||
|
||||
volatile __global int* facepos,
|
||||
int4 normrect, int sqofs, int2 windowsize, int maxFaces)
|
||||
int4 normrect, int sqofs, int2 windowsize)
|
||||
{
|
||||
int lx = get_local_id(0);
|
||||
int ly = get_local_id(1);
|
||||
@ -165,7 +163,7 @@ void runHaarClassifier(
|
||||
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
|
||||
nf = nf > 0 ? nf : 1.f;
|
||||
|
||||
for( stageIdx = 0; stageIdx < splitstage; stageIdx++ )
|
||||
for( stageIdx = 0; stageIdx < SPLIT_STAGE; stageIdx++ )
|
||||
{
|
||||
int ntrees = stages[stageIdx].ntrees;
|
||||
float s = 0.f;
|
||||
@ -221,7 +219,7 @@ void runHaarClassifier(
|
||||
break;
|
||||
}
|
||||
|
||||
if( stageIdx == splitstage && (ystep == 1 || ((ix | iy) & 1) == 0) )
|
||||
if( stageIdx == SPLIT_STAGE && (ystep == 1 || ((ix | iy) & 1) == 0) )
|
||||
{
|
||||
int count = atomic_inc(lcount);
|
||||
lbuf[count] = (int)(ix | (iy << 8));
|
||||
@ -229,7 +227,7 @@ void runHaarClassifier(
|
||||
}
|
||||
}
|
||||
|
||||
for( stageIdx = splitstage; stageIdx < nstages; stageIdx++ )
|
||||
for( stageIdx = SPLIT_STAGE; stageIdx < N_STAGES; stageIdx++ )
|
||||
{
|
||||
int nrects = lcount[0];
|
||||
|
||||
@ -335,13 +333,13 @@ void runHaarClassifier(
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if( stageIdx == nstages )
|
||||
if( stageIdx == N_STAGES )
|
||||
{
|
||||
int nrects = lcount[0];
|
||||
if( lidx < nrects )
|
||||
{
|
||||
int nfaces = atomic_inc(facepos);
|
||||
if( nfaces < maxFaces )
|
||||
if( nfaces < MAX_FACES )
|
||||
{
|
||||
volatile __global int* face = facepos + 1 + nfaces*3;
|
||||
int val = lbuf[lidx];
|
||||
@ -364,15 +362,13 @@ __kernel void runLBPClassifierStumpSimple(
|
||||
__global const int* sum,
|
||||
int _sumstep, int sumoffset,
|
||||
__global const OptLBPFeature* optfeatures,
|
||||
|
||||
int splitstage, int nstages,
|
||||
__global const Stage* stages,
|
||||
__global const Stump* stumps,
|
||||
__global const int* bitsets,
|
||||
int bitsetSize,
|
||||
|
||||
volatile __global int* facepos,
|
||||
int2 windowsize, int maxFaces)
|
||||
int2 windowsize)
|
||||
{
|
||||
int lx = get_local_id(0);
|
||||
int ly = get_local_id(1);
|
||||
@ -381,7 +377,6 @@ __kernel void runLBPClassifierStumpSimple(
|
||||
int groupIdx = get_group_id(1)*get_num_groups(0) + get_group_id(0);
|
||||
int ngroups = get_num_groups(0)*get_num_groups(1);
|
||||
int scaleIdx, tileIdx, stageIdx;
|
||||
int startStage = 0, endStage = nstages;
|
||||
int sumstep = (int)(_sumstep/sizeof(int));
|
||||
|
||||
for( scaleIdx = nscales-1; scaleIdx >= 0; scaleIdx-- )
|
||||
@ -404,7 +399,7 @@ __kernel void runLBPClassifierStumpSimple(
|
||||
__global const Stump* stump = stumps;
|
||||
__global const int* bitset = bitsets;
|
||||
|
||||
for( stageIdx = 0; stageIdx < endStage; stageIdx++ )
|
||||
for( stageIdx = 0; stageIdx < N_STAGES; stageIdx++ )
|
||||
{
|
||||
int i, ntrees = stages[stageIdx].ntrees;
|
||||
float s = 0.f;
|
||||
@ -433,10 +428,10 @@ __kernel void runLBPClassifierStumpSimple(
|
||||
break;
|
||||
}
|
||||
|
||||
if( stageIdx == nstages )
|
||||
if( stageIdx == N_STAGES )
|
||||
{
|
||||
int nfaces = atomic_inc(facepos);
|
||||
if( nfaces < maxFaces )
|
||||
if( nfaces < MAX_FACES )
|
||||
{
|
||||
volatile __global int* face = facepos + 1 + nfaces*3;
|
||||
face[0] = scaleIdx;
|
||||
@ -455,15 +450,13 @@ void runLBPClassifierStump(
|
||||
__global const int* sum,
|
||||
int _sumstep, int sumoffset,
|
||||
__global const OptLBPFeature* optfeatures,
|
||||
|
||||
int splitstage, int nstages,
|
||||
__global const Stage* stages,
|
||||
__global const Stump* stumps,
|
||||
__global const int* bitsets,
|
||||
int bitsetSize,
|
||||
|
||||
volatile __global int* facepos,
|
||||
int2 windowsize, int maxFaces)
|
||||
int2 windowsize)
|
||||
{
|
||||
int lx = get_local_id(0);
|
||||
int ly = get_local_id(1);
|
||||
@ -525,7 +518,7 @@ void runLBPClassifierStump(
|
||||
__global const int* p = psum0 + mad24(iy, sumstep, ix);
|
||||
#endif
|
||||
|
||||
for( stageIdx = 0; stageIdx < splitstage; stageIdx++ )
|
||||
for( stageIdx = 0; stageIdx < SPLIT_STAGE; stageIdx++ )
|
||||
{
|
||||
int ntrees = stages[stageIdx].ntrees;
|
||||
float s = 0.f;
|
||||
@ -554,14 +547,14 @@ void runLBPClassifierStump(
|
||||
break;
|
||||
}
|
||||
|
||||
if( stageIdx == splitstage && (ystep == 1 || ((ix | iy) & 1) == 0) )
|
||||
if( stageIdx == SPLIT_STAGE && (ystep == 1 || ((ix | iy) & 1) == 0) )
|
||||
{
|
||||
int count = atomic_inc(lcount);
|
||||
lbuf[count] = (int)(ix | (iy << 8));
|
||||
}
|
||||
}
|
||||
|
||||
for( stageIdx = splitstage; stageIdx < nstages; stageIdx++ )
|
||||
for( stageIdx = SPLIT_STAGE; stageIdx < N_STAGES; stageIdx++ )
|
||||
{
|
||||
int nrects = lcount[0];
|
||||
|
||||
@ -639,13 +632,13 @@ void runLBPClassifierStump(
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if( stageIdx == nstages )
|
||||
if( stageIdx == N_STAGES )
|
||||
{
|
||||
int nrects = lcount[0];
|
||||
if( lidx < nrects )
|
||||
{
|
||||
int nfaces = atomic_inc(facepos);
|
||||
if( nfaces < maxFaces )
|
||||
if( nfaces < MAX_FACES )
|
||||
{
|
||||
volatile __global int* face = facepos + 1 + nfaces*3;
|
||||
int val = lbuf[lidx];
|
||||
|
Loading…
x
Reference in New Issue
Block a user