LBP classifier refactored; added a parameter for the maximum size of a detected object
This commit is contained in:
parent
e6f7e4d83e
commit
a9f2f522e7
@ -1435,7 +1435,8 @@ public:
|
||||
bool load(const std::string& filename);
|
||||
void release();
|
||||
|
||||
int detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objectsBuf, double scaleFactor = 1.1, int minNeighbors = 4/*, Size minSize = Size()*/);
|
||||
int detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objectsBuf, double scaleFactor = 1.1, int minNeighbors = 4,
|
||||
cv::Size maxObjectSize = cv::Size()/*, Size minSize = Size()*/);
|
||||
void preallocateIntegralBuffer(cv::Size desired);
|
||||
|
||||
bool findLargestObject;
|
||||
|
@ -48,20 +48,6 @@ using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
using namespace std;
|
||||
|
||||
struct Stage
|
||||
{
|
||||
int first;
|
||||
int ntrees;
|
||||
float threshold;
|
||||
};
|
||||
|
||||
struct DTreeNode
|
||||
{
|
||||
int featureIdx;
|
||||
int left;
|
||||
int right;
|
||||
};
|
||||
|
||||
#if !defined (HAVE_CUDA)
|
||||
// ============ old fashioned haar cascade ==============================================//
|
||||
cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() { throw_nogpu(); }
|
||||
@ -128,6 +114,13 @@ bool cv::gpu::CascadeClassifier_GPU_LBP::load(const string& classifierAsXml)
|
||||
#define GPU_CC_FEATURES "features"
|
||||
#define GPU_CC_RECT "rect"
|
||||
|
||||
// Host-side record for one cascade stage. detectMultiScale divides
// stage_mat.cols by sizeof(Stage) to get the stage count it hands to classifyStump.
// NOTE(review): the stage buffer is later reinterpreted as device-side Stage
// records, so this layout presumably has to match the device definition — confirm.
struct Stage
|
||||
{
|
||||
// Not referenced in the visible code; presumably the index of the stage's first tree — TODO confirm.
int first;
|
||||
// Number of trees in the stage; the device kernel loops this many times per stage.
int ntrees;
|
||||
// The device kernel rejects a window as soon as a stage's accumulated sum falls below this value.
float threshold;
|
||||
};
|
||||
|
||||
// currently only stump based boost classifiers are supported
|
||||
bool CascadeClassifier_GPU_LBP::read(const FileNode &root)
|
||||
{
|
||||
@ -279,12 +272,26 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
namespace lbp
|
||||
{
|
||||
void cascadeClassify(const DevMem2Db stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, const DevMem2Db features,
|
||||
const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors = 4, cudaStream_t stream = 0);
|
||||
void classifyStump(const DevMem2Db mstages,
|
||||
const int nstages,
|
||||
const DevMem2Di mnodes,
|
||||
const DevMem2Df mleaves,
|
||||
const DevMem2Di msubsets,
|
||||
const DevMem2Db mfeatures,
|
||||
const DevMem2Di integral,
|
||||
const int workWidth,
|
||||
const int workHeight,
|
||||
const int clWidth,
|
||||
const int clHeight,
|
||||
float scale,
|
||||
int step,
|
||||
int subsetSize,
|
||||
DevMem2D_<int4> objects);
|
||||
}
|
||||
}}}
|
||||
|
||||
int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objects, double scaleFactor, int minNeighbors /*, Size minSize=Size()*/)
|
||||
int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objects,
|
||||
double scaleFactor, int minNeighbors, cv::Size maxObjectSize /*, Size minSize=Size()*/)
|
||||
{
|
||||
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U );
|
||||
CV_Assert(!empty());
|
||||
@ -299,28 +306,35 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
|
||||
// temp solution
|
||||
objects.create(image.rows, image.cols, CV_32SC4);
|
||||
|
||||
scaledImageBuffer.create(image.size(), image.type());
|
||||
if (maxObjectSize == cv::Size())
|
||||
maxObjectSize = image.size();
|
||||
|
||||
scaledImageBuffer.create(image.rows + 1, image.cols + 1, CV_8U);
|
||||
|
||||
// TODO: specify max objects size
|
||||
for( double factor = 1; ; factor *= scaleFactor )
|
||||
{
|
||||
cv::Size windowSize(cvRound(NxM.width * factor), cvRound(NxM.height * factor));
|
||||
cv::Size scaledImageSize(cvRound( image.cols / factor ), cvRound( image.rows / factor ));
|
||||
cv::Size processingRectSize( scaledImageSize.width - NxM.width + 1, scaledImageSize.height - NxM.height + 1 );
|
||||
|
||||
// nothing to do
|
||||
if (processingRectSize.width <= 0 || processingRectSize.height <= 0 )
|
||||
break;
|
||||
// TODO: min/max object size checking
|
||||
cv::gpu::resize(image, scaledImageBuffer, scaledImageSize, 0, 0, INTER_NEAREST);
|
||||
//prepare image for evaluation
|
||||
|
||||
if( windowSize.width > maxObjectSize.width || windowSize.height > maxObjectSize.height )
|
||||
break;
|
||||
|
||||
// if( windowSize.width < minObjectSize.width || windowSize.height < minObjectSize.height )
|
||||
// continue;
|
||||
|
||||
cv::gpu::resize(image, scaledImageBuffer, scaledImageSize, 0, 0, CV_INTER_LINEAR);
|
||||
|
||||
integral.create(cv::Size(scaledImageSize.width + 1, scaledImageSize.height + 1), CV_32SC1);
|
||||
cv::gpu::integral(scaledImageBuffer, integral);
|
||||
|
||||
int step = (factor <= 2.) + 1;
|
||||
|
||||
cv::gpu::device::lbp::cascadeClassify(stage_mat, trees_mat, nodes_mat, leaves_mat, subsets_mat, features_mat,
|
||||
integral, processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, scaleFactor, step, subsetSize, objects, minNeighbors);
|
||||
cv::gpu::device::lbp::classifyStump(stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, leaves_mat, subsets_mat, features_mat,
|
||||
integral, processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, scaleFactor, step, subsetSize, objects);
|
||||
}
|
||||
// TODO: reject levels
|
||||
|
||||
|
@ -46,54 +46,69 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
namespace lbp
|
||||
{
|
||||
__global__ void lbp_classify(const DevMem2D_< ::cv::gpu::device::Stage> stages, const DevMem2Di trees, const DevMem2D_< ::cv::gpu::device::ClNode> nodes,
|
||||
const DevMem2Df leaves, const DevMem2Di subsets,
|
||||
const DevMem2D_<uchar4> features, const DevMem2Di integral, float step, int subsetSize, DevMem2D_<int4> objects, float scale, int clWidth, int clHeight)
|
||||
__global__ void lbp_classify_stump(Stage* stages, int nstages, ClNode* nodes, const float* leaves, const int* subsets, const uchar4* features,
|
||||
const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects)
|
||||
{
|
||||
unsigned int x = threadIdx.x * step;
|
||||
unsigned int y = blockIdx.x * step;
|
||||
int nodeOfs = 0, leafOfs = 0;
|
||||
::cv::gpu::device::Feature evaluator;
|
||||
int y = threadIdx.x * scale;
|
||||
int x = blockIdx.x * scale;
|
||||
|
||||
for (int s = 0; s < stages.cols; s++ )
|
||||
int i = 0;
|
||||
|
||||
int current_node = 0;
|
||||
int current_leave = 0;
|
||||
|
||||
LBP evaluator;
|
||||
for (int s = 0; s < nstages; s++ )
|
||||
{
|
||||
::cv::gpu::device::Stage stage = stages(0, s);
|
||||
int sum = 0;
|
||||
for (int w = 0; w < stage.ntrees; w++)
|
||||
{
|
||||
::cv::gpu::device::ClNode node = nodes(0, nodeOfs);
|
||||
uchar4 feature = features(0, node.featureIdx);
|
||||
float sum = 0;
|
||||
Stage stage = stages[s];
|
||||
|
||||
uchar c = evaluator(y, x, feature, integral);
|
||||
const int subsetIdx = (nodeOfs * subsetSize);
|
||||
int idx = subsetIdx + ((c >> 5) & ( 1 << (c & 31)) ? leafOfs : leafOfs + 1);
|
||||
sum += leaves(0, subsets(0, idx) );
|
||||
nodeOfs++;
|
||||
leafOfs += 2;
|
||||
for (int t = 0; t < stage.ntrees; t++)
|
||||
{
|
||||
ClNode node = nodes[current_node];
|
||||
|
||||
uchar4 feature = features[node.featureIdx];
|
||||
int c = evaluator(y, x, feature, integral);
|
||||
const int* subsetIdx = subsets + (current_node * subsetSize);
|
||||
|
||||
int idx = (subsetIdx[c >> 5] & ( 1 << (c & 31))) ? current_leave : current_leave + 1;
|
||||
sum += leaves[idx];
|
||||
current_node += 1;
|
||||
current_leave += 2;
|
||||
}
|
||||
|
||||
i = s;
|
||||
if (sum < stage.threshold)
|
||||
return;
|
||||
}
|
||||
|
||||
int4 rect;
|
||||
rect.x = roundf(x * scale);
|
||||
rect.y = roundf(y * scale);
|
||||
rect.z = roundf(clWidth * scale);
|
||||
rect.w = roundf(clHeight * scale);
|
||||
objects(blockIdx.x, threadIdx.x) = rect;
|
||||
rect.z = roundf(clWidth);
|
||||
rect.w = roundf(clHeight);
|
||||
|
||||
if(i >= 19)
|
||||
printf( "GPU detected [%d, %d] - [%d, %d]\n", rect.x, rect.y, rect.z, rect.w);
|
||||
|
||||
}
|
||||
|
||||
void cascadeClassify(const DevMem2Db bstages, const DevMem2Di trees, const DevMem2Db bnodes, const DevMem2Df leaves, const DevMem2Di subsets, const DevMem2Db bfeatures,
|
||||
const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors, cudaStream_t stream)
|
||||
void classifyStump(const DevMem2Db mstages, const int nstages, const DevMem2Di mnodes, const DevMem2Df mleaves, const DevMem2Di msubsets, const DevMem2Db mfeatures,
|
||||
const DevMem2Di integral, const int workWidth, const int workHeight, const int clWidth, const int clHeight, float scale, int step, int subsetSize,
|
||||
DevMem2D_<int4> objects)
|
||||
{
|
||||
printf("CascadeClassify");
|
||||
int blocks = ceilf(workHeight / (float)step);
|
||||
int blocks = ceilf(workHeight / (float)step);
|
||||
int threads = ceilf(workWidth / (float)step);
|
||||
DevMem2D_< ::cv::gpu::device::Stage> stages = DevMem2D_< ::cv::gpu::device::Stage>(bstages);
|
||||
DevMem2D_<uchar4> features = (DevMem2D_<uchar4>)bfeatures;
|
||||
DevMem2D_< ::cv::gpu::device::ClNode> nodes = DevMem2D_< ::cv::gpu::device::ClNode>(bnodes);
|
||||
printf("blocks %d, threads %d\n", blocks, threads);
|
||||
|
||||
lbp_classify<<<blocks, threads>>>(stages, trees, nodes, leaves, subsets, features, integral, step, subsetSize, objects, scale, clWidth, clHeight);
|
||||
Stage* stages = (Stage*)(mstages.ptr());
|
||||
ClNode* nodes = (ClNode*)(mnodes.ptr());
|
||||
const float* leaves = mleaves.ptr();
|
||||
const int* subsets = msubsets.ptr();
|
||||
const uchar4* features = (uchar4*)(mfeatures.ptr());
|
||||
|
||||
lbp_classify_stump<<<blocks, threads>>>(stages, nstages, nodes, leaves, subsets, features, integral,
|
||||
workWidth, workHeight, clWidth, clHeight, scale, step, subsetSize, objects);
|
||||
}
|
||||
}
|
||||
}}}
|
@ -44,62 +44,58 @@
|
||||
#define __OPENCV_GPU_DEVICE_LBP_HPP_
|
||||
|
||||
#include "internal_shared.hpp"
|
||||
// #include "opencv2/gpu/device/border_interpolate.hpp"
|
||||
// #include "opencv2/gpu/device/vec_traits.hpp"
|
||||
// #include "opencv2/gpu/device/vec_math.hpp"
|
||||
// #include "opencv2/gpu/device/saturate_cast.hpp"
|
||||
// #include "opencv2/gpu/device/filters.hpp"
|
||||
|
||||
// #define CALC_SUM_(p0, p1, p2, p3, offset) \
|
||||
// ((p0)[offset] - (p1)[offset] - (p2)[offset] + (p3)[offset])
|
||||
|
||||
// __device__ __forceinline__ int sum(p0, p1, p2, p3, offset)
|
||||
// {
|
||||
|
||||
// }
|
||||
|
||||
namespace cv { namespace gpu { namespace device {
|
||||
|
||||
namespace lbp{
|
||||
struct Stage
|
||||
{
|
||||
int first;
|
||||
int ntrees;
|
||||
float threshold;
|
||||
__device__ __forceinline__ Stage(int f = 0, int n = 0, float t = 0.f) : first(f), ntrees(n), threshold(t) {}
|
||||
__device__ __forceinline__ Stage(const Stage& other) : first(other.first), ntrees(other.ntrees), threshold(other.threshold) {}
|
||||
};
|
||||
|
||||
struct ClNode
|
||||
{
|
||||
int featureIdx;
|
||||
int left;
|
||||
int right;
|
||||
__device__ __forceinline__ ClNode(int f = 0, int l = 0, int r = 0) : featureIdx(f), left(l), right(r) {}
|
||||
__device__ __forceinline__ ClNode(const ClNode& other) : featureIdx(other.featureIdx), left(other.left), right(other.right) {}
|
||||
int featureIdx;
|
||||
};
|
||||
|
||||
struct Feature
|
||||
struct LBP
|
||||
{
|
||||
__device__ __forceinline__ Feature(const Feature& other) {(void)other;}
|
||||
__device__ __forceinline__ Feature() {}
|
||||
__device__ __forceinline__ LBP(const LBP& other) {(void)other;}
|
||||
__device__ __forceinline__ LBP() {}
|
||||
|
||||
// feature as uchar4: x, y - top-left corner; z, w - cell width and height on entry (converted below to the bottom-right corner of the first cell)
|
||||
__device__ __forceinline__ uchar operator() (unsigned int y, unsigned int x, uchar4 feature, const DevMem2Di integral) const
|
||||
__device__ __forceinline__ int operator() (unsigned int y, unsigned int x, uchar4 feature, const DevMem2Di integral) const
|
||||
{
|
||||
int x_off = 2 * feature.z;
|
||||
int y_off = 2 * feature.w;
|
||||
// printf("feature: %d %d %d %d\n", (int)feature.x, (int)feature.y, (int)feature.z, (int)feature.w);
|
||||
feature.z += feature.x;
|
||||
feature.w += feature.y;
|
||||
|
||||
// load feature key points
|
||||
int anchors[16];
|
||||
/*
|
||||
P0-----P1-----P2-----P3
|
||||
| | | |
|
||||
P4-----P5-----P6-----P7
|
||||
| | | |
|
||||
P8-----P9-----P10----P11
|
||||
| | | |
|
||||
P12----P13----P14----P15
|
||||
*/
|
||||
anchors[0] = integral(y + feature.y, x + feature.x);
|
||||
anchors[1] = integral(y + feature.y, x + feature.z);
|
||||
anchors[2] = integral(y + feature.y, x + x_off + feature.x);
|
||||
anchors[3] = integral(y + feature.y, x + x_off + feature.z);
|
||||
anchors[2] = integral(y + feature.y, x + feature.x + x_off);
|
||||
anchors[3] = integral(y + feature.y, x + feature.z + x_off);
|
||||
|
||||
anchors[4] = integral(y + feature.w, x + feature.x);
|
||||
anchors[5] = integral(y + feature.w, x + feature.z);
|
||||
anchors[6] = integral(y + feature.w, x + x_off + feature.x);
|
||||
anchors[7] = integral(y + feature.w, x + x_off + feature.z);
|
||||
anchors[6] = integral(y + feature.w, x + feature.x + x_off);
|
||||
anchors[7] = integral(y + feature.w, x + feature.z + x_off);
|
||||
|
||||
anchors[8] = integral(y + y_off + feature.y, x + feature.x);
|
||||
anchors[9] = integral(y + y_off + feature.y, x + feature.z);
|
||||
@ -114,7 +110,7 @@ namespace cv { namespace gpu { namespace device {
|
||||
// calculate feature
|
||||
int sum = anchors[5] - anchors[6] - anchors[9] + anchors[10];
|
||||
|
||||
uchar response = (( (anchors[ 0] - anchors[ 1] - anchors[ 4] + anchors[ 5]) >= sum )? 128 : 0)
|
||||
int response = (( (anchors[ 0] - anchors[ 1] - anchors[ 4] + anchors[ 5]) >= sum )? 128 : 0)
|
||||
|(( (anchors[ 1] - anchors[ 2] - anchors[ 5] + anchors[ 6]) >= sum )? 64 : 0)
|
||||
|(( (anchors[ 2] - anchors[ 3] - anchors[ 6] + anchors[ 7]) >= sum )? 32 : 0)
|
||||
|(( (anchors[ 6] - anchors[ 7] - anchors[10] + anchors[11]) >= sum )? 16 : 0)
|
||||
@ -122,11 +118,12 @@ namespace cv { namespace gpu { namespace device {
|
||||
|(( (anchors[ 9] - anchors[10] - anchors[13] + anchors[14]) >= sum )? 4 : 0)
|
||||
|(( (anchors[ 8] - anchors[ 9] - anchors[12] + anchors[13]) >= sum )? 2 : 0)
|
||||
|(( (anchors[ 4] - anchors[ 5] - anchors[ 8] + anchors[ 9]) >= sum )? 1 : 0);
|
||||
|
||||
return response;
|
||||
}
|
||||
|
||||
};
|
||||
} // lbp
|
||||
|
||||
|
||||
} } }// namespaces
|
||||
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user