LBP classifier was refactored, added parameter for max size of detected object

This commit is contained in:
Marina Kolpakova 2012-07-02 08:08:11 +00:00
parent e6f7e4d83e
commit a9f2f522e7
4 changed files with 113 additions and 86 deletions

View File

@ -1435,7 +1435,8 @@ public:
bool load(const std::string& filename); bool load(const std::string& filename);
void release(); void release();
int detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objectsBuf, double scaleFactor = 1.1, int minNeighbors = 4/*, Size minSize = Size()*/); int detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objectsBuf, double scaleFactor = 1.1, int minNeighbors = 4,
cv::Size maxObjectSize = cv::Size()/*, Size minSize = Size()*/);
void preallocateIntegralBuffer(cv::Size desired); void preallocateIntegralBuffer(cv::Size desired);
bool findLargestObject; bool findLargestObject;

View File

@ -48,20 +48,6 @@ using namespace cv;
using namespace cv::gpu; using namespace cv::gpu;
using namespace std; using namespace std;
struct Stage
{
int first;
int ntrees;
float threshold;
};
struct DTreeNode
{
int featureIdx;
int left;
int right;
};
#if !defined (HAVE_CUDA) #if !defined (HAVE_CUDA)
// ============ old fashioned haar cascade ==============================================// // ============ old fashioned haar cascade ==============================================//
cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() { throw_nogpu(); } cv::gpu::CascadeClassifier_GPU::CascadeClassifier_GPU() { throw_nogpu(); }
@ -128,6 +114,13 @@ bool cv::gpu::CascadeClassifier_GPU_LBP::load(const string& classifierAsXml)
#define GPU_CC_FEATURES "features" #define GPU_CC_FEATURES "features"
#define GPU_CC_RECT "rect" #define GPU_CC_RECT "rect"
struct Stage
{
int first;
int ntrees;
float threshold;
};
// currently only stump based boost classifiers are supported // currently only stump based boost classifiers are supported
bool CascadeClassifier_GPU_LBP::read(const FileNode &root) bool CascadeClassifier_GPU_LBP::read(const FileNode &root)
{ {
@ -279,12 +272,26 @@ namespace cv { namespace gpu { namespace device
{ {
namespace lbp namespace lbp
{ {
void cascadeClassify(const DevMem2Db stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets, const DevMem2Db features, void classifyStump(const DevMem2Db mstages,
const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors = 4, cudaStream_t stream = 0); const int nstages,
const DevMem2Di mnodes,
const DevMem2Df mleaves,
const DevMem2Di msubsets,
const DevMem2Db mfeatures,
const DevMem2Di integral,
const int workWidth,
const int workHeight,
const int clWidth,
const int clHeight,
float scale,
int step,
int subsetSize,
DevMem2D_<int4> objects);
} }
}}} }}}
int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objects, double scaleFactor, int minNeighbors /*, Size minSize=Size()*/) int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, GpuMat& scaledImageBuffer, GpuMat& objects,
double scaleFactor, int minNeighbors, cv::Size maxObjectSize /*, Size minSize=Size()*/)
{ {
CV_Assert( scaleFactor > 1 && image.depth() == CV_8U ); CV_Assert( scaleFactor > 1 && image.depth() == CV_8U );
CV_Assert(!empty()); CV_Assert(!empty());
@ -299,28 +306,35 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
// temp solution // temp solution
objects.create(image.rows, image.cols, CV_32SC4); objects.create(image.rows, image.cols, CV_32SC4);
scaledImageBuffer.create(image.size(), image.type()); if (maxObjectSize == cv::Size())
maxObjectSize = image.size();
scaledImageBuffer.create(image.rows + 1, image.cols + 1, CV_8U);
// TODO: specify max objects size
for( double factor = 1; ; factor *= scaleFactor ) for( double factor = 1; ; factor *= scaleFactor )
{ {
cv::Size windowSize(cvRound(NxM.width * factor), cvRound(NxM.height * factor)); cv::Size windowSize(cvRound(NxM.width * factor), cvRound(NxM.height * factor));
cv::Size scaledImageSize(cvRound( image.cols / factor ), cvRound( image.rows / factor )); cv::Size scaledImageSize(cvRound( image.cols / factor ), cvRound( image.rows / factor ));
cv::Size processingRectSize( scaledImageSize.width - NxM.width + 1, scaledImageSize.height - NxM.height + 1 ); cv::Size processingRectSize( scaledImageSize.width - NxM.width + 1, scaledImageSize.height - NxM.height + 1 );
// nothing to do
if (processingRectSize.width <= 0 || processingRectSize.height <= 0 ) if (processingRectSize.width <= 0 || processingRectSize.height <= 0 )
break; break;
// TODO: min max object sizes cheching
cv::gpu::resize(image, scaledImageBuffer, scaledImageSize, 0, 0, INTER_NEAREST); if( windowSize.width > maxObjectSize.width || windowSize.height > maxObjectSize.height )
//prepare image for evaluation break;
// if( windowSize.width < minObjectSize.width || windowSize.height < minObjectSize.height )
// continue;
cv::gpu::resize(image, scaledImageBuffer, scaledImageSize, 0, 0, CV_INTER_LINEAR);
integral.create(cv::Size(scaledImageSize.width + 1, scaledImageSize.height + 1), CV_32SC1); integral.create(cv::Size(scaledImageSize.width + 1, scaledImageSize.height + 1), CV_32SC1);
cv::gpu::integral(scaledImageBuffer, integral); cv::gpu::integral(scaledImageBuffer, integral);
int step = (factor <= 2.) + 1; int step = (factor <= 2.) + 1;
cv::gpu::device::lbp::cascadeClassify(stage_mat, trees_mat, nodes_mat, leaves_mat, subsets_mat, features_mat, cv::gpu::device::lbp::classifyStump(stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat, leaves_mat, subsets_mat, features_mat,
integral, processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, scaleFactor, step, subsetSize, objects, minNeighbors); integral, processingRectSize.width, processingRectSize.height, windowSize.width, windowSize.height, scaleFactor, step, subsetSize, objects);
} }
// TODO: reject levels // TODO: reject levels

View File

@ -46,54 +46,69 @@ namespace cv { namespace gpu { namespace device
{ {
namespace lbp namespace lbp
{ {
__global__ void lbp_classify(const DevMem2D_< ::cv::gpu::device::Stage> stages, const DevMem2Di trees, const DevMem2D_< ::cv::gpu::device::ClNode> nodes, __global__ void lbp_classify_stump(Stage* stages, int nstages, ClNode* nodes, const float* leaves, const int* subsets, const uchar4* features,
const DevMem2Df leaves, const DevMem2Di subsets, const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects)
const DevMem2D_<uchar4> features, const DevMem2Di integral, float step, int subsetSize, DevMem2D_<int4> objects, float scale, int clWidth, int clHeight)
{ {
unsigned int x = threadIdx.x * step; int y = threadIdx.x * scale;
unsigned int y = blockIdx.x * step; int x = blockIdx.x * scale;
int nodeOfs = 0, leafOfs = 0;
::cv::gpu::device::Feature evaluator;
for (int s = 0; s < stages.cols; s++ ) int i = 0;
{
::cv::gpu::device::Stage stage = stages(0, s);
int sum = 0;
for (int w = 0; w < stage.ntrees; w++)
{
::cv::gpu::device::ClNode node = nodes(0, nodeOfs);
uchar4 feature = features(0, node.featureIdx);
uchar c = evaluator(y, x, feature, integral); int current_node = 0;
const int subsetIdx = (nodeOfs * subsetSize); int current_leave = 0;
int idx = subsetIdx + ((c >> 5) & ( 1 << (c & 31)) ? leafOfs : leafOfs + 1);
sum += leaves(0, subsets(0, idx) ); LBP evaluator;
nodeOfs++; for (int s = 0; s < nstages; s++ )
leafOfs += 2; {
float sum = 0;
Stage stage = stages[s];
for (int t = 0; t < stage.ntrees; t++)
{
ClNode node = nodes[current_node];
uchar4 feature = features[node.featureIdx];
int c = evaluator(y, x, feature, integral);
const int* subsetIdx = subsets + (current_node * subsetSize);
int idx = (subsetIdx[c >> 5] & ( 1 << (c & 31))) ? current_leave : current_leave + 1;
sum += leaves[idx];
current_node += 1;
current_leave += 2;
} }
i = s;
if (sum < stage.threshold) if (sum < stage.threshold)
return; return;
} }
int4 rect; int4 rect;
rect.x = roundf(x * scale); rect.x = roundf(x * scale);
rect.y = roundf(y * scale); rect.y = roundf(y * scale);
rect.z = roundf(clWidth * scale); rect.z = roundf(clWidth);
rect.w = roundf(clHeight * scale); rect.w = roundf(clHeight);
objects(blockIdx.x, threadIdx.x) = rect;
if(i >= 19)
printf( "GPU detected [%d, %d] - [%d, %d]\n", rect.x, rect.y, rect.z, rect.w);
} }
void cascadeClassify(const DevMem2Db bstages, const DevMem2Di trees, const DevMem2Db bnodes, const DevMem2Df leaves, const DevMem2Di subsets, const DevMem2Db bfeatures, void classifyStump(const DevMem2Db mstages, const int nstages, const DevMem2Di mnodes, const DevMem2Df mleaves, const DevMem2Di msubsets, const DevMem2Db mfeatures,
const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors, cudaStream_t stream) const DevMem2Di integral, const int workWidth, const int workHeight, const int clWidth, const int clHeight, float scale, int step, int subsetSize,
DevMem2D_<int4> objects)
{ {
printf("CascadeClassify");
int blocks = ceilf(workHeight / (float)step); int blocks = ceilf(workHeight / (float)step);
int threads = ceilf(workWidth / (float)step); int threads = ceilf(workWidth / (float)step);
DevMem2D_< ::cv::gpu::device::Stage> stages = DevMem2D_< ::cv::gpu::device::Stage>(bstages); printf("blocks %d, threads %d\n", blocks, threads);
DevMem2D_<uchar4> features = (DevMem2D_<uchar4>)bfeatures;
DevMem2D_< ::cv::gpu::device::ClNode> nodes = DevMem2D_< ::cv::gpu::device::ClNode>(bnodes);
lbp_classify<<<blocks, threads>>>(stages, trees, nodes, leaves, subsets, features, integral, step, subsetSize, objects, scale, clWidth, clHeight); Stage* stages = (Stage*)(mstages.ptr());
ClNode* nodes = (ClNode*)(mnodes.ptr());
const float* leaves = mleaves.ptr();
const int* subsets = msubsets.ptr();
const uchar4* features = (uchar4*)(mfeatures.ptr());
lbp_classify_stump<<<blocks, threads>>>(stages, nstages, nodes, leaves, subsets, features, integral,
workWidth, workHeight, clWidth, clHeight, scale, step, subsetSize, objects);
} }
} }
}}} }}}

View File

@ -44,62 +44,58 @@
#define __OPENCV_GPU_DEVICE_LBP_HPP_ #define __OPENCV_GPU_DEVICE_LBP_HPP_
#include "internal_shared.hpp" #include "internal_shared.hpp"
// #include "opencv2/gpu/device/border_interpolate.hpp"
// #include "opencv2/gpu/device/vec_traits.hpp"
// #include "opencv2/gpu/device/vec_math.hpp"
// #include "opencv2/gpu/device/saturate_cast.hpp"
// #include "opencv2/gpu/device/filters.hpp"
// #define CALC_SUM_(p0, p1, p2, p3, offset) \
// ((p0)[offset] - (p1)[offset] - (p2)[offset] + (p3)[offset])
// __device__ __forceinline__ int sum(p0, p1, p2, p3, offset)
// {
// }
namespace cv { namespace gpu { namespace device { namespace cv { namespace gpu { namespace device {
namespace lbp{
struct Stage struct Stage
{ {
int first; int first;
int ntrees; int ntrees;
float threshold; float threshold;
__device__ __forceinline__ Stage(int f = 0, int n = 0, float t = 0.f) : first(f), ntrees(n), threshold(t) {}
__device__ __forceinline__ Stage(const Stage& other) : first(other.first), ntrees(other.ntrees), threshold(other.threshold) {}
}; };
struct ClNode struct ClNode
{ {
int featureIdx;
int left; int left;
int right; int right;
__device__ __forceinline__ ClNode(int f = 0, int l = 0, int r = 0) : featureIdx(f), left(l), right(r) {} int featureIdx;
__device__ __forceinline__ ClNode(const ClNode& other) : featureIdx(other.featureIdx), left(other.left), right(other.right) {}
}; };
struct Feature struct LBP
{ {
__device__ __forceinline__ Feature(const Feature& other) {(void)other;} __device__ __forceinline__ LBP(const LBP& other) {(void)other;}
__device__ __forceinline__ Feature() {} __device__ __forceinline__ LBP() {}
//feature as uchar x, y - left top, z,w - right bottom //feature as uchar x, y - left top, z,w - right bottom
__device__ __forceinline__ uchar operator() (unsigned int y, unsigned int x, uchar4 feature, const DevMem2Di integral) const __device__ __forceinline__ int operator() (unsigned int y, unsigned int x, uchar4 feature, const DevMem2Di integral) const
{ {
int x_off = 2 * feature.z; int x_off = 2 * feature.z;
int y_off = 2 * feature.w; int y_off = 2 * feature.w;
// printf("feature: %d %d %d %d\n", (int)feature.x, (int)feature.y, (int)feature.z, (int)feature.w);
feature.z += feature.x;
feature.w += feature.y;
// load feature key points // load feature key points
int anchors[16]; int anchors[16];
/*
P0-----P1-----P2-----P3
| | | |
P4-----P5-----P6-----P7
| | | |
P8-----P9-----P10----P11
| | | |
P12----P13----P14----15
*/
anchors[0] = integral(y + feature.y, x + feature.x); anchors[0] = integral(y + feature.y, x + feature.x);
anchors[1] = integral(y + feature.y, x + feature.z); anchors[1] = integral(y + feature.y, x + feature.z);
anchors[2] = integral(y + feature.y, x + x_off + feature.x); anchors[2] = integral(y + feature.y, x + feature.x + x_off);
anchors[3] = integral(y + feature.y, x + x_off + feature.z); anchors[3] = integral(y + feature.y, x + feature.z + x_off);
anchors[4] = integral(y + feature.w, x + feature.x); anchors[4] = integral(y + feature.w, x + feature.x);
anchors[5] = integral(y + feature.w, x + feature.z); anchors[5] = integral(y + feature.w, x + feature.z);
anchors[6] = integral(y + feature.w, x + x_off + feature.x); anchors[6] = integral(y + feature.w, x + feature.x + x_off);
anchors[7] = integral(y + feature.w, x + x_off + feature.z); anchors[7] = integral(y + feature.w, x + feature.z + x_off);
anchors[8] = integral(y + y_off + feature.y, x + feature.x); anchors[8] = integral(y + y_off + feature.y, x + feature.x);
anchors[9] = integral(y + y_off + feature.y, x + feature.z); anchors[9] = integral(y + y_off + feature.y, x + feature.z);
@ -114,7 +110,7 @@ namespace cv { namespace gpu { namespace device {
// calculate feature // calculate feature
int sum = anchors[5] - anchors[6] - anchors[9] + anchors[10]; int sum = anchors[5] - anchors[6] - anchors[9] + anchors[10];
uchar response = (( (anchors[ 0] - anchors[ 1] - anchors[ 4] + anchors[ 5]) >= sum )? 128 : 0) int response = (( (anchors[ 0] - anchors[ 1] - anchors[ 4] + anchors[ 5]) >= sum )? 128 : 0)
|(( (anchors[ 1] - anchors[ 2] - anchors[ 5] + anchors[ 6]) >= sum )? 64 : 0) |(( (anchors[ 1] - anchors[ 2] - anchors[ 5] + anchors[ 6]) >= sum )? 64 : 0)
|(( (anchors[ 2] - anchors[ 3] - anchors[ 6] + anchors[ 7]) >= sum )? 32 : 0) |(( (anchors[ 2] - anchors[ 3] - anchors[ 6] + anchors[ 7]) >= sum )? 32 : 0)
|(( (anchors[ 6] - anchors[ 7] - anchors[10] + anchors[11]) >= sum )? 16 : 0) |(( (anchors[ 6] - anchors[ 7] - anchors[10] + anchors[11]) >= sum )? 16 : 0)
@ -122,11 +118,12 @@ namespace cv { namespace gpu { namespace device {
|(( (anchors[ 9] - anchors[10] - anchors[13] + anchors[14]) >= sum )? 4 : 0) |(( (anchors[ 9] - anchors[10] - anchors[13] + anchors[14]) >= sum )? 4 : 0)
|(( (anchors[ 8] - anchors[ 9] - anchors[12] + anchors[13]) >= sum )? 2 : 0) |(( (anchors[ 8] - anchors[ 9] - anchors[12] + anchors[13]) >= sum )? 2 : 0)
|(( (anchors[ 4] - anchors[ 5] - anchors[ 8] + anchors[ 9]) >= sum )? 1 : 0); |(( (anchors[ 4] - anchors[ 5] - anchors[ 8] + anchors[ 9]) >= sum )? 1 : 0);
return response; return response;
} }
}; };
} // lbp
} } }// namespaces } } }// namespaces
#endif #endif