LBP: implemented the first version of the device-side part
This commit is contained in:
@@ -1454,12 +1454,14 @@ private:
|
|||||||
int subsetSize;
|
int subsetSize;
|
||||||
int nodeStep;
|
int nodeStep;
|
||||||
|
|
||||||
// located on gpu
|
// gpu representation of classifier
|
||||||
GpuMat stage_mat;
|
GpuMat stage_mat;
|
||||||
GpuMat trees_mat;
|
GpuMat trees_mat;
|
||||||
GpuMat nodes_mat;
|
GpuMat nodes_mat;
|
||||||
GpuMat leaves_mat;
|
GpuMat leaves_mat;
|
||||||
GpuMat subsets_mat;
|
GpuMat subsets_mat;
|
||||||
|
|
||||||
|
// current integral image
|
||||||
GpuMat integral;
|
GpuMat integral;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -59,7 +59,6 @@ struct Stage
|
|||||||
struct DTreeNode
|
struct DTreeNode
|
||||||
{
|
{
|
||||||
int featureIdx;
|
int featureIdx;
|
||||||
//float threshold; // for ordered features only
|
|
||||||
int left;
|
int left;
|
||||||
int right;
|
int right;
|
||||||
DTreeNode(int f = 0, int l = 0, int r = 0) : featureIdx(f), left(l), right(r) {}
|
DTreeNode(int f = 0, int l = 0, int r = 0) : featureIdx(f), left(l), right(r) {}
|
||||||
@@ -271,7 +270,8 @@ namespace cv { namespace gpu { namespace device
|
|||||||
{
|
{
|
||||||
namespace lbp
|
namespace lbp
|
||||||
{
|
{
|
||||||
void CascadeClassify(DevMem2Db image, DevMem2Db objects, double scaleFactor = 1.2, int minNeighbors = 4, cudaStream_t stream = 0);
|
void cascadeClassify(const DevMem2Db stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets,
|
||||||
|
const DevMem2Db integral, int workWidth, int workHeight, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors = 4, cudaStream_t stream = 0);
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
@@ -308,17 +308,8 @@ int cv::gpu::CascadeClassifier_GPU_LBP::detectMultiScale(const GpuMat& image, Gp
|
|||||||
int step = (factor <= 2.) + 1;
|
int step = (factor <= 2.) + 1;
|
||||||
int stripCount = 1, stripSize = processingRectSize.height;
|
int stripCount = 1, stripSize = processingRectSize.height;
|
||||||
|
|
||||||
int y1 = 0;
|
cv::gpu::device::lbp::cascadeClassify(stage_mat, trees_mat, nodes_mat, leaves_mat, subsets_mat,
|
||||||
int y2 = processingRectSize.height;
|
integral, processingRectSize.width, processingRectSize.height, step, subsetSize, objects, minNeighbors);
|
||||||
|
|
||||||
for (int y = y1; y < y2; y += step)
|
|
||||||
for (int x = 0; x < processingRectSize.width; x+=step)
|
|
||||||
{
|
|
||||||
// TODO: classify
|
|
||||||
int result = 0;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
// TODO: reject levels
|
// TODO: reject levels
|
||||||
|
|
||||||
|
@@ -40,15 +40,51 @@
|
|||||||
//
|
//
|
||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#include <opencv2/gpu/device/detail/lbp.hpp>
|
#include <opencv2/gpu/device/lbp.hpp>
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
namespace lbp
|
namespace lbp
|
||||||
{
|
{
|
||||||
void CascadeClassify(DevMem2Db image, DevMem2Db objects, double scaleFactor=1.2, int minNeighbors=4, cudaStream_t stream)
|
__global__ void lbp_classify(const DevMem2D_< ::cv::gpu::device::Stage> stages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets,
|
||||||
|
const DevMem2Db integral, float step, int subsetSize, DevMem2D_<int4> objects)
|
||||||
|
{
|
||||||
|
unsigned int x = threadIdx.x;
|
||||||
|
unsigned int y = blockIdx.x;
|
||||||
|
int nodeOfs = 0, leafOfs = 0;
|
||||||
|
::cv::gpu::device::Feature feature;
|
||||||
|
|
||||||
|
for (int s = 0; s < stages.cols; s++ )
|
||||||
|
{
|
||||||
|
::cv::gpu::device::Stage stage = stages(0, s);
|
||||||
|
int sum = 0;
|
||||||
|
for (int w = 0; w < stage.ntrees; w++)
|
||||||
|
{
|
||||||
|
::cv::gpu::device::ClNode node = nodes(0, nodeOfs);
|
||||||
|
char c = feature();// TODO: implement it
|
||||||
|
const int subsetIdx = (nodeOfs * subsetSize);
|
||||||
|
int idx = subsetIdx + ((c >> 5) & ( 1 << (c & 31)) ? leafOfs : leafOfs + 1);
|
||||||
|
sum += leaves(0, subsets(0, idx) );
|
||||||
|
nodeOfs++;
|
||||||
|
leafOfs += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sum < stage.threshold)
|
||||||
|
return; // nothing matched
|
||||||
|
return;//matched
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void cascadeClassify(const DevMem2Db bstages, const DevMem2Di trees, const DevMem2Db nodes, const DevMem2Df leaves, const DevMem2Di subsets,
|
||||||
|
const DevMem2Db integral, int workWidth, int workHeight, int step, int subsetSize, DevMem2D_<int4> objects, int minNeighbors, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
printf("CascadeClassify");
|
printf("CascadeClassify");
|
||||||
|
int blocks = ceilf(workHeight / (float)step);
|
||||||
|
int threads = ceilf(workWidth / (float)step);
|
||||||
|
DevMem2D_< ::cv::gpu::device::Stage> stages = DevMem2D_< ::cv::gpu::device::Stage>(bstages);
|
||||||
|
|
||||||
|
lbp_classify<<<blocks, threads>>>(stages, trees, nodes, leaves, subsets, integral, step, subsetSize, objects);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}}}
|
}}}
|
@@ -43,6 +43,13 @@
|
|||||||
#ifndef __OPENCV_GPU_DEVICE_LBP_HPP_
|
#ifndef __OPENCV_GPU_DEVICE_LBP_HPP_
|
||||||
#define __OPENCV_GPU_DEVICE_LBP_HPP_
|
#define __OPENCV_GPU_DEVICE_LBP_HPP_
|
||||||
|
|
||||||
|
#include "internal_shared.hpp"
|
||||||
|
// #include "opencv2/gpu/device/border_interpolate.hpp"
|
||||||
|
// #include "opencv2/gpu/device/vec_traits.hpp"
|
||||||
|
// #include "opencv2/gpu/device/vec_math.hpp"
|
||||||
|
// #include "opencv2/gpu/device/saturate_cast.hpp"
|
||||||
|
// #include "opencv2/gpu/device/filters.hpp"
|
||||||
|
|
||||||
// #define CALC_SUM_(p0, p1, p2, p3, offset) \
|
// #define CALC_SUM_(p0, p1, p2, p3, offset) \
|
||||||
// ((p0)[offset] - (p1)[offset] - (p2)[offset] + (p3)[offset])
|
// ((p0)[offset] - (p1)[offset] - (p2)[offset] + (p3)[offset])
|
||||||
|
|
||||||
@@ -53,16 +60,34 @@
|
|||||||
|
|
||||||
namespace cv { namespace gpu { namespace device {
|
namespace cv { namespace gpu { namespace device {
|
||||||
|
|
||||||
|
struct Stage
|
||||||
|
{
|
||||||
|
int first;
|
||||||
|
int ntrees;
|
||||||
|
float threshold;
|
||||||
|
__device__ __forceinline__ Stage(int f = 0, int n = 0, float t = 0.f) : first(f), ntrees(n), threshold(t) {}
|
||||||
|
__device__ __forceinline__ Stage(const Stage& other) : first(other.first), ntrees(other.ntrees), threshold(other.threshold) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ClNode
|
||||||
|
{
|
||||||
|
int featureIdx;
|
||||||
|
int left;
|
||||||
|
int right;
|
||||||
|
__device__ __forceinline__ ClNode(int f = 0, int l = 0, int r = 0) : featureIdx(f), left(l), right(r) {}
|
||||||
|
__device__ __forceinline__ ClNode(const ClNode& other) : featureIdx(other.featureIdx), left(other.left), right(other.right) {}
|
||||||
|
};
|
||||||
|
|
||||||
struct Feature
|
struct Feature
|
||||||
{
|
{
|
||||||
__device__ __forceinline__ Feature(const Feature& other) {(void)other;}
|
__device__ __forceinline__ Feature(const Feature& other) {(void)other;}
|
||||||
__device__ __forceinline__ Feature() {}
|
__device__ __forceinline__ Feature() {}
|
||||||
__device__ __forceinline__ char operator() (volatile int* ptr, int offset)
|
__device__ __forceinline__ char operator() ()//(volatile int* ptr, int offset)
|
||||||
{
|
{
|
||||||
return char(0);
|
return char(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
};
|
||||||
}// namespaces
|
} } }// namespaces
|
||||||
|
|
||||||
#endif
|
#endif
|
Reference in New Issue
Block a user