LBP: switched to texture implementation
This commit is contained in:
@@ -48,8 +48,102 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
namespace lbp
|
||||
{
|
||||
|
||||
// File-scope 2D texture reference over `int` elements, read back unconverted
// (cudaReadModeElementType). Constructor arguments: non-normalized coordinates
// (false), point filtering, and clamp addressing for out-of-range coordinates.
// It is bound/unbound by bindIntegral()/unbindIntegral() and sampled by LBP.
texture<int, cudaTextureType2D, cudaReadModeElementType> tintegral(false, cudaFilterModePoint, cudaAddressModeClamp);
|
||||
|
||||
// Stateless device functor that evaluates one 3x3-cell LBP feature by sampling
// the `tintegral` texture on a 4x4 grid of points.
//
// The grid starts at (tx, ty); columns are `featurez` apart and rows are `fh`
// apart. For the nine cells s0..s8 (row-major, s4 is the centre) the value
//     s_i = P(c, r) - P(c+1, r) - P(c, r+1) + P(c+1, r+1)
// is formed from the four grid points surrounding the cell (presumably the
// cell's pixel sum, if `tintegral` holds an integral image - confirm against
// the caller). Each outer cell is then compared with the centre: a flag is
// raised when (s_i - s4) is non-negative.
struct LBP
{
private:
    // Samples the four grid points of one row and stores the three differences
    // between horizontally adjacent points: d[k] = P(k) - P(k + 1).
    static __device__ __forceinline__ void rowDeltas(int tx, int ty, int featurez, int (&d)[3])
    {
        const int p0 = tex2D(tintegral, tx, ty);
        const int p1 = tex2D(tintegral, tx + featurez, ty);
        const int p2 = tex2D(tintegral, tx + featurez * 2, ty);
        const int p3 = tex2D(tintegral, tx + featurez * 3, ty);

        d[0] = p0 - p1;
        d[1] = p1 - p2;
        d[2] = p2 - p3;
    }

    // Branch-free test: yields `bit` when `v` is non-negative and 0 otherwise.
    // (v >> 31) replicates the sign bit, so its complement is all ones exactly
    // for non-negative values.
    static __device__ __forceinline__ int flagIfNonNegative(int v, int bit)
    {
        return (~(v >> 31)) & bit;
    }

public:
    __device__ __forceinline__ LBP() {}

    // Copying is a no-op: the functor carries no state.
    __device__ __forceinline__ LBP(const LBP& other) { (void)other; }

    // ty, tx    - coordinates of the top-left grid point in the texture
    // fh        - vertical distance between grid rows
    // featurez  - horizontal distance between grid columns
    // shift     - [out] 5-bit code built from cells s3, s5, s6, s7, s8:
    //             s3 -> 1, s6 -> 2, s7 -> 4, s8 -> 8, s5 -> 16
    // returns   - 3-bit code built from the top row: s0 -> 4, s1 -> 2, s2 -> 1
    //
    // The visible kernel call site passes feature.w as `fh` and feature.z as
    // `featurez`, and uses the result as (subset word index, bit position).
    __device__ __forceinline__ int operator() (int ty, int tx, int fh, int featurez, int& shift) const
    {
        int row0[3];
        int row1[3];
        int row2[3];
        int row3[3];

        rowDeltas(tx, ty,          featurez, row0);
        rowDeltas(tx, ty + fh,     featurez, row1);
        rowDeltas(tx, ty + fh * 2, featurez, row2);
        rowDeltas(tx, ty + fh * 3, featurez, row3);

        // Centre cell s4; every other cell is compared against it.
        const int center = row1[1] - row2[1];

        // Top row of cells (s0..s2) relative to the centre.
        const int c0 = (row0[0] - row1[0]) - center;
        const int c1 = (row0[1] - row1[1]) - center;
        const int c2 = (row0[2] - row1[2]) - center;

        // Middle row without the centre (s3, s5) relative to the centre.
        const int c3 = (row1[0] - row2[0]) - center;
        const int c5 = (row1[2] - row2[2]) - center;

        // Bottom row of cells (s6..s8) relative to the centre.
        const int c6 = (row2[0] - row3[0]) - center;
        const int c7 = (row2[1] - row3[1]) - center;
        const int c8 = (row2[2] - row3[2]) - center;

        shift = flagIfNonNegative(c5, 16)
              | flagIfNonNegative(c3, 1)
              | flagIfNonNegative(c6, 2)
              | flagIfNonNegative(c7, 4)
              | flagIfNonNegative(c8, 8);

        return flagIfNonNegative(c0, 4)
             | flagIfNonNegative(c1, 2)
             | flagIfNonNegative(c2, 1);
    }
};
|
||||
|
||||
// Binds the device matrix `integral` to the file-scope 2D texture reference
// `tintegral` as a single-channel `int` texture. The matrix's cols/rows/step
// are forwarded as the texture width, height and row pitch. No offset is
// requested (first argument is 0). Any CUDA error is routed through
// cudaSafeCall.
void bindIntegral(DevMem2Di integral)
{
    const size_t texWidth  = (size_t)integral.cols;
    const size_t texHeight = (size_t)integral.rows;
    const size_t texPitch  = (size_t)integral.step;

    cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();

    cudaSafeCall( cudaBindTexture2D(0, &tintegral, integral.ptr(), &desc, texWidth, texHeight, texPitch) );
}
|
||||
|
||||
// Releases the binding of the file-scope texture reference `tintegral`.
// Any CUDA error is routed through cudaSafeCall.
void unbindIntegral()
{
    cudaSafeCall(
        cudaUnbindTexture(&tintegral)
    );
}
|
||||
|
||||
__global__ void lbp_classify_stump(const Stage* stages, const int nstages, const ClNode* nodes, const float* leaves, const int* subsets, const uchar4* features,
|
||||
const int* integral, const int istep, const int workWidth,const int workHeight, const int clWidth, const int clHeight, const float scale, const int step,
|
||||
/* const int* integral,const int istep, const int workWidth,const int workHeight,*/ const int clWidth, const int clHeight, const float scale, const int step,
|
||||
const int subsetSize, DevMem2D_<int4> objects, unsigned int* n)
|
||||
{
|
||||
int x = threadIdx.x * step;
|
||||
@@ -63,21 +157,18 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
float sum = 0;
|
||||
Stage stage = stages[s];
|
||||
|
||||
for (int t = 0; t < stage.ntrees; t++)
|
||||
{
|
||||
ClNode node = nodes[current_node];
|
||||
|
||||
uchar4 feature = features[node.featureIdx];
|
||||
int c = evaluator( (y + feature.y) * istep + x + feature.x , feature.w * istep, feature.z, integral, istep);
|
||||
const int* subsetIdx = subsets + (current_node * subsetSize);
|
||||
|
||||
int idx = (subsetIdx[c >> 5] & ( 1 << (c & 31))) ? current_leave : current_leave + 1;
|
||||
int shift;
|
||||
int c = evaluator(y + feature.y, x + feature.x, feature.w, feature.z, shift);
|
||||
int idx = (subsets[ current_node * subsetSize + c] & ( 1 << shift)) ? current_leave : current_leave + 1;
|
||||
sum += leaves[idx];
|
||||
current_node += 1;
|
||||
current_leave += 2;
|
||||
}
|
||||
|
||||
if (sum < stage.threshold)
|
||||
return;
|
||||
}
|
||||
@@ -85,8 +176,8 @@ namespace cv { namespace gpu { namespace device
|
||||
int4 rect;
|
||||
rect.x = roundf(x * scale);
|
||||
rect.y = roundf(y * scale);
|
||||
rect.z = roundf(clWidth);
|
||||
rect.w = roundf(clHeight);
|
||||
rect.z = clWidth;
|
||||
rect.w = clHeight;
|
||||
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
|
||||
int res = __atomicInc(n, 100U);
|
||||
#else
|
||||
@@ -178,8 +269,8 @@ namespace cv { namespace gpu { namespace device
|
||||
}
|
||||
}
|
||||
|
||||
void classifyStump(const DevMem2Db mstages, const int nstages, const DevMem2Di mnodes, const DevMem2Df mleaves, const DevMem2Di msubsets, const DevMem2Db mfeatures,
|
||||
const DevMem2Di integral, const int workWidth, const int workHeight, const int clWidth, const int clHeight, float scale, int step, int subsetSize,
|
||||
void classifyStump(const DevMem2Db& mstages, const int nstages, const DevMem2Di& mnodes, const DevMem2Df& mleaves, const DevMem2Di& msubsets, const DevMem2Db& mfeatures,
|
||||
/*const DevMem2Di& integral,*/ const int workWidth, const int workHeight, const int clWidth, const int clHeight, float scale, int step, int subsetSize,
|
||||
DevMem2D_<int4> objects, unsigned int* classified)
|
||||
{
|
||||
int blocks = ceilf(workHeight / (float)step);
|
||||
@@ -190,11 +281,8 @@ namespace cv { namespace gpu { namespace device
|
||||
const float* leaves = mleaves.ptr();
|
||||
const int* subsets = msubsets.ptr();
|
||||
const uchar4* features = (uchar4*)(mfeatures.ptr());
|
||||
const int* integ = integral.ptr();
|
||||
int istep = integral.step / sizeof(int);
|
||||
|
||||
lbp_classify_stump<<<blocks, threads>>>(stages, nstages, nodes, leaves, subsets, features, integ, istep,
|
||||
workWidth, workHeight, clWidth, clHeight, scale, step, subsetSize, objects, classified);
|
||||
lbp_classify_stump<<<blocks, threads>>>(stages, nstages, nodes, leaves, subsets, features, /*integ, istep,
|
||||
workWidth, workHeight,*/ clWidth, clHeight, scale, step, subsetSize, objects, classified);
|
||||
}
|
||||
|
||||
int connectedConmonents(DevMem2D_<int4> candidates, DevMem2D_<int4> objects, int groupThreshold, float grouping_eps, unsigned int* nclasses)
|
||||
|
||||
Reference in New Issue
Block a user