From 436d2ff1fc6297774cfd5836f4994cea1d2659df Mon Sep 17 00:00:00 2001 From: Marina Kolpakova Date: Tue, 10 Jul 2012 11:58:06 +0000 Subject: [PATCH] LBP classifier moved to ptr from DevMem2D --- modules/gpu/src/cuda/lbp.cu | 11 ++-- modules/gpu/src/opencv2/gpu/device/lbp.hpp | 70 ++++++++++++---------- 2 files changed, 46 insertions(+), 35 deletions(-) diff --git a/modules/gpu/src/cuda/lbp.cu b/modules/gpu/src/cuda/lbp.cu index 9981fa6f0..aa3cdb3b1 100644 --- a/modules/gpu/src/cuda/lbp.cu +++ b/modules/gpu/src/cuda/lbp.cu @@ -48,8 +48,9 @@ namespace cv { namespace gpu { namespace device { namespace lbp { - __global__ void lbp_classify_stump(Stage* stages, int nstages, ClNode* nodes, const float* leaves, const int* subsets, const uchar4* features, - const DevMem2Di integral, int workWidth, int workHeight, int clWidth, int clHeight, float scale, int step, int subsetSize, DevMem2D_ objects, unsigned int* n) + __global__ void lbp_classify_stump(const Stage* stages, const int nstages, const ClNode* nodes, const float* leaves, const int* subsets, const uchar4* features, + const int* integral, const int istep, const int workWidth,const int workHeight, const int clWidth, const int clHeight, const float scale, const int step, + const int subsetSize, DevMem2D_ objects, unsigned int* n) { int x = threadIdx.x * step; int y = blockIdx.x * step; @@ -68,7 +69,7 @@ namespace cv { namespace gpu { namespace device ClNode node = nodes[current_node]; uchar4 feature = features[node.featureIdx]; - int c = evaluator(y, x, feature, integral); + int c = evaluator( (y + feature.y) * istep + x + feature.x , feature, integral, istep); const int* subsetIdx = subsets + (current_node * subsetSize); int idx = (subsetIdx[c >> 5] & ( 1 << (c & 31))) ? 
current_leave : current_leave + 1; @@ -189,8 +190,10 @@ namespace cv { namespace gpu { namespace device const float* leaves = mleaves.ptr(); const int* subsets = msubsets.ptr(); const uchar4* features = (uchar4*)(mfeatures.ptr()); + const int* integ = integral.ptr(); + int istep = integral.step / sizeof(int); - lbp_classify_stump<<>>(stages, nstages, nodes, leaves, subsets, features, integral, + lbp_classify_stump<<>>(stages, nstages, nodes, leaves, subsets, features, integ, istep, workWidth, workHeight, clWidth, clHeight, scale, step, subsetSize, objects, classified); } diff --git a/modules/gpu/src/opencv2/gpu/device/lbp.hpp b/modules/gpu/src/opencv2/gpu/device/lbp.hpp index baa78e3d0..8508fa41b 100644 --- a/modules/gpu/src/opencv2/gpu/device/lbp.hpp +++ b/modules/gpu/src/opencv2/gpu/device/lbp.hpp @@ -160,68 +160,76 @@ __device__ __forceinline__ T __atomicMin(T* address, T val) __device__ __forceinline__ LBP() {} //feature as uchar x, y - left top, z,w - right bottom - __device__ __forceinline__ int operator() (unsigned int y, unsigned int x, uchar4 feature, const DevMem2Di integral) const + __device__ __forceinline__ int operator() (unsigned int y, uchar4 feature, const int* integral, int step) const { int x_off = 2 * feature.z; - int anchors[9];// = {0,0,0, 0,0,0, 0,0,0}; + int anchors[9]; - x +=feature.x; - y +=feature.y; - anchors[0] = integral(y, x); - anchors[1] = integral(y, x + feature.z); + anchors[0] = integral[y]; + anchors[1] = integral[y + feature.z]; anchors[0] -= anchors[1]; - anchors[2] = integral(y, x + x_off); + anchors[2] = integral[y + x_off]; anchors[1] -= anchors[2]; - anchors[2] -= integral(y, x + feature.z + x_off); - y+=feature.w; + anchors[2] -= integral[y + feature.z + x_off]; + y+=feature.w * step; - anchors[3] = integral(y, x); - anchors[4] = integral(y, x + feature.z); + anchors[3] = integral[y]; + anchors[4] = integral[y + feature.z]; anchors[3] -= anchors[4]; - anchors[5] = integral(y, x + x_off); + anchors[5] = integral[y + 
x_off]; anchors[4] -= anchors[5]; - anchors[5] -= integral(y, x + feature.z + x_off); + anchors[5] -= integral[y + feature.z + x_off]; anchors[0] -= anchors[3]; anchors[1] -= anchors[4]; anchors[2] -= anchors[5]; // 0 - 2 contains s0 - s2 - y+=feature.w; - anchors[6] = integral(y, x); - anchors[7] = integral(y, x + feature.z); + y+=feature.w * step; + anchors[6] = integral[y]; + anchors[7] = integral[y + feature.z]; anchors[6] -= anchors[7]; - anchors[8] = integral(y, x + x_off); + anchors[8] = integral[y + x_off]; anchors[7] -= anchors[8]; - anchors[8] -= integral(y, x + x_off + feature.z); + anchors[8] -= integral[y + x_off + feature.z]; anchors[3] -= anchors[6]; anchors[4] -= anchors[7]; anchors[5] -= anchors[8]; // 3 - 5 contains s3 - s5 - int response = ((1 - ((unsigned int)(anchors[0] - anchors[4]) >> 31)) << 7); - response |= ((1 - ((unsigned int)(anchors[1] - anchors[4]) >> 31)) << 6); - response |= ((1 - ((unsigned int)(anchors[2] - anchors[4]) >> 31)) << 5); - response |= ((1 - ((unsigned int)(anchors[5] - anchors[4]) >> 31)) << 4); - response |= ((1 - ((unsigned int)(anchors[3] - anchors[4]) >> 31)) << 0); + anchors[0] -= anchors[4]; + anchors[1] -= anchors[4]; + anchors[2] -= anchors[4]; + anchors[3] -= anchors[4]; + anchors[5] -= anchors[4]; - y+=feature.w; - anchors[0] = integral(y, x); - anchors[1] = integral(y, x + feature.z); + int response = (~(anchors[0] >> 31)) & 128; + response |= (~(anchors[1] >> 31)) & 64;; + response |= (~(anchors[2] >> 31)) & 32; + response |= (~(anchors[5] >> 31)) & 16; + response |= (~(anchors[3] >> 31)) & 1; + + y+=feature.w * step; + anchors[0] = integral[y]; + anchors[1] = integral[y + feature.z]; anchors[0] -= anchors[1]; - anchors[2] = integral(y, x + x_off); + anchors[2] = integral[y + x_off]; anchors[1] -= anchors[2]; - anchors[2] -= integral(y, x + x_off + feature.z); + anchors[2] -= integral[y + x_off + feature.z]; anchors[6] -= anchors[0]; anchors[7] -= anchors[1]; anchors[8] -= anchors[2]; // 0 -2 contains s6 - 
s8 - response |= ((1 - ((unsigned int)(anchors[6] - anchors[4]) >> 31)) << 1); - response |= ((1 - ((unsigned int)(anchors[7] - anchors[4]) >> 31)) << 2); - response |= ((1 - ((unsigned int)(anchors[8] - anchors[4]) >> 31)) << 3); + anchors[6] -= anchors[4]; + anchors[7] -= anchors[4]; + anchors[8] -= anchors[4]; + + response |= (~(anchors[6] >> 31)) & 2; + response |= (~(anchors[7] >> 31)) & 4; + response |= (~(anchors[8] >> 31)) & 8; return response; }