Added support for win_stride values that differ from block_stride (any multiple of it) to the GPU HOG; refactored the GPU HOG sample

This commit is contained in:
Alexey Spizhevoy
2010-11-17 14:11:30 +00:00
parent 27542529a5
commit 2d01558479
3 changed files with 48 additions and 31 deletions

View File

@@ -198,8 +198,8 @@ __global__ void compute_hists_kernel_many_blocks(const int img_block_width, cons
void compute_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, const DevMem2Df& grad,
const DevMem2D& qangle, float sigma, float* block_hists)
int height, int width, const DevMem2Df& grad,
const DevMem2D& qangle, float sigma, float* block_hists)
{
const int nblocks = 1;
@@ -300,7 +300,7 @@ __global__ void normalize_hists_kernel_many_blocks(const int block_hist_size,
void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, float* block_hists, float threshold)
int height, int width, float* block_hists, float threshold)
{
const int nblocks = 1;
@@ -336,6 +336,7 @@ void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
template <int nthreads, // Number of threads per one histogram block
int nblocks> // Number of histogram block processed by single GPU thread block
__global__ void classify_hists_kernel_many_blocks(const int img_win_width, const int img_block_width,
const int win_block_stride_x, const int win_block_stride_y,
const float* block_hists, const float* coefs,
float free_coef, float threshold, unsigned char* labels)
{
@@ -343,8 +344,8 @@ __global__ void classify_hists_kernel_many_blocks(const int img_win_width, const
if (blockIdx.x * blockDim.z + win_x >= img_win_width)
return;
const float* hist = block_hists + (blockIdx.y * img_block_width +
blockIdx.x * blockDim.z + win_x) *
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
blockIdx.x * win_block_stride_x * blockDim.z + win_x) *
cblock_hist_size;
float product = 0.f;
@@ -397,15 +398,18 @@ __global__ void classify_hists_kernel_many_blocks(const int img_win_width, const
// win_stride_x must be a multiple of block_stride_x and win_stride_y a multiple of block_stride_y
void classify_hists(int win_height, int win_width, int block_stride_x, int block_stride_y,
int height, int width, float* block_hists, float* coefs,
float free_coef, float threshold, unsigned char* labels)
void classify_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x,
int height, int width, float* block_hists, float* coefs,
float free_coef, float threshold, unsigned char* labels)
{
const int nthreads = 256;
const int nblocks = 1;
int img_win_width = (width - win_width + block_stride_x) / block_stride_x;
int img_win_height = (height - win_height + block_stride_y) / block_stride_y;
int win_block_stride_x = win_stride_x / block_stride_x;
int win_block_stride_y = win_stride_y / block_stride_y;
int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
dim3 threads(nthreads, 1, nblocks);
dim3 grid(div_up(img_win_width, nblocks), img_win_height);
@@ -416,7 +420,8 @@ void classify_hists(int win_height, int win_width, int block_stride_x, int block
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
block_stride_x;
classify_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
img_win_width, img_block_width, block_hists, coefs, free_coef, threshold, labels);
img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
block_hists, coefs, free_coef, threshold, labels);
cudaSafeCall(cudaThreadSynchronize());
}
@@ -524,7 +529,7 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl
void compute_gradients_8UC4(int nbins, int height, int width, const DevMem2D& img,
float angle_scale, DevMem2Df grad, DevMem2D qangle)
float angle_scale, DevMem2Df grad, DevMem2D qangle)
{
const int nthreads = 256;
@@ -580,7 +585,7 @@ __global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrEl
void compute_gradients_8UC1(int nbins, int height, int width, const DevMem2D& img,
float angle_scale, DevMem2Df grad, DevMem2D qangle)
float angle_scale, DevMem2Df grad, DevMem2D qangle)
{
const int nthreads = 256;

View File

@@ -73,9 +73,10 @@ void compute_hists(int nbins, int block_stride_x, int blovck_stride_y,
void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, float* block_hists, float threshold);
void classify_hists(int win_height, int win_width, int block_stride_x,
int block_stride_y, int height, int width, float* block_hists,
float* coefs, float free_coef, float threshold, unsigned char* labels);
void classify_hists(int win_height, int win_width, int block_stride_y,
int block_stride_x, int win_stride_y, int win_stride_x, int height,
int width, float* block_hists, float* coefs, float free_coef,
float threshold, unsigned char* labels);
void compute_gradients_8UC1(int nbins, int height, int width, const cv::gpu::DevMem2D& img,
float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle);
@@ -209,7 +210,8 @@ void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, doub
if (win_stride == Size())
win_stride = block_stride;
else
CV_Assert(win_stride == block_stride);
CV_Assert(win_stride.width % block_stride.width == 0 &&
win_stride.height % block_stride.height == 0);
CV_Assert(padding == Size(0, 0));
@@ -229,8 +231,8 @@ void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, doub
block_hists.ptr<float>(), (float)threshold_L2hys);
hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
img.rows, img.cols, block_hists.ptr<float>(), detector.ptr<float>(),
(float)free_coef, (float)hit_threshold, labels.ptr());
win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(),
detector.ptr<float>(), (float)free_coef, (float)hit_threshold, labels.ptr());
labels.download(labels_host);
unsigned char* vec = labels_host.ptr();