Added getDescriptors support to the GPU HOG; also added a commented-out test for this feature

This commit is contained in:
Alexey Spizhevoy
2010-11-18 09:22:23 +00:00
parent 515bdfa71e
commit 11c0c5bf85
5 changed files with 221 additions and 81 deletions

View File

@@ -397,11 +397,9 @@ __global__ void classify_hists_kernel_many_blocks(const int img_win_width, const
}
// We only support win_stride_x == block_stride_x, win_stride_y == block_stride_y
void classify_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x,
int height, int width, float* block_hists, float* coefs,
float free_coef, float threshold, unsigned char* labels)
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
float* coefs, float free_coef, float threshold, unsigned char* labels)
{
const int nthreads = 256;
const int nblocks = 1;
@@ -425,8 +423,54 @@ void classify_hists(int win_height, int win_width, int block_stride_y, int block
cudaSafeCall(cudaThreadSynchronize());
}
//----------------------------------------------------------------------------
// Extract descriptors
//------------------------------------------------------------
// Copies the descriptor of one detection window out of the per-image block
// histogram buffer into one row of `descriptors`.
//
// Launch layout: one thread block per window. blockIdx.x / blockIdx.y select the
// window column / row, and the destination row is blockIdx.y * gridDim.x + blockIdx.x.
// The threads of a block walk the cdescr_size descriptor elements starting at
// threadIdx.x and advancing by the template parameter nthreads, so the kernel is
// expected to be launched with nthreads threads along x.
//
// cblock_hist_size, cdescr_size and cdescr_width are defined outside this block
// (NOTE(review): presumably __constant__ values configured by the host - confirm).
// img_win_width is accepted but not referenced by the body.
template <int nthreads>
__global__ void extract_descriptors_kernel(const int img_win_width, const int img_block_width,
                                           const int win_block_stride_x, const int win_block_stride_y,
                                           const float* block_hists, PtrElemStepf descriptors)
{
    // Source: first histogram value of the window's top-left block
    const float* win_hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
                                           blockIdx.x * win_block_stride_x) * cblock_hist_size;

    // Destination: the row of the output matrix assigned to this window
    float* win_descr = descriptors.ptr(blockIdx.y * gridDim.x + blockIdx.x);

    // Each thread copies the elements tid, tid + nthreads, tid + 2 * nthreads, ...
    for (int elem = threadIdx.x; elem < cdescr_size; elem += nthreads)
    {
        // Split the flat descriptor index into (row of blocks, position within that row)
        const int row = elem / cdescr_width;
        const int col = elem - row * cdescr_width;

        // A row of window blocks is img_block_width * cblock_hist_size floats apart in the source
        const int src_idx = row * img_block_width * cblock_hist_size + col;
        win_descr[elem] = win_hist[src_idx];
    }
}
// Host-side launcher for extract_descriptors_kernel.
//
// Derives the number of detection windows that fit into a width x height image
// for the given window size and window stride, and launches one thread block per
// window so that every window's descriptor is written to its own row of
// `descriptors`.
//
// Parameters:
//   win_height, win_width           - detection window size
//   block_stride_y, block_stride_x  - stride between neighbouring blocks
//   win_stride_y, win_stride_x      - stride between neighbouring windows; divided by
//                                     the block stride to get the window step in blocks
//   height, width                   - image size
//   block_hists                     - block histogram buffer read by the kernel
//                                     (NOTE(review): must be a device pointer - confirm at caller)
//   descriptors                     - output matrix, one row per window
//
// Errors from the launch or from kernel execution are reported through cudaSafeCall.
void extract_descriptors(int win_height, int win_width, int block_stride_y, int block_stride_x,
                         int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
                         DevMem2Df descriptors)
{
    const int nthreads = 256;

    // Window step expressed in blocks rather than pixels
    int win_block_stride_x = win_stride_x / block_stride_x;
    int win_block_stride_y = win_stride_y / block_stride_y;

    // Number of windows along each image dimension
    int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
    int img_win_height = (height - win_height + win_stride_y) / win_stride_y;

    // No window fits into the image: there is nothing to extract, and a grid with a
    // zero (or wrapped-around negative) dimension is an invalid launch configuration.
    if (img_win_width <= 0 || img_win_height <= 0)
        return;

    dim3 threads(nthreads, 1);
    dim3 grid(img_win_width, img_win_height);

    // Number of blocks per image row; needed to step between block rows in block_hists
    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
                          block_stride_x;

    extract_descriptors_kernel<nthreads><<<grid, threads>>>(
        img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
        block_hists, descriptors);

    // Launch-configuration errors are only reported through cudaGetLastError()
    cudaSafeCall(cudaGetLastError());
    // Wait for completion so that execution errors surface here
    cudaSafeCall(cudaThreadSynchronize());
}
//----------------------------------------------------------------------------
// Gradients computation
@@ -481,7 +525,7 @@ __global__ void compute_gradients_8UC4_kernel(int height, int width, const PtrEl
float3 dx = make_float3(sqrtf(b.x) - sqrtf(a.x),
sqrtf(b.y) - sqrtf(a.y),
sqrtf(b.z) - sqrtf(a.z));
sqrtf(b.z) - sqrtf(a.z));
float3 dy = make_float3(0.f, 0.f, 0.f);
if (blockIdx.y > 0 && blockIdx.y < height - 1)

View File

@@ -51,8 +51,10 @@ double cv::gpu::HOGDescriptor::getWinSigma() const { throw_nogpu(); return 0; }
// Stub definitions of the cv::gpu::HOGDescriptor interface: every method below only
// calls throw_nogpu() and, where the signature requires a result, follows it with a
// placeholder return value (false or an empty vector).
// NOTE(review): presumably these are compiled only when the library is built without
// CUDA support - the enclosing preprocessor guard is outside this view, confirm.
bool cv::gpu::HOGDescriptor::checkDetectorSize() const { throw_nogpu(); return false; }
void cv::gpu::HOGDescriptor::setSVMDetector(const vector<float>&) { throw_nogpu(); }
void cv::gpu::HOGDescriptor::computeGradient(const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat&) { throw_nogpu(); }
void cv::gpu::HOGDescriptor::detect(const GpuMat&, vector<Point>&, double, Size, Size) { throw_nogpu(); }
void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat&, vector<Rect>&, double, Size, Size, double, int) { throw_nogpu(); }
void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&) { throw_nogpu(); }
std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector<float>(); }
std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector_48x96() { throw_nogpu(); return std::vector<float>(); }
std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector_64x128() { throw_nogpu(); return std::vector<float>(); }
@@ -76,6 +78,10 @@ void classify_hists(int win_height, int win_width, int block_stride_y,
int width, float* block_hists, float* coefs, float free_coef,
float threshold, unsigned char* labels);
// Declaration of the launcher that copies the descriptor of every detection window
// from block_hists into descriptors, one window per row.
void extract_descriptors(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
cv::gpu::DevMem2Df descriptors);
void compute_gradients_8UC1(int nbins, int height, int width, const cv::gpu::DevMem2D& img,
float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle);
void compute_gradients_8UC4(int nbins, int height, int width, const cv::gpu::DevMem2D& img,
@@ -212,39 +218,23 @@ void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat& img)
}
////TODO: test it
//void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride,
// vector<GpuMat>& descriptors)
//{
// CV_Assert(win_stride.width % block_stride.width == 0 &&
// win_stride.height % block_stride.height == 0);
//
// computeBlockHistograms(img);
//
// Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
// GpuMat hists_reshaped = block_hists.reshape(0, blocks_per_img.height);
//
// const int block_hist_size = getBlockHistogramSize();
// Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
// Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
//
// descriptors.resize(wins_per_img.area());
// for (int i = 0; i < wins_per_img.height; ++i)
// {
// for (int j = 0; j < wins_per_img.width; ++j)
// {
// Range rows;
// rows.start = i * (blocks_per_win.height + 1);
// rows.end = rows.start + blocks_per_win.height;
//
// Range cols;
// cols.start = j * (blocks_per_win.width + 1) * block_hist_size;
// cols.end = cols.start + blocks_per_win.width * block_hist_size;
//
// descriptors[i * wins_per_img.width + j] = hists_reshaped(rows, cols);
// }
// }
//}
void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors)
{
CV_Assert(win_stride.width % block_stride.width == 0 &&
win_stride.height % block_stride.height == 0);
computeBlockHistograms(img);
const int block_hist_size = getBlockHistogramSize();
Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride);
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
descriptors.create(wins_per_img.area(), blocks_per_win.area() * block_hist_size, CV_32F);
hog::extract_descriptors(win_size.height, win_size.width, block_stride.height, block_stride.width,
win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(),
descriptors);
}
void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, double hit_threshold,