integrated patch: HOG confidence calculation. Thanks, Wongun.
This commit is contained in:
@@ -1142,6 +1142,13 @@ private:
|
||||
|
||||
|
||||
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
|
||||
struct CV_EXPORTS HOGConfidence
|
||||
{
|
||||
double scale;
|
||||
vector<Point> locations;
|
||||
vector<double> confidences;
|
||||
vector<double> part_scores[4];
|
||||
};
|
||||
|
||||
struct CV_EXPORTS HOGDescriptor
|
||||
{
|
||||
@@ -1173,6 +1180,13 @@ struct CV_EXPORTS HOGDescriptor
|
||||
Size padding=Size(), double scale0=1.05,
|
||||
int group_threshold=2);
|
||||
|
||||
void computeConfidence(const GpuMat& img, vector<Point>& hits, double hit_threshold,
|
||||
Size win_stride, Size padding, vector<Point>& locations, vector<double>& confidences);
|
||||
|
||||
void computeConfidenceMultiScale(const GpuMat& img, vector<Rect>& found_locations,
|
||||
double hit_threshold, Size win_stride, Size padding,
|
||||
vector<HOGConfidence> &conf_out, int group_threshold);
|
||||
|
||||
void getDescriptors(const GpuMat& img, Size win_stride,
|
||||
GpuMat& descriptors,
|
||||
int descr_format=DESCR_FORMAT_COL_BY_COL);
|
||||
|
@@ -326,6 +326,97 @@ namespace cv { namespace gpu { namespace device
|
||||
// Linear SVM based classification
|
||||
//
|
||||
|
||||
// Computes the linear SVM response (confidence) of every detection window
// instead of only a positive/negative label.
//
// Launch layout (see compute_confidence_hists):
//   blockDim = (nthreads, 1, nblocks) -- nthreads threads cooperate on one window,
//                                        nblocks windows are handled per thread block
//   gridDim  = (divUp(img_win_width, nblocks), img_win_height)
// Shared memory: nthreads * nblocks floats (static).
//
// Parameters:
//   img_win_width      - number of detection windows per image row
//   img_block_width    - number of histogram blocks per image row
//   win_block_stride_* - window stride expressed in histogram blocks
//   block_hists        - block histograms of the whole image
//   coefs              - SVM weights, cdescr_size floats
//   free_coef          - SVM bias added to the dot product
//   threshold          - not used by this kernel (kept for interface parity)
//   confidences        - output, one float per window, row-major
//
// cdescr_size, cdescr_width and cblock_hist_size are defined elsewhere in this file.
template <int nthreads, // Number of threads per one histogram block
          int nblocks>  // Number of histogram block processed by single GPU thread block
__global__ void compute_confidence_hists_kernel_many_blocks(const int img_win_width, const int img_block_width,
                                                            const int win_block_stride_x, const int win_block_stride_y,
                                                            const float* block_hists, const float* coefs,
                                                            float free_coef, float threshold, float* confidences)
{
    const int win_x = threadIdx.z;

    // Windows past the end of the row must not exit early: every thread of the
    // block has to reach the __syncthreads() barriers below. Out-of-range threads
    // simply contribute a zero partial sum and skip the final store.
    const bool in_range = !(blockIdx.x * blockDim.z + win_x >= img_win_width);

    float product = 0.f;
    if (in_range)
    {
        const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
                                           blockIdx.x * win_block_stride_x * blockDim.z + win_x) *
                                          cblock_hist_size;

        // Each thread accumulates a strided slice of the descriptor/weight dot product.
        for (int i = threadIdx.x; i < cdescr_size; i += nthreads)
        {
            int offset_y = i / cdescr_width;
            int offset_x = i - offset_y * cdescr_width;
            product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x];
        }
    }

    __shared__ float products[nthreads * nblocks];

    const int tid = threadIdx.z * nthreads + threadIdx.x;
    products[tid] = product;

    __syncthreads();

    // Tree reduction of the per-thread partial sums of one window.
    if (nthreads >= 512)
    {
        if (threadIdx.x < 256) products[tid] = product = product + products[tid + 256];
        __syncthreads();
    }
    if (nthreads >= 256)
    {
        if (threadIdx.x < 128) products[tid] = product = product + products[tid + 128];
        __syncthreads();
    }
    if (nthreads >= 128)
    {
        if (threadIdx.x < 64) products[tid] = product = product + products[tid + 64];
        __syncthreads();
    }

    // Final steps run without barriers through a volatile pointer.
    // NOTE(review): this relies on the lanes of a warp executing in lockstep --
    // confirm this assumption holds on the targeted architectures.
    if (threadIdx.x < 32)
    {
        volatile float* smem = products;
        if (nthreads >= 64) smem[tid] = product = product + smem[tid + 32];
        if (nthreads >= 32) smem[tid] = product = product + smem[tid + 16];
        if (nthreads >= 16) smem[tid] = product = product + smem[tid + 8];
        if (nthreads >= 8) smem[tid] = product = product + smem[tid + 4];
        if (nthreads >= 4) smem[tid] = product = product + smem[tid + 2];
        if (nthreads >= 2) smem[tid] = product = product + smem[tid + 1];
    }

    // Thread 0 of each window holds the full dot product; add the bias and store it.
    if (in_range && threadIdx.x == 0)
        confidences[blockIdx.y * img_win_width + blockIdx.x * blockDim.z + win_x]
            = (float)(product + free_coef);
}
|
||||
|
||||
// Host wrapper: launches compute_confidence_hists_kernel_many_blocks to compute
// the SVM confidence of every detection window of an image.
//
//   win_height, win_width  - detection window size in pixels
//   block_stride_y/x       - histogram block stride in pixels
//   win_stride_y/x         - detection window stride in pixels
//   height, width          - image size in pixels
//   block_hists            - device pointer to the block histograms
//   coefs, free_coef       - device pointer to the SVM weights, and the SVM bias
//   threshold              - forwarded to the kernel, which does not use it
//   confidences            - device output, one float per window
//                            (img_win_height rows of img_win_width values)
//
// Blocks until the kernel has finished.
void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
                              int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
                              float* coefs, float free_coef, float threshold, float *confidences)
{
    const int nthreads = 256;
    const int nblocks = 1;

    int win_block_stride_x = win_stride_x / block_stride_x;
    int win_block_stride_y = win_stride_y / block_stride_y;
    int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
    int img_win_height = (height - win_height + win_stride_y) / win_stride_y;

    // Image smaller than the detection window: there is nothing to compute, and a
    // zero/negative grid dimension would be an invalid launch configuration.
    if (img_win_width <= 0 || img_win_height <= 0)
        return;

    dim3 threads(nthreads, 1, nblocks);
    dim3 grid(divUp(img_win_width, nblocks), img_win_height);

    cudaSafeCall(cudaFuncSetCacheConfig(compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>,
                                        cudaFuncCachePreferL1));

    int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
                          block_stride_x;
    compute_confidence_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
        img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
        block_hists, coefs, free_coef, threshold, confidences);

    // A kernel launch does not return a status itself; pick up launch errors here.
    cudaSafeCall(cudaGetLastError());

    // Wait for completion so that execution errors surface at this call site.
    cudaSafeCall(cudaThreadSynchronize());
}
|
||||
|
||||
|
||||
|
||||
template <int nthreads, // Number of threads per one histogram block
|
||||
int nblocks> // Number of histogram block processed by single GPU thread block
|
||||
|
@@ -57,6 +57,8 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&, int) {
|
||||
std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector<float>(); }
|
||||
std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector48x96() { throw_nogpu(); return std::vector<float>(); }
|
||||
std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector64x128() { throw_nogpu(); return std::vector<float>(); }
|
||||
void cv::gpu::HOGDescriptor::computeConfidence(const GpuMat&, vector<Point>&, double, Size, Size, vector<Point>&, vector<double>&) { throw_nogpu(); }
|
||||
void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat&, vector<Rect>&, double, Size, Size, vector<HOGConfidence>&, int) { throw_nogpu(); }
|
||||
|
||||
#else
|
||||
|
||||
@@ -79,6 +81,10 @@ namespace cv { namespace gpu { namespace device
|
||||
int width, float* block_hists, float* coefs, float free_coef,
|
||||
float threshold, unsigned char* labels);
|
||||
|
||||
void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
||||
float* coefs, float free_coef, float threshold, float *confidences);
|
||||
|
||||
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
||||
cv::gpu::DevMem2Df descriptors);
|
||||
@@ -258,6 +264,99 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::HOGDescriptor::computeConfidence(const GpuMat& img, vector<Point>& hits, double hit_threshold,
|
||||
Size win_stride, Size padding, vector<Point>& locations, vector<double>& confidences)
|
||||
{
|
||||
CV_Assert(padding == Size(0, 0));
|
||||
|
||||
hits.clear();
|
||||
if (detector.empty())
|
||||
return;
|
||||
|
||||
computeBlockHistograms(img);
|
||||
|
||||
if (win_stride == Size())
|
||||
win_stride = block_stride;
|
||||
else
|
||||
CV_Assert(win_stride.width % block_stride.width == 0 &&
|
||||
win_stride.height % block_stride.height == 0);
|
||||
|
||||
Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
|
||||
labels.create(1, wins_per_img.area(), CV_32F);
|
||||
|
||||
hog::compute_confidence_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
|
||||
win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(),
|
||||
detector.ptr<float>(), (float)free_coef, (float)hit_threshold, labels.ptr<float>());
|
||||
|
||||
labels.download(labels_host);
|
||||
float* vec = labels_host.ptr<float>();
|
||||
|
||||
// does not support roi for now..
|
||||
locations.clear();
|
||||
confidences.clear();
|
||||
for (int i = 0; i < wins_per_img.area(); i++)
|
||||
{
|
||||
int y = i / wins_per_img.width;
|
||||
int x = i - wins_per_img.width * y;
|
||||
if (vec[i] >= hit_threshold)
|
||||
hits.push_back(Point(x * win_stride.width, y * win_stride.height));
|
||||
|
||||
Point pt(win_stride.width * x, win_stride.height * y);
|
||||
locations.push_back(pt);
|
||||
confidences.push_back((double)vec[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, vector<Rect>& found_locations,
|
||||
double hit_threshold, Size win_stride, Size padding,
|
||||
vector<HOGConfidence> &conf_out, int group_threshold)
|
||||
{
|
||||
vector<double> level_scale;
|
||||
double scale = 1.;
|
||||
int levels = 0;
|
||||
|
||||
for (levels = 0; levels < conf_out.size(); levels++)
|
||||
{
|
||||
scale = conf_out[levels].scale;
|
||||
level_scale.push_back(scale);
|
||||
if (cvRound(img.cols/scale) < win_size.width ||
|
||||
cvRound(img.rows/scale) < win_size.height)
|
||||
break;
|
||||
}
|
||||
|
||||
levels = std::max(levels, 1);
|
||||
level_scale.resize(levels);
|
||||
|
||||
std::vector<Rect> all_candidates;
|
||||
vector<Point> locations;
|
||||
|
||||
for (size_t i = 0; i < level_scale.size(); i++)
|
||||
{
|
||||
double scale = level_scale[i];
|
||||
Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
|
||||
GpuMat smaller_img;
|
||||
|
||||
if (sz == img.size())
|
||||
smaller_img = img;
|
||||
else
|
||||
{
|
||||
smaller_img.create(sz, img.type());
|
||||
switch (img.type()) {
|
||||
case CV_8UC1: hog::resize_8UC1(img, smaller_img); break;
|
||||
case CV_8UC4: hog::resize_8UC4(img, smaller_img); break;
|
||||
}
|
||||
}
|
||||
|
||||
computeConfidence(smaller_img, locations, hit_threshold, win_stride, padding, conf_out[i].locations, conf_out[i].confidences);
|
||||
|
||||
Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
|
||||
for (size_t j = 0; j < locations.size(); j++)
|
||||
all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size));
|
||||
}
|
||||
found_locations.assign(all_candidates.begin(), all_candidates.end());
|
||||
groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/);
|
||||
}
|
||||
|
||||
|
||||
void cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, double hit_threshold, Size win_stride, Size padding)
|
||||
{
|
||||
|
Reference in New Issue
Block a user