Merge pull request #2008 from krodyush:pullreq/2.4-opt-131209-hog
This commit is contained in:
commit
22a3cf0fba
@ -76,6 +76,11 @@ namespace cv
|
|||||||
int cdescr_width;
|
int cdescr_width;
|
||||||
int cdescr_height;
|
int cdescr_height;
|
||||||
|
|
||||||
|
// A shift value and type that allows qangle to be different
|
||||||
|
// sizes on different hardware
|
||||||
|
int qangle_step_shift;
|
||||||
|
int qangle_type;
|
||||||
|
|
||||||
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
|
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
|
||||||
int nblocks_win_x, int nblocks_win_y);
|
int nblocks_win_x, int nblocks_win_y);
|
||||||
|
|
||||||
@ -153,6 +158,7 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
|
|||||||
hog_device_cpu = true;
|
hog_device_cpu = true;
|
||||||
else
|
else
|
||||||
hog_device_cpu = false;
|
hog_device_cpu = false;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
|
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
|
||||||
@ -213,7 +219,7 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
|
|||||||
effect_size = img.size();
|
effect_size = img.size();
|
||||||
|
|
||||||
grad.create(img.size(), CV_32FC2);
|
grad.create(img.size(), CV_32FC2);
|
||||||
qangle.create(img.size(), CV_8UC2);
|
qangle.create(img.size(), hog::qangle_type);
|
||||||
|
|
||||||
const size_t block_hist_size = getBlockHistogramSize();
|
const size_t block_hist_size = getBlockHistogramSize();
|
||||||
const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
|
const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
|
||||||
@ -1607,6 +1613,16 @@ void cv::ocl::device::hog::set_up_constants(int nbins,
|
|||||||
|
|
||||||
int descr_size = descr_width * nblocks_win_y;
|
int descr_size = descr_width * nblocks_win_y;
|
||||||
cdescr_size = descr_size;
|
cdescr_size = descr_size;
|
||||||
|
|
||||||
|
qangle_type = CV_8UC2;
|
||||||
|
qangle_step_shift = 0;
|
||||||
|
// Some Intel devices have low single-byte access performance,
|
||||||
|
// so we change the datatype here.
|
||||||
|
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
|
||||||
|
{
|
||||||
|
qangle_type = CV_32SC2;
|
||||||
|
qangle_step_shift = 2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::ocl::device::hog::compute_hists(int nbins,
|
void cv::ocl::device::hog::compute_hists(int nbins,
|
||||||
@ -1628,7 +1644,7 @@ void cv::ocl::device::hog::compute_hists(int nbins,
|
|||||||
int blocks_total = img_block_width * img_block_height;
|
int blocks_total = img_block_width * img_block_height;
|
||||||
|
|
||||||
int grad_quadstep = grad.step >> 2;
|
int grad_quadstep = grad.step >> 2;
|
||||||
int qangle_step = qangle.step;
|
int qangle_step = qangle.step >> qangle_step_shift;
|
||||||
|
|
||||||
int blocks_in_group = 4;
|
int blocks_in_group = 4;
|
||||||
size_t localThreads[3] = { blocks_in_group * 24, 2, 1 };
|
size_t localThreads[3] = { blocks_in_group * 24, 2, 1 };
|
||||||
@ -1892,7 +1908,7 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width,
|
|||||||
char correctGamma = (correct_gamma) ? 1 : 0;
|
char correctGamma = (correct_gamma) ? 1 : 0;
|
||||||
int img_step = img.step;
|
int img_step = img.step;
|
||||||
int grad_quadstep = grad.step >> 3;
|
int grad_quadstep = grad.step >> 3;
|
||||||
int qangle_step = qangle.step >> 1;
|
int qangle_step = qangle.step >> (1 + qangle_step_shift);
|
||||||
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&height));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&height));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&width));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&width));
|
||||||
@ -1927,7 +1943,7 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width,
|
|||||||
char correctGamma = (correct_gamma) ? 1 : 0;
|
char correctGamma = (correct_gamma) ? 1 : 0;
|
||||||
int img_step = img.step >> 2;
|
int img_step = img.step >> 2;
|
||||||
int grad_quadstep = grad.step >> 3;
|
int grad_quadstep = grad.step >> 3;
|
||||||
int qangle_step = qangle.step >> 1;
|
int qangle_step = qangle.step >> (1 + qangle_step_shift);
|
||||||
|
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&height));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&height));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&width));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&width));
|
||||||
|
@ -50,6 +50,14 @@
|
|||||||
#define NTHREADS 256
|
#define NTHREADS 256
|
||||||
#define CV_PI_F 3.1415926535897932384626433832795f
|
#define CV_PI_F 3.1415926535897932384626433832795f
|
||||||
|
|
||||||
|
#ifdef INTEL_DEVICE
|
||||||
|
#define QANGLE_TYPE int
|
||||||
|
#define QANGLE_TYPE2 int2
|
||||||
|
#else
|
||||||
|
#define QANGLE_TYPE uchar
|
||||||
|
#define QANGLE_TYPE2 uchar2
|
||||||
|
#endif
|
||||||
|
|
||||||
//----------------------------------------------------------------------------
|
//----------------------------------------------------------------------------
|
||||||
// Histogram computation
|
// Histogram computation
|
||||||
// 12 threads for a cell, 12x4 threads per block
|
// 12 threads for a cell, 12x4 threads per block
|
||||||
@ -59,7 +67,7 @@ __kernel void compute_hists_lut_kernel(
|
|||||||
const int cnbins, const int cblock_hist_size, const int img_block_width,
|
const int cnbins, const int cblock_hist_size, const int img_block_width,
|
||||||
const int blocks_in_group, const int blocks_total,
|
const int blocks_in_group, const int blocks_total,
|
||||||
const int grad_quadstep, const int qangle_step,
|
const int grad_quadstep, const int qangle_step,
|
||||||
__global const float* grad, __global const uchar* qangle,
|
__global const float* grad, __global const QANGLE_TYPE* qangle,
|
||||||
__global const float* gauss_w_lut,
|
__global const float* gauss_w_lut,
|
||||||
__global float* block_hists, __local float* smem)
|
__global float* block_hists, __local float* smem)
|
||||||
{
|
{
|
||||||
@ -86,7 +94,7 @@ __kernel void compute_hists_lut_kernel(
|
|||||||
|
|
||||||
__global const float* grad_ptr = (gid < blocks_total) ?
|
__global const float* grad_ptr = (gid < blocks_total) ?
|
||||||
grad + offset_y * grad_quadstep + (offset_x << 1) : grad;
|
grad + offset_y * grad_quadstep + (offset_x << 1) : grad;
|
||||||
__global const uchar* qangle_ptr = (gid < blocks_total) ?
|
__global const QANGLE_TYPE* qangle_ptr = (gid < blocks_total) ?
|
||||||
qangle + offset_y * qangle_step + (offset_x << 1) : qangle;
|
qangle + offset_y * qangle_step + (offset_x << 1) : qangle;
|
||||||
|
|
||||||
__local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) +
|
__local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) +
|
||||||
@ -101,7 +109,7 @@ __kernel void compute_hists_lut_kernel(
|
|||||||
for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
|
for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
|
||||||
{
|
{
|
||||||
float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
|
float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
|
||||||
uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]);
|
QANGLE_TYPE2 bin = (QANGLE_TYPE2) (qangle_ptr[0], qangle_ptr[1]);
|
||||||
|
|
||||||
grad_ptr += grad_quadstep;
|
grad_ptr += grad_quadstep;
|
||||||
qangle_ptr += qangle_step;
|
qangle_ptr += qangle_step;
|
||||||
@ -558,7 +566,7 @@ __kernel void extract_descrs_by_cols_kernel(
|
|||||||
__kernel void compute_gradients_8UC4_kernel(
|
__kernel void compute_gradients_8UC4_kernel(
|
||||||
const int height, const int width,
|
const int height, const int width,
|
||||||
const int img_step, const int grad_quadstep, const int qangle_step,
|
const int img_step, const int grad_quadstep, const int qangle_step,
|
||||||
const __global uchar4 * img, __global float * grad, __global uchar * qangle,
|
const __global uchar4 * img, __global float * grad, __global QANGLE_TYPE * qangle,
|
||||||
const float angle_scale, const char correct_gamma, const int cnbins)
|
const float angle_scale, const char correct_gamma, const int cnbins)
|
||||||
{
|
{
|
||||||
const int x = get_global_id(0);
|
const int x = get_global_id(0);
|
||||||
@ -660,7 +668,7 @@ __kernel void compute_gradients_8UC4_kernel(
|
|||||||
__kernel void compute_gradients_8UC1_kernel(
|
__kernel void compute_gradients_8UC1_kernel(
|
||||||
const int height, const int width,
|
const int height, const int width,
|
||||||
const int img_step, const int grad_quadstep, const int qangle_step,
|
const int img_step, const int grad_quadstep, const int qangle_step,
|
||||||
__global const uchar * img, __global float * grad, __global uchar * qangle,
|
__global const uchar * img, __global float * grad, __global QANGLE_TYPE * qangle,
|
||||||
const float angle_scale, const char correct_gamma, const int cnbins)
|
const float angle_scale, const char correct_gamma, const int cnbins)
|
||||||
{
|
{
|
||||||
const int x = get_global_id(0);
|
const int x = get_global_id(0);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user