Merge pull request #2481 from akarsakov:fix_hog_intel

This commit is contained in:
Andrey Pavlenko 2014-03-14 19:17:03 +04:00 committed by OpenCV Buildbot
commit dd6cf15a39

View File

@ -50,14 +50,6 @@
#define NTHREADS 256 #define NTHREADS 256
#define CV_PI_F 3.1415926535897932384626433832795f #define CV_PI_F 3.1415926535897932384626433832795f
// QANGLE_TYPE  : element type of the `qangle` buffers taken by the kernels in
//                this file (compute_hists_lut_kernel and the compute_gradients_*
//                kernels declare their `qangle` parameter with it).
// QANGLE_TYPE2 : matching two-component vector type, used for the `bin` pair
//                built from qangle_ptr[0] and qangle_ptr[1] in the histogram loop.
// When INTEL_DEVICE is defined the buffers hold int elements; otherwise uchar.
// NOTE(review): the host side presumably has to allocate `qangle` and compute
// `qangle_step` for the same element type selected here — confirm against the
// caller that builds this program.
#ifdef INTEL_DEVICE
#define QANGLE_TYPE int
#define QANGLE_TYPE2 int2
#else
#define QANGLE_TYPE uchar
#define QANGLE_TYPE2 uchar2
#endif
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
// Histogram computation // Histogram computation
// 12 threads for a cell, 12x4 threads per block // 12 threads for a cell, 12x4 threads per block
@ -67,7 +59,7 @@ __kernel void compute_hists_lut_kernel(
const int cnbins, const int cblock_hist_size, const int img_block_width, const int cnbins, const int cblock_hist_size, const int img_block_width,
const int blocks_in_group, const int blocks_total, const int blocks_in_group, const int blocks_total,
const int grad_quadstep, const int qangle_step, const int grad_quadstep, const int qangle_step,
__global const float* grad, __global const QANGLE_TYPE* qangle, __global const float* grad, __global const uchar* qangle,
__global const float* gauss_w_lut, __global const float* gauss_w_lut,
__global float* block_hists, __local float* smem) __global float* block_hists, __local float* smem)
{ {
@ -94,7 +86,7 @@ __kernel void compute_hists_lut_kernel(
__global const float* grad_ptr = (gid < blocks_total) ? __global const float* grad_ptr = (gid < blocks_total) ?
grad + offset_y * grad_quadstep + (offset_x << 1) : grad; grad + offset_y * grad_quadstep + (offset_x << 1) : grad;
__global const QANGLE_TYPE* qangle_ptr = (gid < blocks_total) ? __global const uchar* qangle_ptr = (gid < blocks_total) ?
qangle + offset_y * qangle_step + (offset_x << 1) : qangle; qangle + offset_y * qangle_step + (offset_x << 1) : qangle;
__local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) + __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) +
@ -109,7 +101,7 @@ __kernel void compute_hists_lut_kernel(
for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y) for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
{ {
float2 vote = (float2) (grad_ptr[0], grad_ptr[1]); float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
QANGLE_TYPE2 bin = (QANGLE_TYPE2) (qangle_ptr[0], qangle_ptr[1]); uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]);
grad_ptr += grad_quadstep; grad_ptr += grad_quadstep;
qangle_ptr += qangle_step; qangle_ptr += qangle_step;
@ -566,7 +558,7 @@ __kernel void extract_descrs_by_cols_kernel(
__kernel void compute_gradients_8UC4_kernel( __kernel void compute_gradients_8UC4_kernel(
const int height, const int width, const int height, const int width,
const int img_step, const int grad_quadstep, const int qangle_step, const int img_step, const int grad_quadstep, const int qangle_step,
const __global uchar4 * img, __global float * grad, __global QANGLE_TYPE * qangle, const __global uchar4 * img, __global float * grad, __global uchar * qangle,
const float angle_scale, const char correct_gamma, const int cnbins) const float angle_scale, const char correct_gamma, const int cnbins)
{ {
const int x = get_global_id(0); const int x = get_global_id(0);
@ -668,7 +660,7 @@ __kernel void compute_gradients_8UC4_kernel(
__kernel void compute_gradients_8UC1_kernel( __kernel void compute_gradients_8UC1_kernel(
const int height, const int width, const int height, const int width,
const int img_step, const int grad_quadstep, const int qangle_step, const int img_step, const int grad_quadstep, const int qangle_step,
__global const uchar * img, __global float * grad, __global QANGLE_TYPE * qangle, __global const uchar * img, __global float * grad, __global uchar * qangle,
const float angle_scale, const char correct_gamma, const int cnbins) const float angle_scale, const char correct_gamma, const int cnbins)
{ {
const int x = get_global_id(0); const int x = get_global_id(0);