Merge pull request #1061 from bitwangyaoyao:2.4_fix2
This commit is contained in:
@@ -1816,8 +1816,14 @@ void cv::ocl::device::hog::normalize_hists(int nbins,
|
||||
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
||||
localThreads, args, -1, -1, "-D CPU");
|
||||
else
|
||||
{
|
||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
|
||||
int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
|
||||
char opt[32] = {0};
|
||||
sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
|
||||
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
||||
localThreads, args, -1, -1);
|
||||
localThreads, args, -1, -1, opt);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::device::hog::classify_hists(int win_height, int win_width,
|
||||
@@ -1879,8 +1885,14 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width,
|
||||
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
||||
localThreads, args, -1, -1, "-D CPU");
|
||||
else
|
||||
{
|
||||
cl_kernel kernel = openCLGetKernelFromSource(clCxt, &objdetect_hog, kernelName);
|
||||
int wave_size = queryDeviceInfo<WAVEFRONT_SIZE, int>(kernel);
|
||||
char opt[32] = {0};
|
||||
sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
|
||||
openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads,
|
||||
localThreads, args, -1, -1);
|
||||
localThreads, args, -1, -1, opt);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width,
|
||||
|
@@ -133,7 +133,9 @@ __kernel void compute_hists_lut_kernel(
|
||||
final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] =
|
||||
hist_[0] + hist_[1] + hist_[2];
|
||||
}
|
||||
#ifdef CPU
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
|
||||
int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x;
|
||||
if ((tid < cblock_hist_size) && (gid < blocks_total))
|
||||
@@ -225,8 +227,9 @@ __kernel void compute_hists_kernel(
|
||||
final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] =
|
||||
hist_[0] + hist_[1] + hist_[2];
|
||||
}
|
||||
#ifdef CPU
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
#endif
|
||||
int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x;
|
||||
if ((tid < cblock_hist_size) && (gid < blocks_total))
|
||||
{
|
||||
@@ -318,6 +321,10 @@ float reduce_smem(volatile __local float* smem, int size)
|
||||
if (tid < 32)
|
||||
{
|
||||
if (size >= 64) smem[tid] = sum = sum + smem[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16) {
|
||||
#endif
|
||||
if (size >= 32) smem[tid] = sum = sum + smem[tid + 16];
|
||||
if (size >= 16) smem[tid] = sum = sum + smem[tid + 8];
|
||||
if (size >= 8) smem[tid] = sum = sum + smem[tid + 4];
|
||||
@@ -418,6 +425,9 @@ __kernel void classify_hists_180_kernel(
|
||||
{
|
||||
smem[tid] = product = product + smem[tid + 32];
|
||||
}
|
||||
#if WAVE_SIZE < 32
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
#endif
|
||||
if (tid < 16)
|
||||
{
|
||||
smem[tid] = product = product + smem[tid + 16];
|
||||
@@ -487,6 +497,10 @@ __kernel void classify_hists_252_kernel(
|
||||
if (tid < 32)
|
||||
{
|
||||
smem[tid] = product = product + smem[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16) {
|
||||
#endif
|
||||
smem[tid] = product = product + smem[tid + 16];
|
||||
smem[tid] = product = product + smem[tid + 8];
|
||||
smem[tid] = product = product + smem[tid + 4];
|
||||
@@ -553,6 +567,10 @@ __kernel void classify_hists_kernel(
|
||||
if (tid < 32)
|
||||
{
|
||||
smem[tid] = product = product + smem[tid + 32];
|
||||
#if WAVE_SIZE < 32
|
||||
} barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (tid < 16) {
|
||||
#endif
|
||||
smem[tid] = product = product + smem[tid + 16];
|
||||
smem[tid] = product = product + smem[tid + 8];
|
||||
smem[tid] = product = product + smem[tid + 4];
|
||||
|
@@ -258,27 +258,13 @@ float sobel(__global unsigned char *input, int x, int y, int rows, int cols)
|
||||
|
||||
float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
|
||||
{
|
||||
float cache = 0;
|
||||
float cache2 = 0;
|
||||
int winsz2 = winsz/2;
|
||||
unsigned int cache = cols[0];
|
||||
|
||||
int x = get_local_id(0);
|
||||
int group_size_x = get_local_size(0);
|
||||
|
||||
for(int i = 1; i <= winsz2; i++)
|
||||
#pragma unroll
|
||||
for(int i = 1; i <= winsz; i++)
|
||||
cache += cols[i];
|
||||
|
||||
cols_cache[0] = cache;
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (x < group_size_x - winsz2)
|
||||
cache2 = cols_cache[winsz2];
|
||||
else
|
||||
for(int i = winsz2 + 1; i < winsz; i++)
|
||||
cache2 += cols[i];
|
||||
|
||||
return cols[0] + cache + cache2;
|
||||
return cache;
|
||||
}
|
||||
|
||||
#define RpT (2 * ROWSperTHREAD) // got experimentally
|
||||
@@ -301,8 +287,7 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in
|
||||
int beg_row = group_id_y * RpT;
|
||||
int end_row = min(beg_row + RpT, disp_rows);
|
||||
|
||||
// if (x < disp_cols)
|
||||
// {
|
||||
|
||||
int y = beg_row;
|
||||
|
||||
float sum = 0;
|
||||
@@ -340,11 +325,15 @@ __kernel void textureness_kernel(__global unsigned char *disp, int disp_rows, in
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
|
||||
if (sum_win < threshold)
|
||||
disp[y * disp_step + x] = 0;
|
||||
|
||||
if (x < disp_cols)
|
||||
{
|
||||
float sum_win = CalcSums(cols, cols_cache + local_id_x, winsz) * 255;
|
||||
if (sum_win < threshold)
|
||||
disp[y * disp_step + x] = 0;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
// }
|
||||
|
||||
}
|
||||
|
@@ -118,6 +118,8 @@ int main(int argc, char **argv)
|
||||
|
||||
setDevice(oclinfo[pid], device);
|
||||
|
||||
setBinaryDiskCache(CACHE_UPDATE);
|
||||
|
||||
cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl;
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
@@ -146,17 +146,17 @@ TEST_P(HOG, Detect)
|
||||
if (winSize.width == 48 && winSize.height == 96)
|
||||
{
|
||||
// daimler's base
|
||||
ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector48x96());
|
||||
ocl_hog.setSVMDetector(hog.getDaimlerPeopleDetector());
|
||||
hog.setSVMDetector(hog.getDaimlerPeopleDetector());
|
||||
}
|
||||
else if (winSize.width == 64 && winSize.height == 128)
|
||||
{
|
||||
ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector64x128());
|
||||
ocl_hog.setSVMDetector(hog.getDefaultPeopleDetector());
|
||||
hog.setSVMDetector(hog.getDefaultPeopleDetector());
|
||||
}
|
||||
else
|
||||
{
|
||||
ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector());
|
||||
ocl_hog.setSVMDetector(hog.getDefaultPeopleDetector());
|
||||
hog.setSVMDetector(hog.getDefaultPeopleDetector());
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user