Merge pull request #5115 from ManuelFreudenreich:hog_variable
This commit is contained in:
commit
96fa0ef752
@ -49,11 +49,6 @@
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
// Other values are not supported
|
||||
#define CELL_WIDTH 8
|
||||
#define CELL_HEIGHT 8
|
||||
#define CELLS_PER_BLOCK_X 2
|
||||
#define CELLS_PER_BLOCK_Y 2
|
||||
|
||||
namespace hog
|
||||
{
|
||||
@ -62,6 +57,8 @@ namespace cv { namespace cuda { namespace device
|
||||
__constant__ int cblock_stride_y;
|
||||
__constant__ int cnblocks_win_x;
|
||||
__constant__ int cnblocks_win_y;
|
||||
__constant__ int cncells_block_x;
|
||||
__constant__ int cncells_block_y;
|
||||
__constant__ int cblock_hist_size;
|
||||
__constant__ int cblock_hist_size_2up;
|
||||
__constant__ int cdescr_size;
|
||||
@ -72,31 +69,47 @@ namespace cv { namespace cuda { namespace device
|
||||
the typical GPU thread count (per block) values */
|
||||
// Rounds n up to the smallest power of two that is >= n, limited to the
// range [1, 1024].
//
// n       : value to round up (0 is treated like 1).
// returns : the power of two, or -1 when n is larger than 1024.
//
// The comparison is inclusive (n <= pow2) so that an input that is already a
// power of two maps to itself (16 -> 16) instead of being doubled.
int power_2up(unsigned int n)
{
    for (unsigned int pow2 = 1; pow2 <= 1024; pow2 <<= 1)
    {
        if (n <= pow2)
            return (int)pow2;
    }
    return -1; // Input is too big
}
|
||||
|
||||
/* Returns the max size for nblocks */
|
||||
// Picks how many histogram blocks (between 1 and 4) to process together:
// the largest multiplier k in {4, 3, 2} for which
// nthreads * ncells_block * k does not exceed 256, or 1 if none fits.
int max_nblocks(int nthreads, int ncells_block = 1)
{
    const int thread_budget = 256;
    const int threads_per_hist_block = nthreads * ncells_block;

    // Try the largest multiplier first and stop at the first one that fits.
    for (int candidate = 4; candidate > 1; --candidate)
    {
        if (threads_per_hist_block * candidate <= thread_budget)
            return candidate;
    }
    return 1;
}
|
||||
|
||||
|
||||
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
|
||||
int nblocks_win_x, int nblocks_win_y)
|
||||
int nblocks_win_x, int nblocks_win_y, int ncells_block_x, int ncells_block_y)
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cnbins, &nbins, sizeof(nbins)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_x, &block_stride_x, sizeof(block_stride_x)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cblock_stride_y, &block_stride_y, sizeof(block_stride_y)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_x, &nblocks_win_x, sizeof(nblocks_win_x)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cnblocks_win_y, &nblocks_win_y, sizeof(nblocks_win_y)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cncells_block_x, &ncells_block_x, sizeof(ncells_block_x)) );
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cncells_block_y, &ncells_block_y, sizeof(ncells_block_y)) );
|
||||
|
||||
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
|
||||
int block_hist_size = nbins * ncells_block_x * ncells_block_y;
|
||||
cudaSafeCall( cudaMemcpyToSymbol(cblock_hist_size, &block_hist_size, sizeof(block_hist_size)) );
|
||||
|
||||
int block_hist_size_2up = power_2up(block_hist_size);
|
||||
@ -112,44 +125,48 @@ namespace cv { namespace cuda { namespace device
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Histogram computation
|
||||
|
||||
|
||||
//
|
||||
// CUDA kernel to compute the histograms
|
||||
template <int nblocks> // Number of histogram blocks processed by single GPU thread block
|
||||
__global__ void compute_hists_kernel_many_blocks(const int img_block_width, const PtrStepf grad,
|
||||
const PtrStepb qangle, float scale, float* block_hists)
|
||||
const PtrStepb qangle, float scale, float* block_hists,
|
||||
int cell_size, int patch_size, int block_patch_size,
|
||||
int threads_cell, int threads_block, int half_cell_size)
|
||||
{
|
||||
const int block_x = threadIdx.z;
|
||||
const int cell_x = threadIdx.x / 16;
|
||||
const int cell_x = threadIdx.x / threads_cell;
|
||||
const int cell_y = threadIdx.y;
|
||||
const int cell_thread_x = threadIdx.x & 0xF;
|
||||
const int cell_thread_x = threadIdx.x & (threads_cell - 1);
|
||||
|
||||
if (blockIdx.x * blockDim.z + block_x >= img_block_width)
|
||||
return;
|
||||
|
||||
extern __shared__ float smem[];
|
||||
float* hists = smem;
|
||||
float* final_hist = smem + cnbins * 48 * nblocks;
|
||||
float* final_hist = smem + cnbins * block_patch_size * nblocks;
|
||||
|
||||
const int offset_x = (blockIdx.x * blockDim.z + block_x) * cblock_stride_x +
|
||||
4 * cell_x + cell_thread_x;
|
||||
const int offset_y = blockIdx.y * cblock_stride_y + 4 * cell_y;
|
||||
|
||||
const float* grad_ptr = grad.ptr(offset_y) + offset_x * 2;
|
||||
const unsigned char* qangle_ptr = qangle.ptr(offset_y) + offset_x * 2;
|
||||
|
||||
// 12 means that 12 pixels affect on block's cell (in one row)
|
||||
if (cell_thread_x < 12)
|
||||
// patch_size means that patch_size pixels affect on block's cell
|
||||
if (cell_thread_x < patch_size)
|
||||
{
|
||||
float* hist = hists + 12 * (cell_y * blockDim.z * CELLS_PER_BLOCK_Y +
|
||||
cell_x + block_x * CELLS_PER_BLOCK_X) +
|
||||
const int offset_x = (blockIdx.x * blockDim.z + block_x) * cblock_stride_x +
|
||||
half_cell_size * cell_x + cell_thread_x;
|
||||
const int offset_y = blockIdx.y * cblock_stride_y + half_cell_size * cell_y;
|
||||
|
||||
const float* grad_ptr = grad.ptr(offset_y) + offset_x * 2;
|
||||
const unsigned char* qangle_ptr = qangle.ptr(offset_y) + offset_x * 2;
|
||||
|
||||
|
||||
float* hist = hists + patch_size * (cell_y * blockDim.z * cncells_block_y +
|
||||
cell_x + block_x * cncells_block_x) +
|
||||
cell_thread_x;
|
||||
for (int bin_id = 0; bin_id < cnbins; ++bin_id)
|
||||
hist[bin_id * 48 * nblocks] = 0.f;
|
||||
hist[bin_id * block_patch_size * nblocks] = 0.f;
|
||||
|
||||
const int dist_x = -4 + (int)cell_thread_x - 4 * cell_x;
|
||||
//(dist_x, dist_y) : distance between current pixel in patch and cell's center
|
||||
const int dist_x = -half_cell_size + (int)cell_thread_x - half_cell_size * cell_x;
|
||||
|
||||
const int dist_y_begin = -4 - 4 * (int)threadIdx.y;
|
||||
for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
|
||||
const int dist_y_begin = -half_cell_size - half_cell_size * (int)threadIdx.y;
|
||||
for (int dist_y = dist_y_begin; dist_y < dist_y_begin + patch_size; ++dist_y)
|
||||
{
|
||||
float2 vote = *(const float2*)grad_ptr;
|
||||
uchar2 bin = *(const uchar2*)qangle_ptr;
|
||||
@ -157,25 +174,29 @@ namespace cv { namespace cuda { namespace device
|
||||
grad_ptr += grad.step/sizeof(float);
|
||||
qangle_ptr += qangle.step;
|
||||
|
||||
int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
|
||||
int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);
|
||||
//(dist_center_x, dist_center_y) : distance between current pixel in patch and block's center
|
||||
int dist_center_y = dist_y - half_cell_size * (1 - 2 * cell_y);
|
||||
int dist_center_x = dist_x - half_cell_size * (1 - 2 * cell_x);
|
||||
|
||||
float gaussian = ::expf(-(dist_center_y * dist_center_y +
|
||||
dist_center_x * dist_center_x) * scale);
|
||||
float interp_weight = (8.f - ::fabs(dist_y + 0.5f)) *
|
||||
(8.f - ::fabs(dist_x + 0.5f)) / 64.f;
|
||||
|
||||
hist[bin.x * 48 * nblocks] += gaussian * interp_weight * vote.x;
|
||||
hist[bin.y * 48 * nblocks] += gaussian * interp_weight * vote.y;
|
||||
float interp_weight = ((float)cell_size - ::fabs(dist_y + 0.5f)) *
|
||||
((float)cell_size - ::fabs(dist_x + 0.5f)) / (float)threads_block;
|
||||
|
||||
hist[bin.x * block_patch_size * nblocks] += gaussian * interp_weight * vote.x;
|
||||
hist[bin.y * block_patch_size * nblocks] += gaussian * interp_weight * vote.y;
|
||||
}
|
||||
|
||||
//reduction of the histograms
|
||||
volatile float* hist_ = hist;
|
||||
for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48 * nblocks)
|
||||
for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += block_patch_size * nblocks)
|
||||
{
|
||||
if (cell_thread_x < 6) hist_[0] += hist_[6];
|
||||
if (cell_thread_x < 3) hist_[0] += hist_[3];
|
||||
if (cell_thread_x < patch_size/2) hist_[0] += hist_[patch_size/2];
|
||||
if (cell_thread_x < patch_size/4 && (!((patch_size/4) < 3 && cell_thread_x == 0)))
|
||||
hist_[0] += hist_[patch_size/4];
|
||||
if (cell_thread_x == 0)
|
||||
final_hist[((cell_x + block_x * 2) * 2 + cell_y) * cnbins + bin_id]
|
||||
final_hist[((cell_x + block_x * cncells_block_x) * cncells_block_y + cell_y) * cnbins + bin_id]
|
||||
= hist_[0] + hist_[1] + hist_[2];
|
||||
}
|
||||
}
|
||||
@ -186,37 +207,69 @@ namespace cv { namespace cuda { namespace device
|
||||
blockIdx.x * blockDim.z + block_x) *
|
||||
cblock_hist_size;
|
||||
|
||||
int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 16 + cell_thread_x;
|
||||
//copying from final_hist to block_hist
|
||||
int tid;
|
||||
if(threads_cell < cnbins)
|
||||
{
|
||||
tid = (cell_y * cncells_block_y + cell_x) * cnbins + cell_thread_x;
|
||||
} else
|
||||
{
|
||||
tid = (cell_y * cncells_block_y + cell_x) * threads_cell + cell_thread_x;
|
||||
}
|
||||
if (tid < cblock_hist_size)
|
||||
{
|
||||
block_hist[tid] = final_hist[block_x * cblock_hist_size + tid];
|
||||
if(threads_cell < cnbins && cell_thread_x == (threads_cell-1))
|
||||
{
|
||||
for(int i=1;i<=(cnbins - threads_cell);++i)
|
||||
{
|
||||
block_hist[tid + i] = final_hist[block_x * cblock_hist_size + tid + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//declaration of variables and invoke the kernel with the calculated number of blocks
|
||||
void compute_hists(int nbins, int block_stride_x, int block_stride_y,
|
||||
int height, int width, const PtrStepSzf& grad,
|
||||
const PtrStepSzb& qangle, float sigma, float* block_hists)
|
||||
const PtrStepSzb& qangle, float sigma, float* block_hists,
|
||||
int cell_size_x, int cell_size_y, int ncells_block_x, int ncells_block_y)
|
||||
{
|
||||
const int nblocks = 1;
|
||||
const int ncells_block = ncells_block_x * ncells_block_y;
|
||||
const int patch_side = cell_size_x / 4;
|
||||
const int patch_size = cell_size_x + (patch_side * 2);
|
||||
const int block_patch_size = ncells_block * patch_size;
|
||||
const int threads_cell = power_2up(patch_size);
|
||||
const int threads_block = ncells_block * threads_cell;
|
||||
const int half_cell_size = cell_size_x / 2;
|
||||
|
||||
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
|
||||
int img_block_width = (width - ncells_block_x * cell_size_x + block_stride_x) /
|
||||
block_stride_x;
|
||||
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) /
|
||||
int img_block_height = (height - ncells_block_y * cell_size_y + block_stride_y) /
|
||||
block_stride_y;
|
||||
|
||||
const int nblocks = max_nblocks(threads_cell, ncells_block);
|
||||
dim3 grid(divUp(img_block_width, nblocks), img_block_height);
|
||||
dim3 threads(32, 2, nblocks);
|
||||
|
||||
cudaSafeCall(cudaFuncSetCacheConfig(compute_hists_kernel_many_blocks<nblocks>,
|
||||
cudaFuncCachePreferL1));
|
||||
dim3 threads(threads_cell * ncells_block_x, ncells_block_y, nblocks);
|
||||
|
||||
// Precompute gaussian spatial window parameter
|
||||
float scale = 1.f / (2.f * sigma * sigma);
|
||||
|
||||
int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12 * nblocks) * sizeof(float);
|
||||
int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * nblocks) * sizeof(float);
|
||||
int hists_size = (nbins * ncells_block * patch_size * nblocks) * sizeof(float);
|
||||
int final_hists_size = (nbins * ncells_block * nblocks) * sizeof(float);
|
||||
int smem = hists_size + final_hists_size;
|
||||
compute_hists_kernel_many_blocks<nblocks><<<grid, threads, smem>>>(
|
||||
img_block_width, grad, qangle, scale, block_hists);
|
||||
if (nblocks == 4)
|
||||
compute_hists_kernel_many_blocks<4><<<grid, threads, smem>>>(
|
||||
img_block_width, grad, qangle, scale, block_hists, cell_size_x, patch_size, block_patch_size, threads_cell, threads_block, half_cell_size);
|
||||
else if (nblocks == 3)
|
||||
compute_hists_kernel_many_blocks<3><<<grid, threads, smem>>>(
|
||||
img_block_width, grad, qangle, scale, block_hists, cell_size_x, patch_size, block_patch_size, threads_cell, threads_block, half_cell_size);
|
||||
else if (nblocks == 2)
|
||||
compute_hists_kernel_many_blocks<2><<<grid, threads, smem>>>(
|
||||
img_block_width, grad, qangle, scale, block_hists, cell_size_x, patch_size, block_patch_size, threads_cell, threads_block, half_cell_size);
|
||||
else
|
||||
compute_hists_kernel_many_blocks<1><<<grid, threads, smem>>>(
|
||||
img_block_width, grad, qangle, scale, block_hists, cell_size_x, patch_size, block_patch_size, threads_cell, threads_block, half_cell_size);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
@ -293,16 +346,16 @@ namespace cv { namespace cuda { namespace device
|
||||
|
||||
|
||||
void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
|
||||
int height, int width, float* block_hists, float threshold)
|
||||
int height, int width, float* block_hists, float threshold, int cell_size_x, int cell_size_y, int ncells_block_x, int ncells_block_y)
|
||||
{
|
||||
const int nblocks = 1;
|
||||
|
||||
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
|
||||
int block_hist_size = nbins * ncells_block_x * ncells_block_y;
|
||||
int nthreads = power_2up(block_hist_size);
|
||||
dim3 threads(nthreads, 1, nblocks);
|
||||
|
||||
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
|
||||
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y;
|
||||
int img_block_width = (width - ncells_block_x * cell_size_x + block_stride_x) / block_stride_x;
|
||||
int img_block_height = (height - ncells_block_y * cell_size_y + block_stride_y) / block_stride_y;
|
||||
dim3 grid(divUp(img_block_width, nblocks), img_block_height);
|
||||
|
||||
if (nthreads == 32)
|
||||
@ -310,7 +363,7 @@ namespace cv { namespace cuda { namespace device
|
||||
else if (nthreads == 64)
|
||||
normalize_hists_kernel_many_blocks<64, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
|
||||
else if (nthreads == 128)
|
||||
normalize_hists_kernel_many_blocks<64, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
|
||||
normalize_hists_kernel_many_blocks<128, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
|
||||
else if (nthreads == 256)
|
||||
normalize_hists_kernel_many_blocks<256, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
|
||||
else if (nthreads == 512)
|
||||
@ -365,7 +418,7 @@ namespace cv { namespace cuda { namespace device
|
||||
|
||||
void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
||||
float* coefs, float free_coef, float threshold, float *confidences)
|
||||
float* coefs, float free_coef, float threshold, int cell_size_x, int ncells_block_x, float *confidences)
|
||||
{
|
||||
const int nthreads = 256;
|
||||
const int nblocks = 1;
|
||||
@ -381,7 +434,7 @@ namespace cv { namespace cuda { namespace device
|
||||
cudaSafeCall(cudaFuncSetCacheConfig(compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>,
|
||||
cudaFuncCachePreferL1));
|
||||
|
||||
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
|
||||
int img_block_width = (width - ncells_block_x * cell_size_x + block_stride_x) /
|
||||
block_stride_x;
|
||||
compute_confidence_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
|
||||
img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
|
||||
@ -427,7 +480,7 @@ namespace cv { namespace cuda { namespace device
|
||||
|
||||
void classify_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
||||
float* coefs, float free_coef, float threshold, unsigned char* labels)
|
||||
float* coefs, float free_coef, float threshold, int cell_size_x, int ncells_block_x, unsigned char* labels)
|
||||
{
|
||||
const int nthreads = 256;
|
||||
const int nblocks = 1;
|
||||
@ -442,7 +495,7 @@ namespace cv { namespace cuda { namespace device
|
||||
|
||||
cudaSafeCall(cudaFuncSetCacheConfig(classify_hists_kernel_many_blocks<nthreads, nblocks>, cudaFuncCachePreferL1));
|
||||
|
||||
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
|
||||
int img_block_width = (width - ncells_block_x * cell_size_x + block_stride_x) / block_stride_x;
|
||||
classify_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
|
||||
img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
|
||||
block_hists, coefs, free_coef, threshold, labels);
|
||||
@ -477,7 +530,7 @@ namespace cv { namespace cuda { namespace device
|
||||
|
||||
|
||||
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, int win_stride_y, int win_stride_x,
|
||||
int height, int width, float* block_hists, PtrStepSzf descriptors)
|
||||
int height, int width, float* block_hists, int cell_size_x, int ncells_block_x, PtrStepSzf descriptors)
|
||||
{
|
||||
const int nthreads = 256;
|
||||
|
||||
@ -488,7 +541,7 @@ namespace cv { namespace cuda { namespace device
|
||||
dim3 threads(nthreads, 1);
|
||||
dim3 grid(img_win_width, img_win_height);
|
||||
|
||||
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
|
||||
int img_block_width = (width - ncells_block_x * cell_size_x + block_stride_x) / block_stride_x;
|
||||
extract_descrs_by_rows_kernel<nthreads><<<grid, threads>>>(
|
||||
img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
@ -525,7 +578,7 @@ namespace cv { namespace cuda { namespace device
|
||||
|
||||
|
||||
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists, int cell_size_x, int ncells_block_x,
|
||||
PtrStepSzf descriptors)
|
||||
{
|
||||
const int nthreads = 256;
|
||||
@ -537,7 +590,7 @@ namespace cv { namespace cuda { namespace device
|
||||
dim3 threads(nthreads, 1);
|
||||
dim3 grid(img_win_width, img_win_height);
|
||||
|
||||
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
|
||||
int img_block_width = (width - ncells_block_x * cell_size_x + block_stride_x) / block_stride_x;
|
||||
extract_descrs_by_cols_kernel<nthreads><<<grid, threads>>>(
|
||||
img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
@ -51,34 +51,45 @@ Ptr<cuda::HOG> cv::cuda::HOG::create(Size, Size, Size, Size, int) { throw_no_cud
|
||||
|
||||
#else
|
||||
|
||||
/****************************************************************************************\
|
||||
The code below is an implementation of the HOG (Histogram-of-Oriented Gradients)
|
||||
descriptor and object detection, introduced by Navneet Dalal and Bill Triggs.
|
||||
|
||||
The computed feature vectors are compatible with the
|
||||
INRIA Object Detection and Localization Toolkit
|
||||
(http://pascal.inrialpes.fr/soft/olt/)
|
||||
\****************************************************************************************/
|
||||
|
||||
namespace cv { namespace cuda { namespace device
|
||||
{
|
||||
namespace hog
|
||||
{
|
||||
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
|
||||
int nblocks_win_x, int nblocks_win_y);
|
||||
int nblocks_win_x, int nblocks_win_y,
|
||||
int ncells_block_x, int ncells_block_y);
|
||||
|
||||
void compute_hists(int nbins, int block_stride_x, int blovck_stride_y,
|
||||
int height, int width, const cv::cuda::PtrStepSzf& grad,
|
||||
const cv::cuda::PtrStepSzb& qangle, float sigma, float* block_hists);
|
||||
void compute_hists(int nbins, int block_stride_x, int block_stride_y,
|
||||
int height, int width, const PtrStepSzf& grad,
|
||||
const PtrStepSzb& qangle, float sigma, float* block_hists,
|
||||
int cell_size_x, int cell_size_y, int ncells_block_x, int ncells_block_y);
|
||||
|
||||
void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
|
||||
int height, int width, float* block_hists, float threshold);
|
||||
int height, int width, float* block_hists, float threshold, int cell_size_x, int cell_size_y, int ncells_block_x, int ncells_block_y);
|
||||
|
||||
void classify_hists(int win_height, int win_width, int block_stride_y,
|
||||
int block_stride_x, int win_stride_y, int win_stride_x, int height,
|
||||
int width, float* block_hists, float* coefs, float free_coef,
|
||||
float threshold, unsigned char* labels);
|
||||
float threshold, int cell_size_x, int ncells_block_x, unsigned char* labels);
|
||||
|
||||
void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
||||
float* coefs, float free_coef, float threshold, float *confidences);
|
||||
float* coefs, float free_coef, float threshold, int cell_size_x, int ncells_block_x, float *confidences);
|
||||
|
||||
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists, int cell_size_x, int ncells_block_x,
|
||||
cv::cuda::PtrStepSzf descriptors);
|
||||
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
|
||||
int win_stride_y, int win_stride_x, int height, int width, float* block_hists, int cell_size_x, int ncells_block_x,
|
||||
cv::cuda::PtrStepSzf descriptors);
|
||||
|
||||
void compute_gradients_8UC1(int nbins, int height, int width, const cv::cuda::PtrStepSzb& img,
|
||||
@ -167,6 +178,7 @@ namespace
|
||||
double scale0_;
|
||||
int group_threshold_;
|
||||
int descr_format_;
|
||||
Size cells_per_block_;
|
||||
|
||||
private:
|
||||
int getTotalHistSize(Size img_size) const;
|
||||
@ -197,7 +209,8 @@ namespace
|
||||
win_stride_(block_stride),
|
||||
scale0_(1.05),
|
||||
group_threshold_(2),
|
||||
descr_format_(DESCR_FORMAT_COL_BY_COL)
|
||||
descr_format_(DESCR_FORMAT_COL_BY_COL),
|
||||
cells_per_block_(block_size.width / cell_size.width, block_size.height / cell_size.height)
|
||||
{
|
||||
CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 &&
|
||||
(win_size.height - block_size.height) % block_stride.height == 0);
|
||||
@ -205,12 +218,13 @@ namespace
|
||||
CV_Assert(block_size.width % cell_size.width == 0 &&
|
||||
block_size.height % cell_size.height == 0);
|
||||
|
||||
CV_Assert(block_stride == cell_size);
|
||||
// Navneet Dalal and Bill Triggs. Histograms of oriented gradients for
|
||||
// human detection. In International Conference on Computer Vision and
|
||||
// Pattern Recognition, volume 2, pages 886–893, June 2005
|
||||
// http://lear.inrialpes.fr/people/triggs/pubs/Dalal-cvpr05.pdf (28.07.2015) [Figure 5]
|
||||
CV_Assert(block_stride == (block_size / 2));
|
||||
|
||||
CV_Assert(cell_size == Size(8, 8));
|
||||
|
||||
Size cells_per_block(block_size.width / cell_size.width, block_size.height / cell_size.height);
|
||||
CV_Assert(cells_per_block == Size(2, 2));
|
||||
CV_Assert(cell_size.width == cell_size.height);
|
||||
}
|
||||
|
||||
static int numPartsWithin(int size, int part_size, int stride)
|
||||
@ -231,8 +245,7 @@ namespace
|
||||
|
||||
size_t HOG_Impl::getBlockHistogramSize() const
|
||||
{
|
||||
Size cells_per_block(block_size_.width / cell_size_.width, block_size_.height / cell_size_.height);
|
||||
return nbins_ * cells_per_block.area();
|
||||
return nbins_ * cells_per_block_.area();
|
||||
}
|
||||
|
||||
double HOG_Impl::getWinSigma() const
|
||||
@ -313,6 +326,7 @@ namespace
|
||||
detector_.ptr<float>(),
|
||||
(float)free_coef_,
|
||||
(float)hit_threshold_,
|
||||
cell_size_.width, cells_per_block_.width,
|
||||
labels.ptr());
|
||||
|
||||
Mat labels_host;
|
||||
@ -339,6 +353,7 @@ namespace
|
||||
detector_.ptr<float>(),
|
||||
(float)free_coef_,
|
||||
(float)hit_threshold_,
|
||||
cell_size_.width, cells_per_block_.width,
|
||||
labels.ptr<float>());
|
||||
|
||||
Mat labels_host;
|
||||
@ -465,6 +480,7 @@ namespace
|
||||
win_stride_.height, win_stride_.width,
|
||||
img.rows, img.cols,
|
||||
block_hists.ptr<float>(),
|
||||
cell_size_.width, cells_per_block_.width,
|
||||
descriptors);
|
||||
break;
|
||||
case DESCR_FORMAT_COL_BY_COL:
|
||||
@ -473,6 +489,7 @@ namespace
|
||||
win_stride_.height, win_stride_.width,
|
||||
img.rows, img.cols,
|
||||
block_hists.ptr<float>(),
|
||||
cell_size_.width, cells_per_block_.width,
|
||||
descriptors);
|
||||
break;
|
||||
default:
|
||||
@ -490,7 +507,7 @@ namespace
|
||||
void HOG_Impl::computeBlockHistograms(const GpuMat& img, GpuMat& block_hists)
|
||||
{
|
||||
cv::Size blocks_per_win = numPartsWithin(win_size_, block_size_, block_stride_);
|
||||
hog::set_up_constants(nbins_, block_stride_.width, block_stride_.height, blocks_per_win.width, blocks_per_win.height);
|
||||
hog::set_up_constants(nbins_, block_stride_.width, block_stride_.height, blocks_per_win.width, blocks_per_win.height, cells_per_block_.width, cells_per_block_.height);
|
||||
|
||||
BufferPool pool(Stream::Null());
|
||||
|
||||
@ -505,13 +522,17 @@ namespace
|
||||
img.rows, img.cols,
|
||||
grad, qangle,
|
||||
(float)getWinSigma(),
|
||||
block_hists.ptr<float>());
|
||||
block_hists.ptr<float>(),
|
||||
cell_size_.width, cell_size_.height,
|
||||
cells_per_block_.width, cells_per_block_.height);
|
||||
|
||||
hog::normalize_hists(nbins_,
|
||||
block_stride_.width, block_stride_.height,
|
||||
img.rows, img.cols,
|
||||
block_hists.ptr<float>(),
|
||||
(float)threshold_L2hys_);
|
||||
(float)threshold_L2hys_,
|
||||
cell_size_.width, cell_size_.height,
|
||||
cells_per_block_.width, cells_per_block_.height);
|
||||
}
|
||||
|
||||
void HOG_Impl::computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle)
|
||||
|
@ -217,9 +217,9 @@ CUDA_TEST_P(HOG, GetDescriptors)
|
||||
r[(x * blocks_per_win_y + y) * block_hist_size + k]);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
INSTANTIATE_TEST_CASE_P(CUDA_ObjDetect, HOG, ALL_DEVICES);
|
||||
|
||||
*/
|
||||
//============== caltech hog tests =====================//
|
||||
|
||||
struct CalTech : public ::testing::TestWithParam<std::tr1::tuple<cv::cuda::DeviceInfo, std::string> >
|
||||
@ -269,8 +269,204 @@ INSTANTIATE_TEST_CASE_P(detect, CalTech, testing::Combine(ALL_DEVICES,
|
||||
"caltech/image_00000527_0.png", "caltech/image_00000574_0.png")));
|
||||
|
||||
|
||||
//------------------------variable GPU HOG Tests------------------------//
|
||||
// Fixture for the variable-parameter GPU HOG test.
// Parameter 0 is the CUDA device to run on, parameter 1 the image path.
struct Hog_var : public ::testing::TestWithParam<std::tr1::tuple<cv::cuda::DeviceInfo, std::string> >
{
    cv::cuda::DeviceInfo devInfo;
    cv::Mat img;    // full grayscale image as loaded
    cv::Mat c_img;  // 16x32 crop taken from the top-left corner of img

    virtual void SetUp()
    {
        // Select the device under test before any GPU work happens.
        devInfo = GET_PARAM(0);
        cv::cuda::setDevice(devInfo.deviceID());

        const cv::Rect top_left_crop(0, 0, 16, 32);

        img = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
        ASSERT_FALSE(img.empty());

        c_img = img(top_left_crop);
    }
};
|
||||
|
||||
// Smoke test: runs the GPU and the CPU HOG descriptor computation with a
// 4x4 cell / 8x8 block / 16x32 window configuration on the cropped image.
// The descriptors are computed but not compared against each other.
CUDA_TEST_P(Hog_var, HOG)
{
    cv::cuda::GpuMat _img(c_img);
    cv::cuda::GpuMat d_img;

    const int nbins = 9;

    // Geometry shared by the GPU and the CPU descriptor.
    Size win_stride(8, 8);
    Size win_size(16, 16 * 2);
    Size block_size(8, 8);
    Size block_stride(4, 4);
    Size cell_size(4, 4);

    // GPU descriptor
    cv::Ptr<cv::cuda::HOG> gpu_hog =
        cv::cuda::HOG::create(win_size, block_size, block_stride, cell_size, nbins);

    gpu_hog->setNumLevels(13);
    gpu_hog->setHitThreshold(0);
    gpu_hog->setWinStride(win_stride);
    gpu_hog->setScaleFactor(1.05);
    gpu_hog->setGroupThreshold(8);
    gpu_hog->compute(_img, d_img);

    vector<float> gpu_desc_vec;
    ASSERT_TRUE(gpu_desc_vec.empty());
    cv::Mat R(d_img);

    // CPU descriptor with the same geometry
    cv::HOGDescriptor cpu_hog(win_size, block_size, block_stride, cell_size, nbins);
    cpu_hog.nlevels = 13;

    vector<float> cpu_desc_vec;
    ASSERT_TRUE(cpu_desc_vec.empty());
    cpu_hog.compute(c_img, cpu_desc_vec, win_stride, Size(0,0));
}
|
||||
|
||||
// Instantiates the Hog_var tests under the "detect" prefix for every entry of
// ALL_DEVICES combined with the single image path "/hog/road.png".
INSTANTIATE_TEST_CASE_P(detect, Hog_var, testing::Combine(ALL_DEVICES,
|
||||
::testing::Values<std::string>("/hog/road.png")));
|
||||
|
||||
// Fixture for the GPU HOG tests with varying cell / block / window sizes.
// Parameter 0 is the CUDA device to run on, parameter 1 the image path.
// Four crops of different sizes are taken from the top-left corner of the image.
struct Hog_var_cell : public ::testing::TestWithParam<std::tr1::tuple<cv::cuda::DeviceInfo, std::string> >
{
    cv::cuda::DeviceInfo devInfo;
    cv::Mat img;     // full grayscale image as loaded
    cv::Mat c_img;   // 48x96 crop
    cv::Mat c_img2;  // 54x108 crop
    cv::Mat c_img3;  // 64x128 crop
    cv::Mat c_img4;  // 32x64 crop

    virtual void SetUp()
    {
        // Select the device under test before any GPU work happens.
        devInfo = GET_PARAM(0);
        cv::cuda::setDevice(devInfo.deviceID());

        img = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
        ASSERT_FALSE(img.empty());

        c_img  = img(cv::Rect(0, 0, 48, 96));
        c_img2 = img(cv::Rect(0, 0, 54, 108));
        c_img3 = img(cv::Rect(0, 0, 64, 128));
        c_img4 = img(cv::Rect(0, 0, 32, 64));
    }
};
|
||||
|
||||
CUDA_TEST_P(Hog_var_cell, HOG)
|
||||
{
|
||||
cv::cuda::GpuMat _img(c_img);
|
||||
cv::cuda::GpuMat _img2(c_img2);
|
||||
cv::cuda::GpuMat _img3(c_img3);
|
||||
cv::cuda::GpuMat _img4(c_img4);
|
||||
cv::cuda::GpuMat d_img;
|
||||
|
||||
ASSERT_FALSE(_img.empty());
|
||||
ASSERT_TRUE(d_img.empty());
|
||||
|
||||
int win_stride_width = 8;int win_stride_height = 8;
|
||||
int win_width = 48;
|
||||
int block_width = 16;
|
||||
int block_stride_width = 8;int block_stride_height = 8;
|
||||
int cell_width = 8;
|
||||
int nbins = 9;
|
||||
|
||||
Size win_stride(win_stride_width, win_stride_height);
|
||||
Size win_size(win_width, win_width * 2);
|
||||
Size block_size(block_width, block_width);
|
||||
Size block_stride(block_stride_width, block_stride_height);
|
||||
Size cell_size(cell_width, cell_width);
|
||||
|
||||
cv::Ptr<cv::cuda::HOG> gpu_hog = cv::cuda::HOG::create(win_size, block_size, block_stride, cell_size, nbins);
|
||||
|
||||
gpu_hog->setNumLevels(13);
|
||||
gpu_hog->setHitThreshold(0);
|
||||
gpu_hog->setWinStride(win_stride);
|
||||
gpu_hog->setScaleFactor(1.05);
|
||||
gpu_hog->setGroupThreshold(8);
|
||||
gpu_hog->compute(_img, d_img);
|
||||
//------------------------------------------------------------------------------
|
||||
cv::cuda::GpuMat d_img2;
|
||||
ASSERT_TRUE(d_img2.empty());
|
||||
|
||||
int win_stride_width2 = 8;int win_stride_height2 = 8;
|
||||
int win_width2 = 48;
|
||||
int block_width2 = 16;
|
||||
int block_stride_width2 = 8;int block_stride_height2 = 8;
|
||||
int cell_width2 = 4;
|
||||
|
||||
Size win_stride2(win_stride_width2, win_stride_height2);
|
||||
Size win_size2(win_width2, win_width2 * 2);
|
||||
Size block_size2(block_width2, block_width2);
|
||||
Size block_stride2(block_stride_width2, block_stride_height2);
|
||||
Size cell_size2(cell_width2, cell_width2);
|
||||
|
||||
cv::Ptr<cv::cuda::HOG> gpu_hog2 = cv::cuda::HOG::create(win_size2, block_size2, block_stride2, cell_size2, nbins);
|
||||
gpu_hog2->setWinStride(win_stride2);
|
||||
gpu_hog2->compute(_img, d_img2);
|
||||
//------------------------------------------------------------------------------
|
||||
cv::cuda::GpuMat d_img3;
|
||||
ASSERT_TRUE(d_img3.empty());
|
||||
|
||||
int win_stride_width3 = 9;int win_stride_height3 = 9;
|
||||
int win_width3 = 54;
|
||||
int block_width3 = 18;
|
||||
int block_stride_width3 = 9;int block_stride_height3 = 9;
|
||||
int cell_width3 = 6;
|
||||
|
||||
Size win_stride3(win_stride_width3, win_stride_height3);
|
||||
Size win_size3(win_width3, win_width3 * 2);
|
||||
Size block_size3(block_width3, block_width3);
|
||||
Size block_stride3(block_stride_width3, block_stride_height3);
|
||||
Size cell_size3(cell_width3, cell_width3);
|
||||
|
||||
cv::Ptr<cv::cuda::HOG> gpu_hog3 = cv::cuda::HOG::create(win_size3, block_size3, block_stride3, cell_size3, nbins);
|
||||
gpu_hog3->setWinStride(win_stride3);
|
||||
gpu_hog3->compute(_img2, d_img3);
|
||||
//------------------------------------------------------------------------------
|
||||
cv::cuda::GpuMat d_img4;
|
||||
ASSERT_TRUE(d_img4.empty());
|
||||
|
||||
int win_stride_width4 = 16;int win_stride_height4 = 16;
|
||||
int win_width4 = 64;
|
||||
int block_width4 = 32;
|
||||
int block_stride_width4 = 16;int block_stride_height4 = 16;
|
||||
int cell_width4 = 8;
|
||||
|
||||
Size win_stride4(win_stride_width4, win_stride_height4);
|
||||
Size win_size4(win_width4, win_width4 * 2);
|
||||
Size block_size4(block_width4, block_width4);
|
||||
Size block_stride4(block_stride_width4, block_stride_height4);
|
||||
Size cell_size4(cell_width4, cell_width4);
|
||||
|
||||
cv::Ptr<cv::cuda::HOG> gpu_hog4 = cv::cuda::HOG::create(win_size4, block_size4, block_stride4, cell_size4, nbins);
|
||||
gpu_hog4->setWinStride(win_stride4);
|
||||
gpu_hog4->compute(_img3, d_img4);
|
||||
//------------------------------------------------------------------------------
|
||||
cv::cuda::GpuMat d_img5;
|
||||
ASSERT_TRUE(d_img5.empty());
|
||||
|
||||
int win_stride_width5 = 16;int win_stride_height5 = 16;
|
||||
int win_width5 = 64;
|
||||
int block_width5 = 32;
|
||||
int block_stride_width5 = 16;int block_stride_height5 = 16;
|
||||
int cell_width5 = 16;
|
||||
|
||||
Size win_stride5(win_stride_width5, win_stride_height5);
|
||||
Size win_size5(win_width5, win_width5 * 2);
|
||||
Size block_size5(block_width5, block_width5);
|
||||
Size block_stride5(block_stride_width5, block_stride_height5);
|
||||
Size cell_size5(cell_width5, cell_width5);
|
||||
|
||||
cv::Ptr<cv::cuda::HOG> gpu_hog5 = cv::cuda::HOG::create(win_size5, block_size5, block_stride5, cell_size5, nbins);
|
||||
gpu_hog5->setWinStride(win_stride5);
|
||||
gpu_hog5->compute(_img3, d_img5);
|
||||
//------------------------------------------------------------------------------
|
||||
}
|
||||
|
||||
// Instantiate the variable-cell HOG test for every available CUDA device,
// using the single input image "/hog/road.png".
INSTANTIATE_TEST_CASE_P(detect, Hog_var_cell,
    testing::Combine(ALL_DEVICES, ::testing::Values<std::string>("/hog/road.png")));
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
/// LBP classifier
|
||||
|
||||
|
@ -22,10 +22,14 @@ public:
|
||||
static Args read(int argc, char** argv);
|
||||
|
||||
string src;
|
||||
bool src_is_folder;
|
||||
bool src_is_video;
|
||||
bool src_is_camera;
|
||||
int camera_id;
|
||||
|
||||
bool svm_load;
|
||||
string svm;
|
||||
|
||||
bool write_video;
|
||||
string dst_video;
|
||||
double dst_video_fps;
|
||||
@ -44,6 +48,10 @@ public:
|
||||
|
||||
int win_width;
|
||||
int win_stride_width, win_stride_height;
|
||||
int block_width;
|
||||
int block_stride_width, block_stride_height;
|
||||
int cell_width;
|
||||
int nbins;
|
||||
|
||||
bool gamma_corr;
|
||||
};
|
||||
@ -93,6 +101,9 @@ static void printHelp()
|
||||
cout << "Histogram of Oriented Gradients descriptor and detector sample.\n"
|
||||
<< "\nUsage: hog_gpu\n"
|
||||
<< " (<image>|--video <vide>|--camera <camera_id>) # frames source\n"
|
||||
<< " or"
|
||||
<< " (--folder <folder_path>) # load images from folder\n"
|
||||
<< " [--svm <file> # load svm file"
|
||||
<< " [--make_gray <true/false>] # convert image to gray one or not\n"
|
||||
<< " [--resize_src <true/false>] # do resize of the source image or not\n"
|
||||
<< " [--width <int>] # resized image width\n"
|
||||
@ -100,9 +111,14 @@ static void printHelp()
|
||||
<< " [--hit_threshold <double>] # classifying plane distance threshold (0.0 usually)\n"
|
||||
<< " [--scale <double>] # HOG window scale factor\n"
|
||||
<< " [--nlevels <int>] # max number of HOG window scales\n"
|
||||
<< " [--win_width <int>] # width of the window (48 or 64)\n"
|
||||
<< " [--win_width <int>] # width of the window\n"
|
||||
<< " [--win_stride_width <int>] # distance by OX axis between neighbour wins\n"
|
||||
<< " [--win_stride_height <int>] # distance by OY axis between neighbour wins\n"
|
||||
<< " [--block_width <int>] # width of the block\n"
|
||||
<< " [--block_stride_width <int>] # distance by 0X axis between neighbour blocks\n"
|
||||
<< " [--block_stride_height <int>] # distance by 0Y axis between neighbour blocks\n"
|
||||
<< " [--cell_width <int>] # width of the cell\n"
|
||||
<< " [--nbins <int>] # number of bins\n"
|
||||
<< " [--gr_threshold <int>] # merging similar rects constant\n"
|
||||
<< " [--gamma_correct <int>] # do gamma correction or not\n"
|
||||
<< " [--write_video <bool>] # write video or not\n"
|
||||
@ -142,6 +158,8 @@ Args::Args()
|
||||
{
|
||||
src_is_video = false;
|
||||
src_is_camera = false;
|
||||
src_is_folder = false;
|
||||
svm_load = false;
|
||||
camera_id = 0;
|
||||
|
||||
write_video = false;
|
||||
@ -162,6 +180,11 @@ Args::Args()
|
||||
win_width = 48;
|
||||
win_stride_width = 8;
|
||||
win_stride_height = 8;
|
||||
block_width = 16;
|
||||
block_stride_width = 8;
|
||||
block_stride_height = 8;
|
||||
cell_width = 8;
|
||||
nbins = 9;
|
||||
|
||||
gamma_corr = true;
|
||||
}
|
||||
@ -186,6 +209,11 @@ Args Args::read(int argc, char** argv)
|
||||
else if (string(argv[i]) == "--win_width") args.win_width = atoi(argv[++i]);
|
||||
else if (string(argv[i]) == "--win_stride_width") args.win_stride_width = atoi(argv[++i]);
|
||||
else if (string(argv[i]) == "--win_stride_height") args.win_stride_height = atoi(argv[++i]);
|
||||
else if (string(argv[i]) == "--block_width") args.block_width = atoi(argv[++i]);
|
||||
else if (string(argv[i]) == "--block_stride_width") args.block_stride_width = atoi(argv[++i]);
|
||||
else if (string(argv[i]) == "--block_stride_height") args.block_stride_height = atoi(argv[++i]);
|
||||
else if (string(argv[i]) == "--cell_width") args.cell_width = atoi(argv[++i]);
|
||||
else if (string(argv[i]) == "--nbins") args.nbins = atoi(argv[++i]);
|
||||
else if (string(argv[i]) == "--gr_threshold") args.gr_threshold = atoi(argv[++i]);
|
||||
else if (string(argv[i]) == "--gamma_correct") args.gamma_corr = (string(argv[++i]) == "true");
|
||||
else if (string(argv[i]) == "--write_video") args.write_video = (string(argv[++i]) == "true");
|
||||
@ -194,6 +222,8 @@ Args Args::read(int argc, char** argv)
|
||||
else if (string(argv[i]) == "--help") printHelp();
|
||||
else if (string(argv[i]) == "--video") { args.src = argv[++i]; args.src_is_video = true; }
|
||||
else if (string(argv[i]) == "--camera") { args.camera_id = atoi(argv[++i]); args.src_is_camera = true; }
|
||||
else if (string(argv[i]) == "--folder") { args.src = argv[++i]; args.src_is_folder = true;}
|
||||
else if (string(argv[i]) == "--svm") { args.svm = argv[++i]; args.svm_load = true;}
|
||||
else if (args.src.empty()) args.src = argv[i];
|
||||
else throw runtime_error((string("unknown key: ") + argv[i]));
|
||||
}
|
||||
@ -228,16 +258,17 @@ App::App(const Args& s)
|
||||
|
||||
gamma_corr = args.gamma_corr;
|
||||
|
||||
if (args.win_width != 64 && args.win_width != 48)
|
||||
args.win_width = 64;
|
||||
|
||||
cout << "Scale: " << scale << endl;
|
||||
if (args.resize_src)
|
||||
cout << "Resized source: (" << args.width << ", " << args.height << ")\n";
|
||||
cout << "Group threshold: " << gr_threshold << endl;
|
||||
cout << "Levels number: " << nlevels << endl;
|
||||
cout << "Win width: " << args.win_width << endl;
|
||||
cout << "Win size: (" << args.win_width << ", " << args.win_width*2 << ")\n";
|
||||
cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n";
|
||||
cout << "Block size: (" << args.block_width << ", " << args.block_width << ")\n";
|
||||
cout << "Block stride: (" << args.block_stride_width << ", " << args.block_stride_height << ")\n";
|
||||
cout << "Cell size: (" << args.cell_width << ", " << args.cell_width << ")\n";
|
||||
cout << "Bins number: " << args.nbins << endl;
|
||||
cout << "Hit threshold: " << hit_threshold << endl;
|
||||
cout << "Gamma correction: " << gamma_corr << endl;
|
||||
cout << endl;
|
||||
@ -249,22 +280,58 @@ void App::run()
|
||||
running = true;
|
||||
cv::VideoWriter video_writer;
|
||||
|
||||
Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96)
|
||||
Size win_stride(args.win_stride_width, args.win_stride_height);
|
||||
Size win_size(args.win_width, args.win_width * 2);
|
||||
Size block_size(args.block_width, args.block_width);
|
||||
Size block_stride(args.block_stride_width, args.block_stride_height);
|
||||
Size cell_size(args.cell_width, args.cell_width);
|
||||
|
||||
cv::Ptr<cv::cuda::HOG> gpu_hog = cv::cuda::HOG::create(win_size);
|
||||
cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9);
|
||||
cv::Ptr<cv::cuda::HOG> gpu_hog = cv::cuda::HOG::create(win_size, block_size, block_stride, cell_size, args.nbins);
|
||||
cv::HOGDescriptor cpu_hog(win_size, block_size, block_stride, cell_size, args.nbins);
|
||||
|
||||
// Create HOG descriptors and detectors here
|
||||
Mat detector = gpu_hog->getDefaultPeopleDetector();
|
||||
if(args.svm_load) {
|
||||
std::vector<float> svm_model;
|
||||
const std::string model_file_name = args.svm;
|
||||
FileStorage ifs(model_file_name, FileStorage::READ);
|
||||
if (ifs.isOpened()) {
|
||||
ifs["svm_detector"] >> svm_model;
|
||||
} else {
|
||||
const std::string what =
|
||||
"could not load model for hog classifier from file: "
|
||||
+ model_file_name;
|
||||
throw std::runtime_error(what);
|
||||
}
|
||||
|
||||
gpu_hog->setSVMDetector(detector);
|
||||
cpu_hog.setSVMDetector(detector);
|
||||
// check if the variables are initialized
|
||||
if (svm_model.empty()) {
|
||||
const std::string what =
|
||||
"HoG classifier: svm model could not be loaded from file"
|
||||
+ model_file_name;
|
||||
throw std::runtime_error(what);
|
||||
}
|
||||
|
||||
gpu_hog->setSVMDetector(svm_model);
|
||||
cpu_hog.setSVMDetector(svm_model);
|
||||
} else {
|
||||
// Create HOG descriptors and detectors here
|
||||
Mat detector = gpu_hog->getDefaultPeopleDetector();
|
||||
|
||||
gpu_hog->setSVMDetector(detector);
|
||||
cpu_hog.setSVMDetector(detector);
|
||||
}
|
||||
|
||||
cout << "gpusvmDescriptorSize : " << gpu_hog->getDescriptorSize()
|
||||
<< endl;
|
||||
cout << "cpusvmDescriptorSize : " << cpu_hog.getDescriptorSize()
|
||||
<< endl;
|
||||
|
||||
while (running)
|
||||
{
|
||||
VideoCapture vc;
|
||||
Mat frame;
|
||||
vector<String> filenames;
|
||||
|
||||
unsigned int count = 1;
|
||||
|
||||
if (args.src_is_video)
|
||||
{
|
||||
@ -273,6 +340,14 @@ void App::run()
|
||||
throw runtime_error(string("can't open video file: " + args.src));
|
||||
vc >> frame;
|
||||
}
|
||||
else if (args.src_is_folder) {
|
||||
String folder = args.src;
|
||||
cout << folder << endl;
|
||||
glob(folder, filenames);
|
||||
frame = imread(filenames[count]); // 0 --> .gitignore
|
||||
if (!frame.data)
|
||||
cerr << "Problem loading image from folder!!!" << endl;
|
||||
}
|
||||
else if (args.src_is_camera)
|
||||
{
|
||||
vc.open(args.camera_id);
|
||||
@ -327,7 +402,7 @@ void App::run()
|
||||
{
|
||||
cpu_hog.nlevels = nlevels;
|
||||
cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride,
|
||||
Size(0, 0), scale, gr_threshold);
|
||||
Size(0, 0), scale, gr_threshold);
|
||||
}
|
||||
hogWorkEnd();
|
||||
|
||||
@ -342,11 +417,20 @@ void App::run()
|
||||
putText(img_to_show, "Mode: GPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
|
||||
else
|
||||
putText(img_to_show, "Mode: CPU", Point(5, 25), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
|
||||
putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
|
||||
putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
|
||||
putText(img_to_show, "FPS HOG: " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
|
||||
putText(img_to_show, "FPS total: " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
|
||||
imshow("opencv_gpu_hog", img_to_show);
|
||||
|
||||
if (args.src_is_video || args.src_is_camera) vc >> frame;
|
||||
if (args.src_is_folder) {
|
||||
count++;
|
||||
if (count < filenames.size()) {
|
||||
frame = imread(filenames[count]);
|
||||
} else {
|
||||
Mat empty;
|
||||
frame = empty;
|
||||
}
|
||||
}
|
||||
|
||||
workEnd();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user