add SURF and HOG to ocl module
This commit is contained in:
parent
a2df490914
commit
64e9cf5d75
@ -12,6 +12,7 @@
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
@ -924,6 +925,154 @@ namespace cv
|
||||
const oclMat& src3, double beta, oclMat& dst, int flags = 0);
|
||||
#endif
|
||||
|
||||
//////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
|
||||
struct CV_EXPORTS HOGDescriptor
|
||||
{
|
||||
enum { DEFAULT_WIN_SIGMA = -1 };
|
||||
enum { DEFAULT_NLEVELS = 64 };
|
||||
enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL };
|
||||
|
||||
HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16),
|
||||
Size block_stride=Size(8, 8), Size cell_size=Size(8, 8),
|
||||
int nbins=9, double win_sigma=DEFAULT_WIN_SIGMA,
|
||||
double threshold_L2hys=0.2, bool gamma_correction=true,
|
||||
int nlevels=DEFAULT_NLEVELS);
|
||||
|
||||
size_t getDescriptorSize() const;
|
||||
size_t getBlockHistogramSize() const;
|
||||
|
||||
void setSVMDetector(const vector<float>& detector);
|
||||
|
||||
static vector<float> getDefaultPeopleDetector();
|
||||
static vector<float> getPeopleDetector48x96();
|
||||
static vector<float> getPeopleDetector64x128();
|
||||
|
||||
void detect(const oclMat& img, vector<Point>& found_locations,
|
||||
double hit_threshold=0, Size win_stride=Size(),
|
||||
Size padding=Size());
|
||||
|
||||
void detectMultiScale(const oclMat& img, vector<Rect>& found_locations,
|
||||
double hit_threshold=0, Size win_stride=Size(),
|
||||
Size padding=Size(), double scale0=1.05,
|
||||
int group_threshold=2);
|
||||
|
||||
void getDescriptors(const oclMat& img, Size win_stride,
|
||||
oclMat& descriptors,
|
||||
int descr_format=DESCR_FORMAT_COL_BY_COL);
|
||||
|
||||
Size win_size;
|
||||
Size block_size;
|
||||
Size block_stride;
|
||||
Size cell_size;
|
||||
int nbins;
|
||||
double win_sigma;
|
||||
double threshold_L2hys;
|
||||
bool gamma_correction;
|
||||
int nlevels;
|
||||
|
||||
protected:
|
||||
void computeBlockHistograms(const oclMat& img);
|
||||
void computeGradient(const oclMat& img, oclMat& grad, oclMat& qangle);
|
||||
|
||||
double getWinSigma() const;
|
||||
bool checkDetectorSize() const;
|
||||
|
||||
static int numPartsWithin(int size, int part_size, int stride);
|
||||
static Size numPartsWithin(Size size, Size part_size, Size stride);
|
||||
|
||||
// Coefficients of the separating plane
|
||||
float free_coef;
|
||||
oclMat detector;
|
||||
|
||||
// Results of the last classification step
|
||||
oclMat labels;
|
||||
Mat labels_host;
|
||||
|
||||
// Results of the last histogram evaluation step
|
||||
oclMat block_hists;
|
||||
|
||||
// Gradients computation results
|
||||
oclMat grad, qangle;
|
||||
|
||||
std::vector<oclMat> image_scales;
|
||||
};
|
||||
|
||||
//! Speeded up robust features, port from GPU module.
|
||||
////////////////////////////////// SURF //////////////////////////////////////////
|
||||
class CV_EXPORTS SURF_OCL
|
||||
{
|
||||
public:
|
||||
enum KeypointLayout
|
||||
{
|
||||
X_ROW = 0,
|
||||
Y_ROW,
|
||||
LAPLACIAN_ROW,
|
||||
OCTAVE_ROW,
|
||||
SIZE_ROW,
|
||||
ANGLE_ROW,
|
||||
HESSIAN_ROW,
|
||||
ROWS_COUNT
|
||||
};
|
||||
|
||||
//! the default constructor
|
||||
SURF_OCL();
|
||||
//! the full constructor taking all the necessary parameters
|
||||
explicit SURF_OCL(double _hessianThreshold, int _nOctaves=4,
|
||||
int _nOctaveLayers=2, bool _extended=false, float _keypointsRatio=0.01f, bool _upright = false);
|
||||
|
||||
//! returns the descriptor size in float's (64 or 128)
|
||||
int descriptorSize() const;
|
||||
|
||||
//! upload host keypoints to device memory
|
||||
void uploadKeypoints(const vector<cv::KeyPoint>& keypoints, oclMat& keypointsocl);
|
||||
//! download keypoints from device to host memory
|
||||
void downloadKeypoints(const oclMat& keypointsocl, vector<KeyPoint>& keypoints);
|
||||
|
||||
//! download descriptors from device to host memory
|
||||
void downloadDescriptors(const oclMat& descriptorsocl, vector<float>& descriptors);
|
||||
|
||||
//! finds the keypoints using fast hessian detector used in SURF
|
||||
//! supports CV_8UC1 images
|
||||
//! keypoints will have nFeature cols and ROWS_COUNT (7) rows
|
||||
//! keypoints.ptr<float>(X_ROW)[i] will contain x coordinate of i'th feature
|
||||
//! keypoints.ptr<float>(Y_ROW)[i] will contain y coordinate of i'th feature
|
||||
//! keypoints.ptr<float>(LAPLACIAN_ROW)[i] will contain laplacian sign of i'th feature
|
||||
//! keypoints.ptr<float>(OCTAVE_ROW)[i] will contain octave of i'th feature
|
||||
//! keypoints.ptr<float>(SIZE_ROW)[i] will contain size of i'th feature
|
||||
//! keypoints.ptr<float>(ANGLE_ROW)[i] will contain orientation of i'th feature
|
||||
//! keypoints.ptr<float>(HESSIAN_ROW)[i] will contain response of i'th feature
|
||||
void operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints);
|
||||
//! finds the keypoints and computes their descriptors.
|
||||
//! Optionally it can compute descriptors for the user-provided keypoints and recompute keypoints direction
|
||||
void operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints, oclMat& descriptors,
|
||||
bool useProvidedKeypoints = false);
|
||||
|
||||
void operator()(const oclMat& img, const oclMat& mask, std::vector<KeyPoint>& keypoints);
|
||||
void operator()(const oclMat& img, const oclMat& mask, std::vector<KeyPoint>& keypoints, oclMat& descriptors,
|
||||
bool useProvidedKeypoints = false);
|
||||
|
||||
void operator()(const oclMat& img, const oclMat& mask, std::vector<KeyPoint>& keypoints, std::vector<float>& descriptors,
|
||||
bool useProvidedKeypoints = false);
|
||||
|
||||
void releaseMemory();
|
||||
|
||||
// SURF parameters
|
||||
float hessianThreshold;
|
||||
int nOctaves;
|
||||
int nOctaveLayers;
|
||||
bool extended;
|
||||
bool upright;
|
||||
|
||||
//! max keypoints = min(keypointsRatio * img.size().area(), 65535)
|
||||
float keypointsRatio;
|
||||
|
||||
oclMat sum, mask1, maskSum, intBuffer;
|
||||
|
||||
oclMat det, trace;
|
||||
|
||||
oclMat maxPosBuffer;
|
||||
|
||||
};
|
||||
}
|
||||
}
|
||||
#include "opencv2/ocl/matrix_operations.hpp"
|
||||
|
1787
modules/ocl/src/hog.cpp
Normal file
1787
modules/ocl/src/hog.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1259
modules/ocl/src/kernels/nonfree_surf.cl
Normal file
1259
modules/ocl/src/kernels/nonfree_surf.cl
Normal file
File diff suppressed because it is too large
Load Diff
450
modules/ocl/src/kernels/objdetect_hog.cl
Normal file
450
modules/ocl/src/kernels/objdetect_hog.cl
Normal file
@ -0,0 +1,450 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Wenju He, wenju@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
|
||||
#define CELL_WIDTH 8
|
||||
#define CELL_HEIGHT 8
|
||||
#define CELLS_PER_BLOCK_X 2
|
||||
#define CELLS_PER_BLOCK_Y 2
|
||||
#define NTHREADS 256
|
||||
#define CV_PI_F 3.1415926535897932384626433832795f
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Histogram computation
|
||||
|
||||
// Accumulates the orientation histograms of one HOG block per work-group and
// writes them to block_hists.
//
// Parameters, as used by the code below:
//   width             - not referenced by this kernel
//   cblock_stride_x/y - step between neighbouring blocks; multiplied by the group id
//                       to locate the block inside grad / qangle
//   cnbins            - number of orientation bins per cell
//   cblock_hist_size  - number of floats stored per block in block_hists
//   img_block_width   - number of blocks per row of block_hists
//   grad_quadstep     - row pitch of grad, in floats
//   qangle_step       - row pitch of qangle, in bytes
//   grad, qangle      - two interleaved entries per pixel: the two vote weights and
//                       the two bins they are added to
//   scale             - factor applied to the squared distance inside the Gaussian weight
//   block_hists       - output, one histogram of cblock_hist_size floats per block
//   smem              - local scratch: cnbins * 48 floats of per-column partial sums,
//                       immediately followed by the reduced per-cell histograms
//
// NOTE(review): the lidX / 16 and lidY indexing assumes a 32 x 2 work-group
// (two cells in x, two in y) - confirm against the host-side launch code.
__kernel void compute_hists_kernel(const int width, const int cblock_stride_x, const int cblock_stride_y,
                                   const int cnbins, const int cblock_hist_size, const int img_block_width,
                                   const int grad_quadstep, const int qangle_step,
                                   __global const float* grad, __global const uchar* qangle,
                                   const float scale, __global float* block_hists, __local float* smem)
{
    const int lidX = get_local_id(0);
    const int lidY = get_local_id(1);
    const int gidX = get_group_id(0);
    const int gidY = get_group_id(1);

    const int cell_x = lidX / 16;
    const int cell_y = lidY;
    const int cell_thread_x = lidX & 0xF;

    __local float* hists = smem;
    __local float* final_hist = smem + cnbins * 48;

    const int offset_x = gidX * cblock_stride_x + (cell_x << 2) + cell_thread_x;
    const int offset_y = gidY * cblock_stride_y + (cell_y << 2);

    __global const float* grad_ptr = grad + offset_y * grad_quadstep + (offset_x << 1);
    __global const uchar* qangle_ptr = qangle + offset_y * qangle_step + (offset_x << 1);

    // Column of partial sums owned by this work-item: 12 columns per cell,
    // 4 cells, hence a pitch of 48 floats between consecutive bins.
    // Only dereferenced by work-items with cell_thread_x < 12.
    __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) + cell_thread_x;

    // Each cell receives votes from 12 pixel columns (and 12 pixel rows).
    if (cell_thread_x < 12)
    {
        for (int bin_id = 0; bin_id < cnbins; ++bin_id)
            hist[bin_id * 48] = 0.f;

        const int dist_x = -4 + cell_thread_x - 4 * cell_x;

        const int dist_y_begin = -4 - 4 * lidY;
        for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
        {
            float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
            uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]);

            grad_ptr += grad_quadstep;
            qangle_ptr += qangle_step;

            int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);
            int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);

            float gaussian = exp(-(dist_center_y * dist_center_y + dist_center_x * dist_center_x) * scale);
            float interp_weight = (8.f - fabs(dist_y + 0.5f)) * (8.f - fabs(dist_x + 0.5f)) / 64.f;

            hist[bin.x * 48] += gaussian * interp_weight * vote.x;
            hist[bin.y * 48] += gaussian * interp_weight * vote.y;
        }
    }

    // Reduce the 12 column sums of every cell to one value per bin. Each phase
    // reads values written by other work-items in the previous phase, so the
    // phases are separated by work-group barriers instead of relying on
    // lock-step execution. The order of additions is unchanged.
    barrier(CLK_LOCAL_MEM_FENCE);

    if (cell_thread_x < 6)
    {
        for (int bin_id = 0; bin_id < cnbins; ++bin_id)
            hist[bin_id * 48] += hist[bin_id * 48 + 6];
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    if (cell_thread_x < 3)
    {
        for (int bin_id = 0; bin_id < cnbins; ++bin_id)
            hist[bin_id * 48] += hist[bin_id * 48 + 3];
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    if (cell_thread_x == 0)
    {
        for (int bin_id = 0; bin_id < cnbins; ++bin_id)
            final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] =
                hist[bin_id * 48] + hist[bin_id * 48 + 1] + hist[bin_id * 48 + 2];
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    __global float* block_hist = block_hists + (gidY * img_block_width + gidX) * cblock_hist_size;

    // Copy the finished block histogram to global memory, one float per work-item.
    int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 16 + cell_thread_x;
    if (tid < cblock_hist_size)
        block_hist[tid] = final_hist[tid];
}
|
||||
|
||||
//-------------------------------------------------------------
|
||||
// Normalization of histograms via L2Hys_norm
|
||||
//
|
||||
// Sums the first `size` floats of `smem` across the work-group and returns the
// total to every work-item.
//
//   smem - local buffer; element [tid] must already hold this work-item's value
//          and be visible to the group (the caller issues a barrier first)
//   size - number of elements to reduce; halving steps are applied for the
//          thresholds 512, 256, ..., 2, so it is expected to be a power of two
//
// Must be called by all work-items of the group: it contains barriers. Every
// step is followed by a barrier and only touches smem[tid + stride] for
// tid < stride, so the reduction does not depend on lock-step execution and
// never reads beyond the `size` elements being reduced.
float reduce_smem(volatile __local float* smem, int size)
{
    unsigned int tid = get_local_id(0);
    float sum = smem[tid];

    // `size` is the same for every work-item, so the barrier inside the
    // conditional is reached by either all work-items or none.
    for (unsigned int stride = 256; stride > 0; stride >>= 1)
    {
        if (size >= (int)(2 * stride))
        {
            if (tid < stride)
                smem[tid] = sum = sum + smem[tid + stride];
            barrier(CLK_LOCAL_MEM_FENCE);
        }
    }

    barrier(CLK_LOCAL_MEM_FENCE);
    sum = smem[0];

    return sum;
}
|
||||
|
||||
__kernel void normalize_hists_kernel(const int nthreads, const int block_hist_size, const int img_block_width,
|
||||
__global float* block_hists, const float threshold, __local float *squares)
|
||||
{
|
||||
const int tid = get_local_id(0);
|
||||
const int gidX = get_group_id(0);
|
||||
const int gidY = get_group_id(1);
|
||||
|
||||
__global float* hist = block_hists + (gidY * img_block_width + gidX) * block_hist_size + tid;
|
||||
|
||||
float elem = 0.f;
|
||||
if (tid < block_hist_size)
|
||||
elem = hist[0];
|
||||
|
||||
squares[tid] = elem * elem;
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
float sum = reduce_smem(squares, nthreads);
|
||||
|
||||
float scale = 1.0f / (sqrt(sum) + 0.1f * block_hist_size);
|
||||
elem = min(elem * scale, threshold);
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
squares[tid] = elem * elem;
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
sum = reduce_smem(squares, nthreads);
|
||||
scale = 1.0f / (sqrt(sum) + 1e-3f);
|
||||
|
||||
if (tid < block_hist_size)
|
||||
hist[0] = elem * scale;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------
|
||||
// Linear SVM based classification
|
||||
//
|
||||
// Evaluates the linear SVM for one detection window per work-group and stores
// the decision in labels.
//
//   cblock_hist_size     - floats per block histogram
//   cdescr_size          - number of coefficients / descriptor elements per window
//   cdescr_width         - descriptor elements taken from one row of blocks
//   img_win_width        - number of windows per row of labels
//   img_block_width      - number of blocks per row of block_hists
//   win_block_stride_x/y - step between neighbouring windows, in blocks
//   block_hists          - normalized block histograms
//   coefs, free_coef     - SVM weights and bias
//   threshold            - a window is labelled 1 when score >= threshold
//   labels               - output, one uchar per window
//
// The work-group is expected to contain NTHREADS work-items in dimension 0.
__kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr_size, const int cdescr_width,
                                    const int img_win_width, const int img_block_width,
                                    const int win_block_stride_x, const int win_block_stride_y,
                                    __global const float * block_hists, __global const float* coefs,
                                    float free_coef, float threshold, __global uchar* labels)
{
    __local float products[NTHREADS];

    const int tid = get_local_id(0);
    const int gidX = get_group_id(0);
    const int gidY = get_group_id(1);

    // Top-left block of this window.
    __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size;

    // Each work-item accumulates a strided slice of the dot product.
    float product = 0.f;
    for (int i = tid; i < cdescr_size; i += NTHREADS)
    {
        int offset_y = i / cdescr_width;
        int offset_x = i - offset_y * cdescr_width;
        product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x];
    }

    products[tid] = product;

    barrier(CLK_LOCAL_MEM_FENCE);

    // Tree reduction with a barrier after every step, so the result does not
    // depend on lock-step execution of the last 32 work-items. The additions
    // feeding products[0] happen in the same order as before.
    for (int stride = NTHREADS / 2; stride > 0; stride >>= 1)
    {
        if (tid < stride)
            products[tid] += products[tid + stride];
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    if (tid == 0)
        labels[gidY * img_win_width + gidX] = (products[0] + free_coef >= threshold);
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Extract descriptors
|
||||
|
||||
// Copies the descriptor of one detection window per work-group from
// block_hists to descriptors, keeping the row-by-row order of block_hists.
//
//   cblock_hist_size     - floats per block histogram
//   descriptors_quadstep - row pitch of descriptors, in floats (one row per window)
//   cdescr_size          - floats per descriptor
//   cdescr_width         - floats taken from one row of blocks
//   img_block_width      - number of blocks per row of block_hists
//   win_block_stride_x/y - step between neighbouring windows, in blocks
__kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, const int cdescr_width,
                                            const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
                                            __global const float* block_hists, __global float* descriptors)
{
    const int lid = get_local_id(0);
    const int win_x = get_group_id(0);
    const int win_y = get_group_id(1);

    // First block histogram of this window in the source.
    __global const float* src = block_hists + (win_y * win_block_stride_y * img_block_width + win_x * win_block_stride_x) * cblock_hist_size;

    // Row of the destination matrix that belongs to this window.
    __global float* dst = descriptors + (win_y * get_num_groups(0) + win_x) * descriptors_quadstep;

    // Work-items walk the descriptor with a stride of NTHREADS.
    for (int elem = lid; elem < cdescr_size; elem += NTHREADS)
    {
        const int block_row = elem / cdescr_width;
        const int col_in_row = elem - block_row * cdescr_width;
        dst[elem] = src[block_row * img_block_width * cblock_hist_size + col_in_row];
    }
}
|
||||
|
||||
// Copies the descriptor of one detection window per work-group from
// block_hists to descriptors, storing the blocks column by column
// (block (x, y) of the window lands at index x * cnblocks_win_y + y).
//
//   cblock_hist_size     - floats per block histogram
//   descriptors_quadstep - row pitch of descriptors, in floats (one row per window)
//   cdescr_size          - floats per descriptor
//   cnblocks_win_x/y     - blocks per window in x and y
//   img_block_width      - number of blocks per row of block_hists
//   win_block_stride_x/y - step between neighbouring windows, in blocks
__kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size,
                                            const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width, const int win_block_stride_x,
                                            const int win_block_stride_y, __global const float* block_hists, __global float* descriptors)
{
    const int lid = get_local_id(0);
    const int win_x = get_group_id(0);
    const int win_y = get_group_id(1);

    // First block histogram of this window in the source.
    __global const float* src = block_hists + (win_y * win_block_stride_y * img_block_width + win_x * win_block_stride_x) * cblock_hist_size;

    // Row of the destination matrix that belongs to this window.
    __global float* dst = descriptors + (win_y * get_num_groups(0) + win_x) * descriptors_quadstep;

    for (int elem = lid; elem < cdescr_size; elem += NTHREADS)
    {
        // Split the flat index into (block, bin) ...
        const int block = elem / cblock_hist_size;
        const int bin = elem - block * cblock_hist_size;

        // ... and the block index into window coordinates.
        const int by = block / cnblocks_win_x;
        const int bx = block - by * cnblocks_win_x;

        const int dst_index = (bx * cnblocks_win_y + by) * cblock_hist_size + bin;
        const int src_index = (by * img_block_width + bx) * cblock_hist_size + bin;
        dst[dst_index] = src[src_index];
    }
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
// Gradients computation
|
||||
|
||||
__kernel void compute_gradients_8UC4_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step,
|
||||
const __global uchar4 * img, __global float * grad, __global uchar * qangle,
|
||||
const float angle_scale, const char correct_gamma, const int cnbins)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int tid = get_local_id(0);
|
||||
const int gSizeX = get_local_size(0);
|
||||
const int gidX = get_group_id(0);
|
||||
const int gidY = get_group_id(1);
|
||||
|
||||
__global const uchar4* row = img + gidY * img_step;
|
||||
|
||||
__local float sh_row[(NTHREADS + 2) * 3];
|
||||
|
||||
uchar4 val;
|
||||
if (x < width)
|
||||
val = row[x];
|
||||
else
|
||||
val = row[width - 2];
|
||||
|
||||
sh_row[tid + 1] = val.x;
|
||||
sh_row[tid + 1 + (NTHREADS + 2)] = val.y;
|
||||
sh_row[tid + 1 + 2 * (NTHREADS + 2)] = val.z;
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
val = row[max(x - 1, 1)];
|
||||
sh_row[0] = val.x;
|
||||
sh_row[(NTHREADS + 2)] = val.y;
|
||||
sh_row[2 * (NTHREADS + 2)] = val.z;
|
||||
}
|
||||
|
||||
if (tid == gSizeX - 1)
|
||||
{
|
||||
val = row[min(x + 1, width - 2)];
|
||||
sh_row[gSizeX + 1] = val.x;
|
||||
sh_row[gSizeX + 1 + (NTHREADS + 2)] = val.y;
|
||||
sh_row[gSizeX + 1 + 2 * (NTHREADS + 2)] = val.z;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if (x < width)
|
||||
{
|
||||
float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)], sh_row[tid + 2 * (NTHREADS + 2)]);
|
||||
float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)], sh_row[tid + 2 + 2 * (NTHREADS + 2)]);
|
||||
|
||||
float3 dx;
|
||||
if (correct_gamma == 1)
|
||||
dx = sqrt(b) - sqrt(a);
|
||||
else
|
||||
dx = b - a;
|
||||
|
||||
float3 dy = (float3) 0.f;
|
||||
|
||||
if (gidY > 0 && gidY < height - 1)
|
||||
{
|
||||
a = convert_float3(img[(gidY - 1) * img_step + x].xyz);
|
||||
b = convert_float3(img[(gidY + 1) * img_step + x].xyz);
|
||||
|
||||
if (correct_gamma == 1)
|
||||
dy = sqrt(b) - sqrt(a);
|
||||
else
|
||||
dy = b - a;
|
||||
}
|
||||
|
||||
float best_dx = dx.x;
|
||||
float best_dy = dy.x;
|
||||
|
||||
float mag0 = dx.x * dx.x + dy.x * dy.x;
|
||||
float mag1 = dx.y * dx.y + dy.y * dy.y;
|
||||
if (mag0 < mag1)
|
||||
{
|
||||
best_dx = dx.y;
|
||||
best_dy = dy.y;
|
||||
mag0 = mag1;
|
||||
}
|
||||
|
||||
mag1 = dx.z * dx.z + dy.z * dy.z;
|
||||
if (mag0 < mag1)
|
||||
{
|
||||
best_dx = dx.z;
|
||||
best_dy = dy.z;
|
||||
mag0 = mag1;
|
||||
}
|
||||
|
||||
mag0 = sqrt(mag0);
|
||||
|
||||
float ang = (atan2(best_dy, best_dx) + CV_PI_F) * angle_scale - 0.5f;
|
||||
int hidx = (int)floor(ang);
|
||||
ang -= hidx;
|
||||
hidx = (hidx + cnbins) % cnbins;
|
||||
|
||||
qangle[(gidY * qangle_step + x) << 1] = hidx;
|
||||
qangle[((gidY * qangle_step + x) << 1) + 1] = (hidx + 1) % cnbins;
|
||||
grad[(gidY * grad_quadstep + x) << 1] = mag0 * (1.f - ang);
|
||||
grad[((gidY * grad_quadstep + x) << 1) + 1] = mag0 * ang;
|
||||
}
|
||||
}
|
||||
|
||||
// Computes per-pixel gradient magnitude and quantized orientation for a
// single-channel 8-bit image. Each pixel produces two bins (qangle) and two
// weights (grad) that split the magnitude between them.
//
//   height, width - image size in pixels
//   img_step      - row pitch of img, in bytes
//   grad_quadstep - row pitch factor of grad (index is (y * step + x) * 2)
//   qangle_step   - row pitch factor of qangle (index is (y * step + x) * 2)
//   angle_scale   - converts (atan2 + pi) to bin units
//   correct_gamma - when 1, differences are taken between square roots of the intensities
//   cnbins        - number of orientation bins
//
// One work-group handles a horizontal run of pixels in row get_group_id(1);
// the vertical gradient is left at zero on the first and last image row.
__kernel void compute_gradients_8UC1_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step,
                                            __global const uchar * img, __global float * grad, __global uchar * qangle,
                                            const float angle_scale, const char correct_gamma, const int cnbins)
{
    const int x = get_global_id(0);
    const int lid = get_local_id(0);
    const int local_w = get_local_size(0);
    const int y = get_group_id(1);

    __global const uchar* src_row = img + y * img_step;

    // Row segment plus one halo pixel on either side.
    __local float sh_row[NTHREADS + 2];

    // Work-items past the right edge load pixel width - 2 instead.
    if (x < width)
        sh_row[lid + 1] = src_row[x];
    else
        sh_row[lid + 1] = src_row[width - 2];

    // Left halo; at x == 0 pixel 1 is used.
    if (lid == 0)
        sh_row[0] = src_row[max(x - 1, 1)];

    // Right halo; clamped to pixel width - 2.
    if (lid == local_w - 1)
        sh_row[local_w + 1] = src_row[min(x + 1, width - 2)];

    barrier(CLK_LOCAL_MEM_FENCE);

    // No barrier follows, so out-of-image work-items can leave here.
    if (x >= width)
        return;

    const float left = sh_row[lid];
    const float right = sh_row[lid + 2];

    float dx;
    if (correct_gamma == 1)
        dx = sqrt(right) - sqrt(left);
    else
        dx = right - left;

    float dy = 0.f;
    if (y > 0 && y < height - 1)
    {
        const float below = (float) img[(y + 1) * img_step + x];
        const float above = (float) img[(y - 1) * img_step + x];
        if (correct_gamma == 1)
            dy = sqrt(below) - sqrt(above);
        else
            dy = below - above;
    }

    const float magnitude = sqrt(dx * dx + dy * dy);

    // Fractional bin position; the magnitude is shared between the bin below
    // and the next one (wrapping around at cnbins).
    float bin_pos = (atan2(dy, dx) + CV_PI_F) * angle_scale - 0.5f;
    int bin = (int)floor(bin_pos);
    bin_pos -= bin;
    bin = (bin + cnbins) % cnbins;

    qangle[(y * qangle_step + x) << 1] = bin;
    qangle[((y * qangle_step + x) << 1) + 1] = (bin + 1) % cnbins;
    grad[(y * grad_quadstep + x) << 1] = magnitude * (1.f - bin_pos);
    grad[((y * grad_quadstep + x) << 1) + 1] = magnitude * bin_pos;
}
|
760
modules/ocl/src/surf.cpp
Normal file
760
modules/ocl/src/surf.cpp
Normal file
@ -0,0 +1,760 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Peng Xiao, pengxiao@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
#include <iomanip>
|
||||
#include "precomp.hpp"
|
||||
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
using namespace std;
|
||||
|
||||
#if !defined (HAVE_OPENCL)
|
||||
|
||||
cv::ocl::SURF_OCL::SURF_OCL() { throw_nogpu(); }
|
||||
cv::ocl::SURF_OCL::SURF_OCL(double, int, int, bool, float, bool) { throw_nogpu(); }
|
||||
int cv::ocl::SURF_OCL::descriptorSize() const { throw_nogpu(); return 0;}
|
||||
void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint>&, oclMat&) { throw_nogpu(); }
|
||||
void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat&, vector<KeyPoint>&) { throw_nogpu(); }
|
||||
void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat&, vector<float>&) { throw_nogpu(); }
|
||||
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, oclMat&) { throw_nogpu(); }
|
||||
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, oclMat&, oclMat&, bool) { throw_nogpu(); }
|
||||
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector<KeyPoint>&) { throw_nogpu(); }
|
||||
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector<KeyPoint>&, oclMat&, bool) { throw_nogpu(); }
|
||||
void cv::ocl::SURF_OCL::operator()(const oclMat&, const oclMat&, vector<KeyPoint>&, vector<float>&, bool) { throw_nogpu(); }
|
||||
void cv::ocl::SURF_OCL::releaseMemory() { throw_nogpu(); }
|
||||
|
||||
#else /* !defined (HAVE_OPENCL) */
|
||||
namespace cv { namespace ocl
|
||||
{
|
||||
///////////////////////////OpenCL kernel strings///////////////////////////
|
||||
extern const char * nonfree_surf;
|
||||
}}
|
||||
|
||||
namespace
|
||||
{
|
||||
// Integer division of `total` by `grain`, rounded up for positive arguments
// (e.g. the number of `grain`-sized groups needed to cover `total` items).
static inline int divUp(int total, int grain)
{
    const int padded = total + grain - 1;
    return padded / grain;
}
|
||||
// Returns the wavelet size for the given layer of the given octave:
// (9 + 6 * layer) doubled once per octave.
static inline int calcSize(int octave, int layer)
{
    // Wavelet size at the first layer of the first octave.
    const int firstLayerSize = 9;

    // Size increment between consecutive layers. It is even on purpose: all
    // wavelet sizes within an octave are then either all even or all odd, so
    // the layers above and below a sample stay aligned when its neighbours
    // are examined.
    const int sizeStepPerLayer = 6;

    const int sizeInFirstOctave = firstLayerSize + sizeStepPerLayer * layer;
    return sizeInFirstOctave << octave;
}
|
||||
|
||||
class SURF_OCL_Invoker
|
||||
{
|
||||
public:
|
||||
// facilities
|
||||
void bindImgTex(const oclMat& img);
|
||||
void bindSumTex(const oclMat& sum);
|
||||
void bindMaskSumTex(const oclMat& maskSum);
|
||||
|
||||
//void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
|
||||
//void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
|
||||
|
||||
// kernel callers declarations
|
||||
void icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int layer_rows);
|
||||
|
||||
void icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset,
|
||||
int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols);
|
||||
|
||||
void icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter,
|
||||
oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures);
|
||||
|
||||
void icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures);
|
||||
|
||||
void compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures);
|
||||
// end of kernel callers declarations
|
||||
|
||||
|
||||
// Validates the input, derives the feature/candidate limits and prepares the
// device-side inputs (image binding and integral image) for detection.
//
//   surf - detector whose parameters are read and whose buffers (surf.sum) are filled
//   img  - non-empty CV_8UC1 image, large enough for the coarsest octave
//   mask - must be empty; a non-empty mask has to match img in size and be
//          CV_8UC1, but masked detection is not implemented and is rejected
//
// Raises cv::Exception through CV_Assert for invalid arguments and through
// CV_Error(CV_StsNotImplemented) when a mask is supplied.
SURF_OCL_Invoker(SURF_OCL& surf, const oclMat& img, const oclMat& mask) :
    surf_(surf),
    img_cols(img.cols), img_rows(img.rows),
    use_mask(!mask.empty())
{
    CV_Assert(!img.empty() && img.type() == CV_8UC1);
    CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
    CV_Assert(surf_.nOctaves > 0 && surf_.nOctaveLayers > 0);

    // The image must be at least as large as the smallest wavelet of the last octave.
    const int min_size = calcSize(surf_.nOctaves - 1, 0);
    CV_Assert(img_rows - min_size >= 0);
    CV_Assert(img_cols - min_size >= 0);

    // The last octave's layer must be larger than twice its sampling margin.
    const int layer_rows = img_rows >> (surf_.nOctaves - 1);
    const int layer_cols = img_cols >> (surf_.nOctaves - 1);
    const int min_margin = ((calcSize((surf_.nOctaves - 1), 2) >> 1) >> (surf_.nOctaves - 1)) + 1;
    CV_Assert(layer_rows - 2 * min_margin > 0);
    CV_Assert(layer_cols - 2 * min_margin > 0);

    maxFeatures = std::min(static_cast<int>(img.size().area() * surf.keypointsRatio), 65535);
    maxCandidates = std::min(static_cast<int>(1.5 * maxFeatures), 65535);

    CV_Assert(maxFeatures > 0);

    if (use_mask)
    {
        // FIXME: masked detection is not implemented. It requires an
        // element-wise min between the mask and an all-ones matrix to fill
        // surf_.mask1, followed by integral(surf_.mask1, surf_.maskSum) and
        // bindMaskSumTex(surf_.maskSum). Reject the request here, before any
        // device work is done, with a descriptive error.
        CV_Error(CV_StsNotImplemented, "SURF_OCL: detection with a mask is not supported yet");
    }

    counters.create(1, surf_.nOctaves + 1, CV_32SC1);
    counters.setTo(Scalar::all(0));

    //loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold));

    bindImgTex(img);
    oclMat integral_sqsum;
    integral(img, surf_.sum, integral_sqsum); // the two argumented integral version is incorrect

    bindSumTex(surf_.sum);
    maskSumTex = 0;
}
|
||||
|
||||
void detectKeypoints(oclMat& keypoints)
|
||||
{
|
||||
// create image pyramid buffers
|
||||
// different layers have same sized buffers, but they are sampled from gaussin kernel.
|
||||
surf_.det.create(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1);
|
||||
surf_.trace.create(img_rows * (surf_.nOctaveLayers + 2), img_cols, CV_32FC1);
|
||||
|
||||
surf_.maxPosBuffer.create(1, maxCandidates, CV_32SC4);
|
||||
keypoints.create(SURF_OCL::ROWS_COUNT, maxFeatures, CV_32FC1);
|
||||
keypoints.setTo(Scalar::all(0));
|
||||
|
||||
for (int octave = 0; octave < surf_.nOctaves; ++octave)
|
||||
{
|
||||
const int layer_rows = img_rows >> octave;
|
||||
const int layer_cols = img_cols >> octave;
|
||||
|
||||
//loadOctaveConstants(octave, layer_rows, layer_cols);
|
||||
|
||||
icvCalcLayerDetAndTrace_gpu(surf_.det, surf_.trace, octave, surf_.nOctaveLayers, layer_rows);
|
||||
|
||||
icvFindMaximaInLayer_gpu(surf_.det, surf_.trace, surf_.maxPosBuffer, counters, 1 + octave,
|
||||
octave, use_mask, surf_.nOctaveLayers, layer_rows, layer_cols);
|
||||
|
||||
unsigned int maxCounter = Mat(counters).at<unsigned int>(1 + octave);
|
||||
maxCounter = std::min(maxCounter, static_cast<unsigned int>(maxCandidates));
|
||||
|
||||
if (maxCounter > 0)
|
||||
{
|
||||
icvInterpolateKeypoint_gpu(surf_.det, surf_.maxPosBuffer, maxCounter,
|
||||
keypoints, counters, octave, layer_rows, maxFeatures);
|
||||
}
|
||||
}
|
||||
unsigned int featureCounter = Mat(counters).at<unsigned int>(0);
|
||||
featureCounter = std::min(featureCounter, static_cast<unsigned int>(maxFeatures));
|
||||
|
||||
keypoints.cols = featureCounter;
|
||||
|
||||
if (surf_.upright)
|
||||
keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0));
|
||||
else
|
||||
findOrientation(keypoints);
|
||||
}
|
||||
|
||||
void findOrientation(oclMat& keypoints)
|
||||
{
|
||||
const int nFeatures = keypoints.cols;
|
||||
if (nFeatures > 0)
|
||||
{
|
||||
icvCalcOrientation_gpu(keypoints, nFeatures);
|
||||
}
|
||||
}
|
||||
|
||||
void computeDescriptors(const oclMat& keypoints, oclMat& descriptors, int descriptorSize)
|
||||
{
|
||||
const int nFeatures = keypoints.cols;
|
||||
if (nFeatures > 0)
|
||||
{
|
||||
descriptors.create(nFeatures, descriptorSize, CV_32F);
|
||||
compute_descriptors_gpu(descriptors, keypoints, nFeatures);
|
||||
}
|
||||
}
|
||||
|
||||
~SURF_OCL_Invoker()
|
||||
{
|
||||
if(imgTex)
|
||||
openCLFree(imgTex);
|
||||
if(sumTex)
|
||||
openCLFree(sumTex);
|
||||
if(maskSumTex)
|
||||
openCLFree(maskSumTex);
|
||||
additioalParamBuffer.release();
|
||||
}
|
||||
|
||||
private:
|
||||
SURF_OCL& surf_;
|
||||
|
||||
int img_cols, img_rows;
|
||||
|
||||
bool use_mask;
|
||||
|
||||
int maxCandidates;
|
||||
int maxFeatures;
|
||||
|
||||
oclMat counters;
|
||||
|
||||
// texture buffers
|
||||
cl_mem imgTex;
|
||||
cl_mem sumTex;
|
||||
cl_mem maskSumTex;
|
||||
|
||||
oclMat additioalParamBuffer;
|
||||
};
|
||||
}
|
||||
|
||||
// Default parameter set: 4 octaves with 2 layers each, Hessian threshold 100,
// extended descriptors, orientation enabled and a keypoint ratio of 0.01.
cv::ocl::SURF_OCL::SURF_OCL()
{
    // detector settings
    nOctaves = 4;
    nOctaveLayers = 2;
    hessianThreshold = 100.0f;
    keypointsRatio = 0.01f;

    // descriptor settings
    upright = false;
    extended = true;
}
|
||||
|
||||
// Stores the caller-supplied SURF parameters verbatim; nothing is validated
// here.
cv::ocl::SURF_OCL::SURF_OCL(double _threshold, int _nOctaves, int _nOctaveLayers, bool _extended, float _keypointsRatio, bool _upright)
{
    // detector settings
    nOctaves = _nOctaves;
    nOctaveLayers = _nOctaveLayers;
    hessianThreshold = _threshold;
    keypointsRatio = _keypointsRatio;

    // descriptor settings
    upright = _upright;
    extended = _extended;
}
|
||||
|
||||
int cv::ocl::SURF_OCL::descriptorSize() const
|
||||
{
|
||||
return extended ? 128 : 64;
|
||||
}
|
||||
|
||||
void cv::ocl::SURF_OCL::uploadKeypoints(const vector<KeyPoint>& keypoints, oclMat& keypointsGPU)
|
||||
{
|
||||
if (keypoints.empty())
|
||||
keypointsGPU.release();
|
||||
else
|
||||
{
|
||||
Mat keypointsCPU(SURF_OCL::ROWS_COUNT, static_cast<int>(keypoints.size()), CV_32FC1);
|
||||
|
||||
float* kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW);
|
||||
float* kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW);
|
||||
int* kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW);
|
||||
int* kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW);
|
||||
float* kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW);
|
||||
float* kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW);
|
||||
float* kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW);
|
||||
|
||||
for (size_t i = 0, size = keypoints.size(); i < size; ++i)
|
||||
{
|
||||
const KeyPoint& kp = keypoints[i];
|
||||
kp_x[i] = kp.pt.x;
|
||||
kp_y[i] = kp.pt.y;
|
||||
kp_octave[i] = kp.octave;
|
||||
kp_size[i] = kp.size;
|
||||
kp_dir[i] = kp.angle;
|
||||
kp_hessian[i] = kp.response;
|
||||
kp_laplacian[i] = 1;
|
||||
}
|
||||
|
||||
keypointsGPU.upload(keypointsCPU);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat& keypointsGPU, vector<KeyPoint>& keypoints)
|
||||
{
|
||||
const int nFeatures = keypointsGPU.cols;
|
||||
|
||||
if (nFeatures == 0)
|
||||
keypoints.clear();
|
||||
else
|
||||
{
|
||||
CV_Assert(keypointsGPU.type() == CV_32FC1 && keypointsGPU.rows == ROWS_COUNT);
|
||||
|
||||
Mat keypointsCPU(keypointsGPU);
|
||||
|
||||
keypoints.resize(nFeatures);
|
||||
|
||||
float* kp_x = keypointsCPU.ptr<float>(SURF_OCL::X_ROW);
|
||||
float* kp_y = keypointsCPU.ptr<float>(SURF_OCL::Y_ROW);
|
||||
int* kp_laplacian = keypointsCPU.ptr<int>(SURF_OCL::LAPLACIAN_ROW);
|
||||
int* kp_octave = keypointsCPU.ptr<int>(SURF_OCL::OCTAVE_ROW);
|
||||
float* kp_size = keypointsCPU.ptr<float>(SURF_OCL::SIZE_ROW);
|
||||
float* kp_dir = keypointsCPU.ptr<float>(SURF_OCL::ANGLE_ROW);
|
||||
float* kp_hessian = keypointsCPU.ptr<float>(SURF_OCL::HESSIAN_ROW);
|
||||
|
||||
for (int i = 0; i < nFeatures; ++i)
|
||||
{
|
||||
KeyPoint& kp = keypoints[i];
|
||||
kp.pt.x = kp_x[i];
|
||||
kp.pt.y = kp_y[i];
|
||||
kp.class_id = kp_laplacian[i];
|
||||
kp.octave = kp_octave[i];
|
||||
kp.size = kp_size[i];
|
||||
kp.angle = kp_dir[i];
|
||||
kp.response = kp_hessian[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat& descriptorsGPU, vector<float>& descriptors)
|
||||
{
|
||||
if (descriptorsGPU.empty())
|
||||
descriptors.clear();
|
||||
else
|
||||
{
|
||||
CV_Assert(descriptorsGPU.type() == CV_32F);
|
||||
|
||||
descriptors.resize(descriptorsGPU.rows * descriptorsGPU.cols);
|
||||
Mat descriptorsCPU(descriptorsGPU.size(), CV_32F, &descriptors[0]);
|
||||
descriptorsGPU.download(descriptorsCPU);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints)
|
||||
{
|
||||
if (!img.empty())
|
||||
{
|
||||
SURF_OCL_Invoker surf(*this, img, mask);
|
||||
|
||||
surf.detectKeypoints(keypoints);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, oclMat& keypoints, oclMat& descriptors,
|
||||
bool useProvidedKeypoints)
|
||||
{
|
||||
if (!img.empty())
|
||||
{
|
||||
SURF_OCL_Invoker surf(*this, img, mask);
|
||||
|
||||
if (!useProvidedKeypoints)
|
||||
surf.detectKeypoints(keypoints);
|
||||
else if (!upright)
|
||||
{
|
||||
surf.findOrientation(keypoints);
|
||||
}
|
||||
|
||||
surf.computeDescriptors(keypoints, descriptors, descriptorSize());
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector<KeyPoint>& keypoints)
|
||||
{
|
||||
oclMat keypointsGPU;
|
||||
|
||||
(*this)(img, mask, keypointsGPU);
|
||||
|
||||
downloadKeypoints(keypointsGPU, keypoints);
|
||||
}
|
||||
|
||||
void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector<KeyPoint>& keypoints,
|
||||
oclMat& descriptors, bool useProvidedKeypoints)
|
||||
{
|
||||
oclMat keypointsGPU;
|
||||
|
||||
if (useProvidedKeypoints)
|
||||
uploadKeypoints(keypoints, keypointsGPU);
|
||||
|
||||
(*this)(img, mask, keypointsGPU, descriptors, useProvidedKeypoints);
|
||||
|
||||
downloadKeypoints(keypointsGPU, keypoints);
|
||||
}
|
||||
|
||||
void cv::ocl::SURF_OCL::operator()(const oclMat& img, const oclMat& mask, vector<KeyPoint>& keypoints,
|
||||
vector<float>& descriptors, bool useProvidedKeypoints)
|
||||
{
|
||||
oclMat descriptorsGPU;
|
||||
|
||||
(*this)(img, mask, keypoints, descriptorsGPU, useProvidedKeypoints);
|
||||
|
||||
downloadDescriptors(descriptorsGPU, descriptors);
|
||||
}
|
||||
|
||||
void cv::ocl::SURF_OCL::releaseMemory()
|
||||
{
|
||||
sum.release();
|
||||
mask1.release();
|
||||
maskSum.release();
|
||||
intBuffer.release();
|
||||
det.release();
|
||||
trace.release();
|
||||
maxPosBuffer.release();
|
||||
}
|
||||
|
||||
// Facilities
|
||||
|
||||
//// load SURF constants into device memory
|
||||
//void SURF_OCL_Invoker::loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold)
|
||||
//{
|
||||
// Mat tmp(1, 9, CV_32FC1);
|
||||
// float * tmp_data = tmp.ptr<float>();
|
||||
// *tmp_data = maxCandidates;
|
||||
// *(++tmp_data) = maxFeatures;
|
||||
// *(++tmp_data) = img_rows;
|
||||
// *(++tmp_data) = img_cols;
|
||||
// *(++tmp_data) = nOctaveLayers;
|
||||
// *(++tmp_data) = hessianThreshold;
|
||||
// additioalParamBuffer = tmp;
|
||||
//}
|
||||
//void SURF_OCL_Invoker::loadOctaveConstants(int octave, int layer_rows, int layer_cols)
|
||||
//{
|
||||
// Mat tmp = additioalParamBuffer;
|
||||
// float * tmp_data = tmp.ptr<float>();
|
||||
// tmp_data += 6;
|
||||
// *tmp_data = octave;
|
||||
// *(++tmp_data) = layer_rows;
|
||||
// *(++tmp_data) = layer_cols;
|
||||
// additioalParamBuffer = tmp;
|
||||
//}
|
||||
|
||||
// create and bind source buffer to image oject.
|
||||
void SURF_OCL_Invoker::bindImgTex(const oclMat& img)
|
||||
{
|
||||
Mat cpu_img(img); // time consuming
|
||||
cl_image_format format;
|
||||
int err;
|
||||
|
||||
format.image_channel_data_type = CL_UNSIGNED_INT8;
|
||||
format.image_channel_order = CL_R;
|
||||
|
||||
#if CL_VERSION_1_2
|
||||
cl_image_desc desc;
|
||||
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||
desc.image_width = cpu_img.cols;
|
||||
desc.image_height = cpu_img.rows;
|
||||
desc.image_depth = NULL;
|
||||
desc.image_array_size = 1;
|
||||
desc.image_row_pitch = cpu_img.step;
|
||||
desc.image_slice_pitch= 0;
|
||||
desc.buffer = NULL;
|
||||
desc.num_mip_levels = 0;
|
||||
desc.num_samples = 0;
|
||||
imgTex = clCreateImage(img.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err);
|
||||
#else
|
||||
imgTex = clCreateImage2D(
|
||||
img.clCxt->impl->clContext,
|
||||
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
&format,
|
||||
cpu_img.cols,
|
||||
cpu_img.rows,
|
||||
cpu_img.step,
|
||||
cpu_img.data,
|
||||
&err);
|
||||
#endif
|
||||
openCLSafeCall(err);
|
||||
}
|
||||
|
||||
void SURF_OCL_Invoker::bindSumTex(const oclMat& sum)
|
||||
{
|
||||
Mat cpu_img(sum); // time consuming
|
||||
cl_image_format format;
|
||||
int err;
|
||||
format.image_channel_data_type = CL_UNSIGNED_INT32;
|
||||
format.image_channel_order = CL_R;
|
||||
#if CL_VERSION_1_2
|
||||
cl_image_desc desc;
|
||||
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||
desc.image_width = cpu_img.cols;
|
||||
desc.image_height = cpu_img.rows;
|
||||
desc.image_depth = NULL;
|
||||
desc.image_array_size = 1;
|
||||
desc.image_row_pitch = cpu_img.step;
|
||||
desc.image_slice_pitch= 0;
|
||||
desc.buffer = NULL;
|
||||
desc.num_mip_levels = 0;
|
||||
desc.num_samples = 0;
|
||||
sumTex = clCreateImage(sum.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err);
|
||||
#else
|
||||
sumTex = clCreateImage2D(
|
||||
sum.clCxt->impl->clContext,
|
||||
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
&format,
|
||||
cpu_img.cols,
|
||||
cpu_img.rows,
|
||||
cpu_img.step,
|
||||
cpu_img.data,
|
||||
&err);
|
||||
#endif
|
||||
openCLSafeCall(err);
|
||||
}
|
||||
void SURF_OCL_Invoker::bindMaskSumTex(const oclMat& maskSum)
|
||||
{
|
||||
Mat cpu_img(maskSum); // time consuming
|
||||
cl_image_format format;
|
||||
int err;
|
||||
format.image_channel_data_type = CL_UNSIGNED_INT32;
|
||||
format.image_channel_order = CL_R;
|
||||
#if CL_VERSION_1_2
|
||||
cl_image_desc desc;
|
||||
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
|
||||
desc.image_width = cpu_img.cols;
|
||||
desc.image_height = cpu_img.rows;
|
||||
desc.image_depth = NULL;
|
||||
desc.image_array_size = 1;
|
||||
desc.image_row_pitch = cpu_img.step;
|
||||
desc.image_slice_pitch= 0;
|
||||
desc.buffer = NULL;
|
||||
desc.num_mip_levels = 0;
|
||||
desc.num_samples = 0;
|
||||
maskSumTex = clCreateImage(maskSum.clCxt->impl->clContext, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &format, &desc, cpu_img.data, &err);
|
||||
#else
|
||||
maskSumTex = clCreateImage2D(
|
||||
maskSum.clCxt->impl->clContext,
|
||||
CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
|
||||
&format,
|
||||
cpu_img.cols,
|
||||
cpu_img.rows,
|
||||
cpu_img.step,
|
||||
cpu_img.data,
|
||||
&err);
|
||||
#endif
|
||||
openCLSafeCall(err);
|
||||
}
|
||||
|
||||
////////////////////////////
|
||||
// kernel caller definitions
|
||||
// Launches the "icvCalcLayerDetAndTrace" kernel for one octave. The kernel
// reads sumTex and writes into `det` and `trace`.
// The grid covers the sample positions of the octave in x and
// (nOctaveLayers + 2) stacked layers in y, rounded up to 16x16 work-groups.
void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat& det, oclMat& trace, int octave, int nOctaveLayers, int c_layer_rows)
{
    // number of sample positions per layer along each axis
    const int minSize = calcSize(octave, 0);
    const int samplesY = 1 + ((img_rows - minSize) >> octave);
    const int samplesX = 1 + ((img_cols - minSize) >> octave);

    size_t localThreads[3] = {16, 16, 1};
    size_t globalThreads[3] =
    {
        divUp(samplesX, localThreads[0]) * localThreads[0],
        divUp(samplesY, localThreads[1]) * localThreads[1] * (nOctaveLayers + 2),
        1
    };

    // kernel arguments, in the order the kernel declares them
    vector< pair<size_t, const void *> > args;
    args.push_back( make_pair( sizeof(cl_mem), (void *)&sumTex));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data));
    args.push_back( make_pair( sizeof(cl_mem), (void *)&trace.data));
    args.push_back( make_pair( sizeof(cl_int), (void *)&det.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&trace.step));
    args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows));
    args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols));
    args.push_back( make_pair( sizeof(cl_int), (void *)&nOctaveLayers));
    args.push_back( make_pair( sizeof(cl_int), (void *)&octave));
    args.push_back( make_pair( sizeof(cl_int), (void *)&c_layer_rows));

    Context *ctx = det.clCxt;
    string kernelName = "icvCalcLayerDetAndTrace";
    openCLExecuteKernel(ctx, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
}
|
||||
|
||||
void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat& det, const oclMat& trace, oclMat& maxPosBuffer, oclMat& maxCounter, int counterOffset,
|
||||
int octave, bool use_mask, int nLayers, int layer_rows, int layer_cols)
|
||||
{
|
||||
const int min_margin = ((calcSize(octave, 2) >> 1) >> octave) + 1;
|
||||
|
||||
Context *clCxt = det.clCxt;
|
||||
string kernelName = use_mask ? "icvFindMaximaInLayer_withmask" : "icvFindMaximaInLayer";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&trace.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&maxCounter.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&counterOffset));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&det.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&trace.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&nLayers));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&octave));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&layer_rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&layer_cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&maxCandidates));
|
||||
args.push_back( make_pair( sizeof(cl_float), (void *)&surf_.hessianThreshold));
|
||||
|
||||
if(use_mask)
|
||||
{
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&maskSumTex));
|
||||
}
|
||||
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
size_t globalThreads[3] = {divUp(layer_cols - 2 * min_margin, localThreads[0] - 2) * localThreads[0],
|
||||
divUp(layer_rows - 2 * min_margin, localThreads[1] - 2) * nLayers * localThreads[1],
|
||||
1};
|
||||
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat& det, const oclMat& maxPosBuffer, unsigned int maxCounter,
|
||||
oclMat& keypoints, oclMat& counters, int octave, int layer_rows, int maxFeatures)
|
||||
{
|
||||
Context *clCxt = det.clCxt;
|
||||
string kernelName = "icvInterpolateKeypoint";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&det.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&maxPosBuffer.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&counters.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&det.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&octave));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&layer_rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&maxFeatures));
|
||||
|
||||
size_t localThreads[3] = {3, 3, 3};
|
||||
size_t globalThreads[3] = {maxCounter * localThreads[0], 1, 1};
|
||||
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat& keypoints, int nFeatures)
|
||||
{
|
||||
Context * clCxt = counters.clCxt;
|
||||
string kernelName = "icvCalcOrientation";
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&sumTex));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&img_rows));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&img_cols));
|
||||
|
||||
size_t localThreads[3] = {32, 4, 1};
|
||||
size_t globalThreads[3] = {nFeatures * localThreads[0], localThreads[1], 1};
|
||||
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
||||
void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat& descriptors, const oclMat& keypoints, int nFeatures)
|
||||
{
|
||||
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
|
||||
Context *clCxt = descriptors.clCxt;
|
||||
string kernelName = "";
|
||||
vector< pair<size_t, const void *> > args;
|
||||
size_t localThreads[3] = {1, 1, 1};
|
||||
size_t globalThreads[3] = {1, 1, 1};
|
||||
|
||||
if(descriptors.cols == 64)
|
||||
{
|
||||
kernelName = "compute_descriptors64";
|
||||
|
||||
localThreads[0] = 6;
|
||||
localThreads[1] = 6;
|
||||
|
||||
globalThreads[0] = nFeatures * localThreads[0];
|
||||
globalThreads[1] = 16 * localThreads[1];
|
||||
|
||||
args.clear();
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&imgTex));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step));
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
|
||||
kernelName = "normalize_descriptors64";
|
||||
|
||||
localThreads[0] = 64;
|
||||
localThreads[1] = 1;
|
||||
|
||||
globalThreads[0] = nFeatures * localThreads[0];
|
||||
globalThreads[1] = localThreads[1];
|
||||
|
||||
args.clear();
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step));
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
else
|
||||
{
|
||||
kernelName = "compute_descriptors128";
|
||||
|
||||
localThreads[0] = 6;
|
||||
localThreads[1] = 6;
|
||||
|
||||
globalThreads[0] = nFeatures * localThreads[0];
|
||||
globalThreads[1] = 16 * localThreads[1];
|
||||
|
||||
args.clear();
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&imgTex));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&keypoints.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&keypoints.step));
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
|
||||
kernelName = "normalize_descriptors128";
|
||||
|
||||
localThreads[0] = 128;
|
||||
localThreads[1] = 1;
|
||||
|
||||
globalThreads[0] = nFeatures * localThreads[0];
|
||||
globalThreads[1] = localThreads[1];
|
||||
|
||||
args.clear();
|
||||
args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data));
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&descriptors.step));
|
||||
openCLExecuteKernel(clCxt, &nonfree_surf, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
}
|
||||
|
||||
#endif // /* !defined (HAVE_OPENCL) */
|
||||
|
192
modules/ocl/test/test_hog.cpp
Normal file
192
modules/ocl/test/test_hog.cpp
Normal file
@ -0,0 +1,192 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// Intel License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Wenju He, wenju@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of Intel Corporation may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencv2/core/core.hpp"
|
||||
using namespace std;
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
|
||||
// Fixture for the HOG tests, parameterised by detection window size and
// input matrix type.
PARAM_TEST_CASE(HOG, cv::Size, int)
{
    cv::Size winSize;            // test parameter 0
    int type;                    // test parameter 1
    vector<cv::ocl::Info> info;  // filled by cv::ocl::getDevice in SetUp

    virtual void SetUp()
    {
        cv::ocl::getDevice(info);

        type = GET_PARAM(1);
        winSize = GET_PARAM(0);
    }
};
|
||||
|
||||
// Compares the HOG descriptors computed by the OpenCL implementation with
// those of the CPU implementation on the same image.
TEST_P(HOG, GetDescriptors)
{
    // Load image
    cv::Mat img_rgb = readImage("../../../samples/gpu/road.png");
    ASSERT_FALSE(img_rgb.empty());

    // Convert image: grayscale for CV_8UC1, BGRA for every other type
    const bool grayscale = (type == CV_8UC1);
    cv::Mat img;
    if (grayscale)
        cv::cvtColor(img_rgb, img, CV_BGR2GRAY);
    else
        cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
    cv::ocl::oclMat d_img(img);

    // HOGs
    cv::ocl::HOGDescriptor ocl_hog;
    ocl_hog.gamma_correction = true;
    cv::HOGDescriptor hog;
    hog.gammaCorrection = true;

    // Compute descriptor on the device and flatten it into a single column
    cv::ocl::oclMat d_descriptors;
    ocl_hog.getDescriptors(d_img, ocl_hog.win_size, d_descriptors, ocl_hog.DESCR_FORMAT_COL_BY_COL);
    cv::Mat down_descriptors;
    d_descriptors.download(down_descriptors);
    down_descriptors = down_descriptors.reshape(0, down_descriptors.cols * down_descriptors.rows);

    // CPU reference; the non-grayscale case is computed from the original
    // 3-channel image rather than from the BGRA conversion
    hog.setSVMDetector(hog.getDefaultPeopleDetector());
    std::vector<float> descriptors;
    if (grayscale)
        hog.compute(img, descriptors, ocl_hog.win_size);
    else
        hog.compute(img_rgb, descriptors, ocl_hog.win_size);
    cv::Mat cpu_descriptors(descriptors);

    EXPECT_MAT_SIMILAR(down_descriptors, cpu_descriptors, 1e-2);
}
|
||||
|
||||
|
||||
// Compares the detection locations reported by the OpenCL HOG detector with
// those of the CPU detector on the same image.
TEST_P(HOG, Detect)
{
    // Load image
    cv::Mat img_rgb = readImage("../../../samples/gpu/road.png");
    ASSERT_FALSE(img_rgb.empty());

    // Convert image: grayscale for CV_8UC1, BGRA for every other type
    const bool grayscale = (type == CV_8UC1);
    cv::Mat img;
    if (grayscale)
        cv::cvtColor(img_rgb, img, CV_BGR2GRAY);
    else
        cv::cvtColor(img_rgb, img, CV_BGR2BGRA);
    cv::ocl::oclMat d_img(img);

    // HOGs: only 48x96 and 64x128 windows are used, anything else falls
    // back to 64x128
    const bool daimlerWindow = (winSize == cv::Size(48, 96));
    if (!daimlerWindow && winSize != cv::Size(64, 128))
        winSize = cv::Size(64, 128);

    cv::ocl::HOGDescriptor ocl_hog(winSize);
    ocl_hog.gamma_correction = true;

    cv::HOGDescriptor hog;
    hog.winSize = winSize;
    hog.gammaCorrection = true;

    if (daimlerWindow)
    {
        // daimler's base
        ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector48x96());
        hog.setSVMDetector(hog.getDaimlerPeopleDetector());
    }
    else
    {
        // winSize is 64x128 here
        ocl_hog.setSVMDetector(ocl_hog.getPeopleDetector64x128());
        hog.setSVMDetector(hog.getDefaultPeopleDetector());
    }

    // OpenCL detection
    std::vector<cv::Point> d_v_locations;
    ocl_hog.detect(d_img, d_v_locations, 0);
    cv::Mat d_locations(d_v_locations);

    // CPU detection; the non-grayscale case runs on the original 3-channel
    // image rather than on the BGRA conversion
    std::vector<cv::Point> v_locations;
    if (grayscale)
        hog.detect(img, v_locations, 0);
    else
        hog.detect(img_rgb, v_locations, 0);
    cv::Mat locations(v_locations);

    char s[100] = {0};
    EXPECT_MAT_NEAR(d_locations, locations, 0, s);
}
|
||||
|
||||
|
||||
// Runs every HOG test for both supported window sizes and both input types.
INSTANTIATE_TEST_CASE_P(
    OCL_ObjDetect, HOG,
    testing::Combine(testing::Values(cv::Size(64, 128), cv::Size(48, 96)),
                     testing::Values(MatType(CV_8UC1), MatType(CV_8UC4))));
|
||||
|
||||
|
||||
#endif //HAVE_OPENCL
|
Loading…
x
Reference in New Issue
Block a user