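HOG OpenCL cleanup: ocl_detect takes InputArray, oclSvmDetector is stored as a UMat, WAVE_SIZE comes from the kernel's preferred work-group size multiple instead of a hard-coded 32, OpenCL dispatch uses CV_OCL_RUN, the perf sanity check is re-enabled, and debug imshow code is removed from the test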

This commit is contained in:
Konstantin Matskevich 2014-01-31 08:46:27 +04:00
parent 3ea32b87bb
commit 5c38519c65
4 changed files with 69 additions and 54 deletions

View File

@ -327,7 +327,7 @@ public:
Size padding = Size(),
const std::vector<Point>& searchLocations=std::vector<Point>()) const;
//ocl
virtual bool ocl_detect(const UMat& img, std::vector<Point> &hits,
virtual bool ocl_detect(InputArray img, std::vector<Point> &hits,
double hitThreshold = 0, Size winStride = Size()) const;
//with result weights output
CV_WRAP virtual void detectMultiScale(InputArray img, CV_OUT std::vector<Rect>& foundLocations,
@ -360,7 +360,7 @@ public:
CV_PROP double L2HysThreshold;
CV_PROP bool gammaCorrection;
CV_PROP std::vector<float> svmDetector;
CV_PROP std::vector<float> oclSvmDetector;
UMat oclSvmDetector;
CV_PROP int nlevels;

View File

@ -85,7 +85,7 @@ OCL_PERF_TEST(HOGFixture, HOG)
OCL_TEST_CYCLE() hog.detectMultiScale(src, found_locations);
std::sort(found_locations.begin(), found_locations.end(), RectLess());
// SANITY_CHECK(found_locations, 1 + DBL_EPSILON);
SANITY_CHECK(found_locations, 1 + DBL_EPSILON);
}
}

View File

@ -113,10 +113,7 @@ void HOGDescriptor::setSVMDetector(InputArray _svmDetector)
{
_svmDetector.getMat().convertTo(svmDetector, CV_32F);
std::vector<float> detector;
_svmDetector.getMat().copyTo(detector);
std::vector<float> detector_reordered(detector.size());
Mat detector_reordered(1, (int)svmDetector.size(), CV_32FC1);
size_t block_hist_size = getBlockHistogramSize(blockSize, cellSize, nbins);
cv::Size blocks_per_img = numPartsWithin(winSize, blockSize, blockStride);
@ -124,12 +121,12 @@ void HOGDescriptor::setSVMDetector(InputArray _svmDetector)
for (int i = 0; i < blocks_per_img.height; ++i)
for (int j = 0; j < blocks_per_img.width; ++j)
{
const float *src = &detector[0] + (j * blocks_per_img.height + i) * block_hist_size;
float *dst = &detector_reordered[0] + (i * blocks_per_img.width + j) * block_hist_size;
const float *src = &svmDetector[0] + (j * blocks_per_img.height + i) * block_hist_size;
float *dst = (float*)detector_reordered.data + (i * blocks_per_img.width + j) * block_hist_size;
for (size_t k = 0; k < block_hist_size; ++k)
dst[k] = src[k];
}
Mat(detector_reordered).convertTo(oclSvmDetector, CV_32F);
detector_reordered.copyTo(oclSvmDetector);
CV_Assert(checkDetectorSize());
}
@ -1119,14 +1116,16 @@ static bool ocl_computeGradient(InputArray img, UMat grad, UMat qangle, int nbin
static bool ocl_compute_hists(int nbins, int block_stride_x, int block_stride_y, int height, int width,
UMat grad, UMat qangle, UMat gauss_w_lut, UMat block_hists, size_t block_hist_size)
{
ocl::Kernel k("compute_hists_lut_kernel", ocl::objdetect::objdetect_hog_oclsrc);
if(k.empty())
return false;
bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
cv::String opts;
if(is_cpu)
opts = "-D CPU ";
else
opts = cv::format("-D WAVE_SIZE=%d", 32);
ocl::Kernel k("compute_hists_lut_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
k.create("compute_hists_lut_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
if(k.empty())
return false;
@ -1177,13 +1176,6 @@ static int power_2up(unsigned int n)
static bool ocl_normalize_hists(int nbins, int block_stride_x, int block_stride_y,
int height, int width, UMat block_hists, float threshold)
{
bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
cv::String opts;
if(is_cpu)
opts = "-D CPU ";
else
opts = cv::format("-D WAVE_SIZE=%d", 32);
int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y;
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x)
/ block_stride_x;
@ -1194,12 +1186,22 @@ static bool ocl_normalize_hists(int nbins, int block_stride_x, int block_stride_
size_t localThreads[3] = { 1, 1, 1 };
int idx = 0;
bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
cv::String opts;
ocl::Kernel k;
if ( nbins == 9 )
{
k.create("normalize_hists_36_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
if(k.empty())
return false;
if(is_cpu)
opts = "-D CPU ";
else
opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
k.create("normalize_hists_36_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
if(k.empty())
return false;
int blocks_in_group = NTHREADS / block_hist_size;
nthreads = blocks_in_group * block_hist_size;
int num_groups = (img_block_width * img_block_height + blocks_in_group - 1)/blocks_in_group;
@ -1208,9 +1210,17 @@ static bool ocl_normalize_hists(int nbins, int block_stride_x, int block_stride_
}
else
{
k.create("normalize_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
if(k.empty())
return false;
if(is_cpu)
opts = "-D CPU ";
else
opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
k.create("normalize_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
if(k.empty())
return false;
nthreads = power_2up(block_hist_size);
globalThreads[0] = img_block_width * nthreads;
globalThreads[1] = img_block_height;
@ -1377,9 +1387,8 @@ void HOGDescriptor::compute(InputArray _img, std::vector<float>& descriptors,
padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);
Size paddedImgSize(imgSize.width + padding.width*2, imgSize.height + padding.height*2);
if(ocl::useOpenCL() && _img.dims() <= 2 && _img.type() == CV_8UC1 && _img.isUMat() &&
CV_OCL_RUN(_img.dims() <= 2 && _img.type() == CV_8UC1 && _img.isUMat(),
ocl_compute(_img, winStride, descriptors, DESCR_FORMAT_COL_BY_COL))
return;
Mat img = _img.getMat();
HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);
@ -1605,16 +1614,12 @@ private:
static bool ocl_classify_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width,
const UMat& block_hists, const std::vector<float>& _detector,
const UMat& block_hists, UMat detector,
float free_coef, float threshold, UMat& labels, Size descr_size, int block_hist_size)
{
int nthreads;
bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
cv::String opts;
if(is_cpu)
opts = "-D CPU ";
else
opts = cv::format("-D WAVE_SIZE=%d", 32);
ocl::Kernel k;
int idx = 0;
@ -1622,22 +1627,45 @@ static bool ocl_classify_hists(int win_height, int win_width, int block_stride_y
{
case 180:
nthreads = 180;
k.create("classify_hists_180_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
if(k.empty())
return false;
if(is_cpu)
opts = "-D CPU ";
else
opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
k.create("classify_hists_180_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
if(k.empty())
return false;
idx = k.set(idx, descr_size.width);
idx = k.set(idx, descr_size.height);
break;
case 252:
nthreads = 256;
k.create("classify_hists_252_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
if(k.empty())
return false;
if(is_cpu)
opts = "-D CPU ";
else
opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
k.create("classify_hists_252_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
if(k.empty())
return false;
idx = k.set(idx, descr_size.width);
idx = k.set(idx, descr_size.height);
break;
default:
nthreads = 256;
k.create("classify_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, "");
if(k.empty())
return false;
if(is_cpu)
opts = "-D CPU ";
else
opts = cv::format("-D WAVE_SIZE=%d", k.preferedWorkGroupSizeMultiple());
k.create("classify_hists_kernel", ocl::objdetect::objdetect_hog_oclsrc, opts);
if(k.empty())
return false;
@ -1655,8 +1683,6 @@ static bool ocl_classify_hists(int win_height, int win_width, int block_stride_y
size_t globalThreads[3] = { img_win_width * nthreads, img_win_height, 1 };
size_t localThreads[3] = { nthreads, 1, 1 };
UMat detector(_detector, true);
idx = k.set(idx, block_hist_size);
idx = k.set(idx, img_win_width);
idx = k.set(idx, img_block_width);
@ -1671,7 +1697,7 @@ static bool ocl_classify_hists(int win_height, int win_width, int block_stride_y
return k.run(2, globalThreads, localThreads, false);
}
bool HOGDescriptor::ocl_detect(const UMat& img, std::vector<Point> &hits,
bool HOGDescriptor::ocl_detect(InputArray img, std::vector<Point> &hits,
double hit_threshold, Size win_stride) const
{
hits.clear();
@ -1743,20 +1769,21 @@ bool HOGDescriptor::ocl_detectMultiScale(InputArray _img, std::vector<Rect> &fou
{
std::vector<Rect> all_candidates;
std::vector<Point> locations;
UMat img = _img.getUMat(), image_scale;
image_scale.create(img.size(), img.type());
UMat image_scale;
Size imgSize = _img.size();
image_scale.create(imgSize, _img.type());
for (size_t i = 0; i<level_scale.size() ; i++)
{
double scale = level_scale[i];
Size effect_size = Size(cvRound(img.cols / scale), cvRound(img.rows / scale));
if (effect_size == img.size())
Size effect_size = Size(cvRound(imgSize.width / scale), cvRound(imgSize.height / scale));
if (effect_size == imgSize)
{
if(!ocl_detect(img, locations, hit_threshold, win_stride)) return false;
if(!ocl_detect(_img, locations, hit_threshold, win_stride)) return false;
}
else
{
resize(img, image_scale, effect_size);
resize(_img, image_scale, effect_size);
if(!ocl_detect(image_scale, locations, hit_threshold, win_stride)) return false;
}
Size scaled_win_size(cvRound(winSize.width * scale),
@ -1791,10 +1818,12 @@ void HOGDescriptor::detectMultiScale(
levels = std::max(levels, 1);
levelScale.resize(levels);
if(ocl::useOpenCL() && _img.dims() <= 2 && _img.type() == CV_8UC1 && scale0 > 1 && winStride.width % blockStride.width == 0 &&
winStride.height % blockStride.height == 0 && padding == Size(0,0) && _img.isUMat() &&
ocl_detectMultiScale(_img, foundLocations, levelScale, hitThreshold, winStride, finalThreshold))
return;
if(winStride == Size())
winStride = blockStride;
CV_OCL_RUN(_img.dims() <= 2 && _img.type() == CV_8UC1 && scale0 > 1 && winStride.width % blockStride.width == 0 &&
winStride.height % blockStride.height == 0 && padding == Size(0,0) && _img.isUMat(),
ocl_detectMultiScale(_img, foundLocations, levelScale, hitThreshold, winStride, finalThreshold));
std::vector<Rect> allCandidates;
std::vector<double> tempScales;

View File

@ -110,20 +110,6 @@ OCL_TEST_P(HOG, Detect)
OCL_OFF(hog.detectMultiScale(img, cpu_found, 0, Size(8, 8), Size(0, 0), 1.05, 6));
OCL_ON(hog.detectMultiScale(uimg, gpu_found, 0, Size(8, 8), Size(0, 0), 1.05, 6));
for (size_t i = 0; i < cpu_found.size(); i++)
{
Rect r = cpu_found[i];
rectangle(img, r.tl(), r.br(), Scalar(0, 255, 0), 3);
}
imshow("cpu", img);
Mat imgs(img);
for (size_t i = 0; i < gpu_found.size(); i++)
{
Rect r = cpu_found[i];
rectangle(imgs, r.tl(), r.br(), Scalar(0, 255, 0), 3);
}
imshow("gpu", imgs);
waitKey(0);
EXPECT_LT(checkRectSimilarity(img.size(), cpu_found, gpu_found), 1.0);
}