diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 2af921f1f..5ff90c9b2 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -1007,11 +1007,15 @@ namespace cv GpuMat table_space; }; + + //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector ////////////// + struct CV_EXPORTS HOGDescriptor { public: enum { DEFAULT_WIN_SIGMA = -1 }; enum { DEFAULT_NLEVELS = 64 }; + enum { DESCR_FORMAT_ROW_BY_ROW, DESCR_FORMAT_COL_BY_COL }; HOGDescriptor(Size win_size=Size(64, 128), Size block_size=Size(16, 16), Size block_stride=Size(8, 8), Size cell_size=Size(8, 8), @@ -1029,13 +1033,14 @@ namespace cv void setSVMDetector(const vector& detector); bool checkDetectorSize() const; - void computeBlockHistograms(const GpuMat& img); void detect(const GpuMat& img, vector& found_locations, double hit_threshold=0, Size win_stride=Size(), Size padding=Size()); void detectMultiScale(const GpuMat& img, vector& found_locations, double hit_threshold=0, Size win_stride=Size(), Size padding=Size(), double scale0=1.05, int group_threshold=2); - void getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors); + + void getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, + int descr_format=DESCR_FORMAT_COL_BY_COL); Size win_size; Size block_size; @@ -1044,9 +1049,17 @@ namespace cv int nbins; double win_sigma; double threshold_L2hys; - bool gamma_correction; int nlevels; + protected: + void computeBlockHistograms(const GpuMat& img); + void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle); + + static int numPartsWithin(int size, int part_size, int stride); + static Size numPartsWithin(Size size, Size part_size, Size stride); + + bool gamma_correction; + // Coefficients of the separating plane float free_coef; GpuMat detector; @@ -1058,13 +1071,8 @@ namespace cv // Results of the last histogram evaluation step GpuMat 
block_hists; - private: - static int numPartsWithin(int size, int part_size, int stride); - static Size numPartsWithin(Size size, Size part_size, Size stride); - - void computeGradient(const GpuMat& img, GpuMat& grad, GpuMat& qangle); - - GpuMat grad, qangle; + // Gradients computation results + GpuMat grad, qangle; }; } diff --git a/modules/gpu/src/cuda/hog.cu b/modules/gpu/src/cuda/hog.cu index 3dc7147b6..b1fee4c3c 100644 --- a/modules/gpu/src/cuda/hog.cu +++ b/modules/gpu/src/cuda/hog.cu @@ -428,9 +428,9 @@ void classify_hists(int win_height, int win_width, int block_stride_y, int block template -__global__ void extract_descriptors_kernel(const int img_win_width, const int img_block_width, - const int win_block_stride_x, const int win_block_stride_y, - const float* block_hists, PtrElemStepf descriptors) +__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x, + const int win_block_stride_y, const float* block_hists, + PtrElemStepf descriptors) { // Get left top corner of the window in src const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width + @@ -449,9 +449,9 @@ __global__ void extract_descriptors_kernel(const int img_win_width, const int im } -void extract_descriptors(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, float* block_hists, - DevMem2Df descriptors) +void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, float* block_hists, + DevMem2Df descriptors) { const int nthreads = 256; @@ -464,9 +464,56 @@ void extract_descriptors(int win_height, int win_width, int block_stride_y, int int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; - extract_descriptors_kernel<<>>( - img_win_width, img_block_width, win_block_stride_x, win_block_stride_y, - 
block_hists, descriptors); + extract_descrs_by_rows_kernel<<>>( + img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors); + cudaSafeCall(cudaThreadSynchronize()); +} + + +template +__global__ void extract_descrs_by_cols_kernel(const int img_block_width, const int win_block_stride_x, + const int win_block_stride_y, const float* block_hists, + PtrElemStepf descriptors) +{ + // Get left top corner of the window in src + const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width + + blockIdx.x * win_block_stride_x) * cblock_hist_size; + + // Get left top corner of the window in dst + float* descriptor = descriptors.ptr(blockIdx.y * gridDim.x + blockIdx.x); + + // Copy elements from src to dst + for (int i = threadIdx.x; i < cdescr_size; i += nthreads) + { + int block_idx = i / cblock_hist_size; + int idx_in_block = i - block_idx * cblock_hist_size; + + int y = block_idx / cnblocks_win_x; + int x = block_idx - y * cnblocks_win_x; + + descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] + = hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block]; + } +} + + +void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, float* block_hists, + DevMem2Df descriptors) +{ + const int nthreads = 256; + + int win_block_stride_x = win_stride_x / block_stride_x; + int win_block_stride_y = win_stride_y / block_stride_y; + int img_win_width = (width - win_width + win_stride_x) / win_stride_x; + int img_win_height = (height - win_height + win_stride_y) / win_stride_y; + dim3 threads(nthreads, 1); + dim3 grid(img_win_width, img_win_height); + + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / + block_stride_x; + extract_descrs_by_cols_kernel<<>>( + img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors); cudaSafeCall(cudaThreadSynchronize()); } diff 
--git a/modules/gpu/src/hog.cpp b/modules/gpu/src/hog.cpp index 7f0b3eaec..ad4b66e21 100644 --- a/modules/gpu/src/hog.cpp +++ b/modules/gpu/src/hog.cpp @@ -50,11 +50,9 @@ size_t cv::gpu::HOGDescriptor::getBlockHistogramSize() const { throw_nogpu(); re double cv::gpu::HOGDescriptor::getWinSigma() const { throw_nogpu(); return 0; } bool cv::gpu::HOGDescriptor::checkDetectorSize() const { throw_nogpu(); return false; } void cv::gpu::HOGDescriptor::setSVMDetector(const vector&) { throw_nogpu(); } -void cv::gpu::HOGDescriptor::computeGradient(const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); } -void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat&) { throw_nogpu(); } void cv::gpu::HOGDescriptor::detect(const GpuMat&, vector&, double, Size, Size) { throw_nogpu(); } void cv::gpu::HOGDescriptor::detectMultiScale(const GpuMat&, vector&, double, Size, Size, double, int) { throw_nogpu(); } -void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&) { throw_nogpu(); } +void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&, int) { throw_nogpu(); } std::vector cv::gpu::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector(); } std::vector cv::gpu::HOGDescriptor::getPeopleDetector_48x96() { throw_nogpu(); return std::vector(); } std::vector cv::gpu::HOGDescriptor::getPeopleDetector_64x128() { throw_nogpu(); return std::vector(); } @@ -78,9 +76,12 @@ void classify_hists(int win_height, int win_width, int block_stride_y, int width, float* block_hists, float* coefs, float free_coef, float threshold, unsigned char* labels); -void extract_descriptors(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, float* block_hists, - cv::gpu::DevMem2Df descriptors); +void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, float* block_hists, + 
cv::gpu::DevMem2Df descriptors); +void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, int height, int width, float* block_hists, + cv::gpu::DevMem2Df descriptors); void compute_gradients_8UC1(int nbins, int height, int width, const cv::gpu::DevMem2D& img, float angle_scale, cv::gpu::DevMem2Df grad, cv::gpu::DevMem2D qangle); @@ -218,7 +219,7 @@ void cv::gpu::HOGDescriptor::computeBlockHistograms(const GpuMat& img) } -void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors) +void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, GpuMat& descriptors, int descr_format) { CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); @@ -231,9 +232,21 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride, descriptors.create(wins_per_img.area(), blocks_per_win.area() * block_hist_size, CV_32F); - hog::extract_descriptors(win_size.height, win_size.width, block_stride.height, block_stride.width, - win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr(), - descriptors); + switch (descr_format) + { + case DESCR_FORMAT_ROW_BY_ROW: + hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width, + win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr(), + descriptors); + break; + case DESCR_FORMAT_COL_BY_COL: + hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width, + win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr(), + descriptors); + break; + default: + CV_Error(CV_StsBadArg, "Unknown descriptor format"); + } } diff --git a/tests/gpu/src/hog.cpp b/tests/gpu/src/hog.cpp index b4366b5ab..69c7c2be5 100644 --- a/tests/gpu/src/hog.cpp +++ b/tests/gpu/src/hog.cpp @@ -51,9 +51,9 @@ using namespace std; 
ts->set_failed_test_info(err); \ return; } -struct CV_GpuHogDetectionTest: public CvTest +struct CV_GpuHogDetectionTest: public CvTest, public cv::gpu::HOGDescriptor { - CV_GpuHogDetectionTest(): CvTest( "GPU-HOG-detect", "HOGDescriptorDetection" ) {} + CV_GpuHogDetectionTest(): CvTest("GPU-HOG-detect", "HOGDescriptorDetection") {} void run(int) { @@ -141,54 +141,53 @@ struct CV_GpuHogDetectionTest: public CvTest { cv::gpu::GpuMat d_img(img); - cv::gpu::HOGDescriptor hog; - hog.setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector()); + setSVMDetector(cv::gpu::HOGDescriptor::getDefaultPeopleDetector()); //cpu detector may be updated soon //hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector()); std::vector locations; // Test detect - hog.detect(d_img, locations, 0); + detect(d_img, locations, 0); #ifdef DUMP - dump(hog.block_hists, locations); + dump(block_hists, locations); #else - compare(hog.block_hists, locations); + compare(block_hists, locations); #endif // Test detect on smaller image cv::gpu::GpuMat d_img2; cv::gpu::resize(d_img, d_img2, cv::Size(d_img.cols / 2, d_img.rows / 2)); - hog.detect(d_img2, locations, 0); + detect(d_img2, locations, 0); #ifdef DUMP - dump(hog.block_hists, locations); + dump(block_hists, locations); #else - compare(hog.block_hists, locations); + compare(block_hists, locations); #endif // Test detect on greater image cv::gpu::resize(d_img, d_img2, cv::Size(d_img.cols * 2, d_img.rows * 2)); - hog.detect(d_img2, locations, 0); + detect(d_img2, locations, 0); #ifdef DUMP - dump(hog.block_hists, locations); + dump(block_hists, locations); #else - compare(hog.block_hists, locations); + compare(block_hists, locations); #endif // Test detectMultiScale std::vector rects; size_t nrects; - hog.detectMultiScale(d_img, rects, 0, cv::Size(8, 8), cv::Size(), 1.05, 2); + detectMultiScale(d_img, rects, 0, cv::Size(8, 8), cv::Size(), 1.05, 2); #ifdef DUMP nrects = rects.size(); f.write((char*)&nrects, sizeof(nrects)); for 
(size_t i = 0; i < rects.size(); ++i) f.write((char*)&rects[i], sizeof(rects[i])); - dump(hog.block_hists, std::vector()); + dump(block_hists, std::vector()); #else f.read((char*)&nrects, sizeof(nrects)); CHECK(nrects == rects.size(), CvTS::FAIL_INVALID_OUTPUT) @@ -198,7 +197,7 @@ struct CV_GpuHogDetectionTest: public CvTest f.read((char*)&rect, sizeof(rect)); CHECK(rect == rects[i], CvTS::FAIL_INVALID_OUTPUT); } - compare(hog.block_hists, std::vector()); + compare(block_hists, std::vector()); #endif } @@ -211,9 +210,10 @@ struct CV_GpuHogDetectionTest: public CvTest } gpu_hog_detection_test; -struct CV_GpuHogGetDescriptorsTest: public CvTest +struct CV_GpuHogGetDescriptorsTest: public CvTest, public cv::gpu::HOGDescriptor { - CV_GpuHogGetDescriptorsTest(): CvTest("GPU-HOG-getDescriptors", "HOGDescriptorGetDescriptors") {} + CV_GpuHogGetDescriptorsTest(): + CvTest("GPU-HOG-getDescriptors", "HOGDescriptorGetDescriptors"), HOGDescriptor(cv::Size(64, 128)) {} void run(int) { @@ -228,12 +228,11 @@ struct CV_GpuHogGetDescriptorsTest: public CvTest cv::cvtColor(img_rgb, img, CV_BGR2BGRA); cv::gpu::GpuMat d_img(img); - cv::Size win_size(64, 128); - cv::gpu::HOGDescriptor hog(win_size); // Convert train images into feature vectors (train table) - cv::gpu::GpuMat descriptors; - hog.getDescriptors(d_img, win_size, descriptors); + cv::gpu::GpuMat descriptors, descriptors_by_cols; + getDescriptors(d_img, win_size, descriptors, DESCR_FORMAT_ROW_BY_ROW); + getDescriptors(d_img, win_size, descriptors_by_cols, DESCR_FORMAT_COL_BY_COL); // Check size of the result train table wins_per_img_x = 3; @@ -245,6 +244,20 @@ struct CV_GpuHogGetDescriptorsTest: public CvTest wins_per_img_x * wins_per_img_y); CHECK(descriptors.size() == descr_size_expected, CvTS::FAIL_INVALID_OUTPUT); + // Check both formats of output descriptors are handled correctly + cv::Mat dr(descriptors); + cv::Mat dc(descriptors_by_cols); + for (int i = 0; i < wins_per_img_x * wins_per_img_y; ++i) + { + const float* l 
= dr.rowRange(i, i + 1).ptr(); + const float* r = dc.rowRange(i, i + 1).ptr(); + for (int y = 0; y < blocks_per_win_y; ++y) + for (int x = 0; x < blocks_per_win_x; ++x) + for (int k = 0; k < block_hist_size; ++k) + CHECK(l[(y * blocks_per_win_x + x) * block_hist_size + k] == + r[(x * blocks_per_win_y + y) * block_hist_size + k], CvTS::FAIL_INVALID_OUTPUT); + } + /* Now we want to extract the same feature vectors, but from single images. NOTE: results will be defferent, due to border values interpolation. Using of many small images is slower, however we wont't call getDescriptors and will use computeBlockHistograms instead of. computeBlockHistograms @@ -253,39 +266,39 @@ struct CV_GpuHogGetDescriptorsTest: public CvTest img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive1.png"); CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA); cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - hog.computeBlockHistograms(cv::gpu::GpuMat(img)); + computeBlockHistograms(cv::gpu::GpuMat(img)); // Everything is fine with interpolation for left top subimage - CHECK(cv::norm(hog.block_hists, descriptors.rowRange(0, 1)) == 0.f, CvTS::FAIL_INVALID_OUTPUT); + CHECK(cv::norm(block_hists, descriptors.rowRange(0, 1)) == 0.f, CvTS::FAIL_INVALID_OUTPUT); img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive2.png"); CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA); cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - hog.computeBlockHistograms(cv::gpu::GpuMat(img)); - compare_inner_parts(hog.block_hists, descriptors.rowRange(1, 2)); + computeBlockHistograms(cv::gpu::GpuMat(img)); + compare_inner_parts(block_hists, descriptors.rowRange(1, 2)); img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative1.png"); CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA); cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - hog.computeBlockHistograms(cv::gpu::GpuMat(img)); - compare_inner_parts(hog.block_hists, descriptors.rowRange(2, 3)); + 
computeBlockHistograms(cv::gpu::GpuMat(img)); + compare_inner_parts(block_hists, descriptors.rowRange(2, 3)); img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative2.png"); CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA); cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - hog.computeBlockHistograms(cv::gpu::GpuMat(img)); - compare_inner_parts(hog.block_hists, descriptors.rowRange(3, 4)); + computeBlockHistograms(cv::gpu::GpuMat(img)); + compare_inner_parts(block_hists, descriptors.rowRange(3, 4)); img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/positive3.png"); CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA); cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - hog.computeBlockHistograms(cv::gpu::GpuMat(img)); - compare_inner_parts(hog.block_hists, descriptors.rowRange(4, 5)); + computeBlockHistograms(cv::gpu::GpuMat(img)); + compare_inner_parts(block_hists, descriptors.rowRange(4, 5)); img_rgb = cv::imread(std::string(ts->get_data_path()) + "hog/negative3.png"); CHECK(!img_rgb.empty(), CvTS::FAIL_MISSING_TEST_DATA); cv::cvtColor(img_rgb, img, CV_BGR2BGRA); - hog.computeBlockHistograms(cv::gpu::GpuMat(img)); - compare_inner_parts(hog.block_hists, descriptors.rowRange(5, 6)); + computeBlockHistograms(cv::gpu::GpuMat(img)); + compare_inner_parts(block_hists, descriptors.rowRange(5, 6)); } catch (const cv::Exception& e) {