diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index 660a6df73..515a4a275 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -1142,6 +1142,14 @@ private:
 
 //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
 
+// One pyramid level for HOGDescriptor::computeConfidenceMultiScale.
+struct CV_EXPORTS HOGConfidence
+{
+   double scale;                  // scale of this level, supplied by the caller
+   vector<Point> locations;       // receives the position of every evaluated window
+   vector<double> confidences;    // receives the score of every evaluated window
+   vector<double> part_scores[4]; // unused in this patch; element type assumed double - TODO confirm
+};
 
 struct CV_EXPORTS HOGDescriptor
 {
@@ -1173,6 +1181,15 @@ struct CV_EXPORTS HOGDescriptor
                           Size padding=Size(), double scale0=1.05,
                           int group_threshold=2);
 
+    // Scores every window of img; hits receives the windows scoring >= hit_threshold.
+    void computeConfidence(const GpuMat& img, vector<Point>& hits, double hit_threshold,
+                           Size win_stride, Size padding, vector<Point>& locations, vector<double>& confidences);
+
+    // Runs computeConfidence at each scale listed in conf_out and groups the hits.
+    void computeConfidenceMultiScale(const GpuMat& img, vector<Rect>& found_locations,
+                                     double hit_threshold, Size win_stride, Size padding,
+                                     vector<HOGConfidence> &conf_out, int group_threshold);
+
     void getDescriptors(const GpuMat& img, Size win_stride,
                         GpuMat& descriptors,
                         int descr_format=DESCR_FORMAT_COL_BY_COL);
diff --git a/modules/gpu/src/cuda/hog.cu b/modules/gpu/src/cuda/hog.cu
index 8150bf91a..eff6d2074 100644
--- a/modules/gpu/src/cuda/hog.cu
+++ b/modules/gpu/src/cuda/hog.cu
@@ -326,6 +326,114 @@ namespace cv { namespace gpu { namespace device
     // Linear SVM based classification
     //
 
+    // return confidence values not just positive location
+    //
+    // Expected launch (see compute_confidence_hists): blockDim = (nthreads, 1, nblocks),
+    // gridDim = (divUp(img_win_width, nblocks), img_win_height). Every z-slice of a
+    // thread block scores one detection window: its nthreads threads accumulate
+    // coefs[i] * hist[...] over the descriptor, the partial sums are reduced in
+    // shared memory and thread x == 0 stores (sum + free_coef) in confidences.
+    // `threshold` is accepted but not read by the kernel.
+    // NOTE(review): the last reduction steps rely on warp-synchronous execution
+    // (volatile shared memory, no barrier) - confirm this holds on the target GPUs.
+    template <int nthreads, // Number of threads along x that share one window
+              int nblocks>  // Number of histogram block processed by single GPU thread block
+    __global__ void compute_confidence_hists_kernel_many_blocks(const int img_win_width, const int img_block_width,
+                                                                const int win_block_stride_x, const int win_block_stride_y,
+                                                                const float* block_hists, const float* coefs,
+                                                                float free_coef, float threshold, float* confidences)
+    {
+        const int win_x = threadIdx.z;
+        if (blockIdx.x * blockDim.z + win_x >= img_win_width)
+            return;
+
+        const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
+                                           blockIdx.x * win_block_stride_x * blockDim.z + win_x) *
+                                          cblock_hist_size;
+
+        float product = 0.f;
+        for (int i = threadIdx.x; i < cdescr_size; i += nthreads)
+        {
+            int offset_y = i / cdescr_width;
+            int offset_x = i - offset_y * cdescr_width;
+            product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x];
+        }
+
+        __shared__ float products[nthreads * nblocks];
+
+        const int tid = threadIdx.z * nthreads + threadIdx.x;
+        products[tid] = product;
+
+        __syncthreads();
+
+        if (nthreads >= 512)
+        {
+            if (threadIdx.x < 256) products[tid] = product = product + products[tid + 256];
+            __syncthreads();
+        }
+        if (nthreads >= 256)
+        {
+            if (threadIdx.x < 128) products[tid] = product = product + products[tid + 128];
+            __syncthreads();
+        }
+        if (nthreads >= 128)
+        {
+            if (threadIdx.x < 64) products[tid] = product = product + products[tid + 64];
+            __syncthreads();
+        }
+
+        if (threadIdx.x < 32)
+        {
+            volatile float* smem = products;
+            if (nthreads >= 64) smem[tid] = product = product + smem[tid + 32];
+            if (nthreads >= 32) smem[tid] = product = product + smem[tid + 16];
+            if (nthreads >= 16) smem[tid] = product = product + smem[tid + 8];
+            if (nthreads >= 8) smem[tid] = product = product + smem[tid + 4];
+            if (nthreads >= 4) smem[tid] = product = product + smem[tid + 2];
+            if (nthreads >= 2) smem[tid] = product = product + smem[tid + 1];
+        }
+
+        if (threadIdx.x == 0)
+            confidences[blockIdx.y * img_win_width + blockIdx.x * blockDim.z + win_x]
+                = (float)(product + free_coef);
+    }
+
+    // Host wrapper: derives the window grid from the image/window geometry, launches
+    // the kernel above with one window per thread block and waits for completion.
+    void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
+                                  int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
+                                  float* coefs, float free_coef, float threshold, float *confidences)
+    {
+        const int nthreads = 256;
+        const int nblocks = 1;
+
+        int win_block_stride_x = win_stride_x / block_stride_x;
+        int win_block_stride_y = win_stride_y / block_stride_y;
+        int img_win_width = (width - win_width + win_stride_x) / win_stride_x;
+        int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
+
+        // Nothing to score when the image cannot hold a single window; a zero-sized
+        // grid would be rejected as an invalid launch configuration.
+        if (img_win_width <= 0 || img_win_height <= 0)
+            return;
+
+        dim3 threads(nthreads, 1, nblocks);
+        dim3 grid(divUp(img_win_width, nblocks), img_win_height);
+
+        cudaSafeCall(cudaFuncSetCacheConfig(compute_confidence_hists_kernel_many_blocks<nthreads, nblocks>,
+                                            cudaFuncCachePreferL1));
+
+        int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
+                              block_stride_x;
+        compute_confidence_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
+            img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
+            block_hists, coefs, free_coef, threshold, confidences);
+        // Launch-configuration errors are only visible through cudaGetLastError().
+        cudaSafeCall(cudaGetLastError());
+        cudaSafeCall(cudaThreadSynchronize());
+    }
+
+
 
     template <int nthreads,
               int nblocks> // Number of histogram block processed by single GPU thread block
diff --git a/modules/gpu/src/hog.cpp b/modules/gpu/src/hog.cpp
index 3d0b7e9e5..fafcce784 100644
--- a/modules/gpu/src/hog.cpp
+++ b/modules/gpu/src/hog.cpp
@@ -57,6 +57,8 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat&, Size, GpuMat&, int) {
 std::vector<float> cv::gpu::HOGDescriptor::getDefaultPeopleDetector() { throw_nogpu(); return std::vector<float>(); }
 std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector48x96() { throw_nogpu(); return std::vector<float>(); }
 std::vector<float> cv::gpu::HOGDescriptor::getPeopleDetector64x128() { throw_nogpu(); return std::vector<float>(); }
+void cv::gpu::HOGDescriptor::computeConfidence(const GpuMat&, vector<Point>&, double, Size, Size, vector<Point>&, vector<double>&) { throw_nogpu(); }
+void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat&, vector<Rect>&, double, Size, Size, vector<HOGConfidence>&, int) { throw_nogpu(); }
 
 #else
 
@@ -79,6 +81,10 @@ namespace cv { namespace gpu { namespace device
                         int width, float* block_hists, float* coefs, float free_coef,
                         float threshold, unsigned char* labels);
 
+    void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
+                                  int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
+                                  float* coefs, float free_coef, float threshold, float *confidences);
+
     void
extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
                                 int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
                                 cv::gpu::DevMem2Df descriptors);
@@ -258,6 +264,120 @@ void cv::gpu::HOGDescriptor::getDescriptors(const GpuMat& img, Size win_stride,
     }
 }
 
+// Scores every detection window of img with the linear SVM on the GPU.
+//   hits        - windows whose score is >= hit_threshold
+//   locations   - top-left corner of every window, row by row
+//   confidences - score of every window, parallel to locations
+// All three outputs are cleared first. Only padding == Size(0, 0) is supported.
+void cv::gpu::HOGDescriptor::computeConfidence(const GpuMat& img, vector<Point>& hits, double hit_threshold,
+                                               Size win_stride, Size padding, vector<Point>& locations, vector<double>& confidences)
+{
+    CV_Assert(padding == Size(0, 0));
+
+    // Reset every output before the early return so callers never see stale data.
+    hits.clear();
+    locations.clear();
+    confidences.clear();
+    if (detector.empty())
+        return;
+
+    computeBlockHistograms(img);
+
+    if (win_stride == Size())
+        win_stride = block_stride;
+    else
+        CV_Assert(win_stride.width % block_stride.width == 0 &&
+                  win_stride.height % block_stride.height == 0);
+
+    Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride);
+    // An image smaller than the window yields no windows to score.
+    if (wins_per_img.width <= 0 || wins_per_img.height <= 0)
+        return;
+    const int total_wins = wins_per_img.area();
+    labels.create(1, total_wins, CV_32F);
+
+    hog::compute_confidence_hists(win_size.height, win_size.width, block_stride.height, block_stride.width,
+                                  win_stride.height, win_stride.width, img.rows, img.cols, block_hists.ptr<float>(),
+                                  detector.ptr<float>(), (float)free_coef, (float)hit_threshold, labels.ptr<float>());
+
+    labels.download(labels_host);
+    const float* scores = labels_host.ptr<float>();
+
+    // does not support roi for now..
+    for (int i = 0; i < total_wins; i++)
+    {
+        int y = i / wins_per_img.width;
+        int x = i - wins_per_img.width * y;
+        Point pt(win_stride.width * x, win_stride.height * y);
+
+        if (scores[i] >= hit_threshold)
+            hits.push_back(pt);
+        locations.push_back(pt);
+        confidences.push_back((double)scores[i]);
+    }
+}
+
+// Multi-scale variant of computeConfidence.
+// conf_out supplies the scale of each pyramid level on input; on return the
+// locations/confidences of every processed level are filled in. Levels are taken
+// in order up to (not including) the first one at which the scaled image is
+// smaller than the detection window, but the first level is always processed.
+// found_locations receives the hit rectangles of all levels in original-image
+// coordinates, merged by groupRectangles(group_threshold, 0.2).
+void cv::gpu::HOGDescriptor::computeConfidenceMultiScale(const GpuMat& img, vector<Rect>& found_locations,
+                                                         double hit_threshold, Size win_stride, Size padding,
+                                                         vector<HOGConfidence> &conf_out, int group_threshold)
+{
+    // Only these two types are handled by the resize switch below.
+    CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4);
+
+    found_locations.clear();
+    // Without any requested scale there is nothing to evaluate (and no
+    // conf_out element to write results to).
+    if (conf_out.empty())
+        return;
+
+    // Number of leading levels whose scaled image still holds a full window.
+    size_t levels = 0;
+    for (; levels < conf_out.size(); levels++)
+    {
+        double scale = conf_out[levels].scale;
+        if (cvRound(img.cols/scale) < win_size.width ||
+            cvRound(img.rows/scale) < win_size.height)
+            break;
+    }
+    levels = std::max(levels, (size_t)1);
+
+    std::vector<Rect> all_candidates;
+    vector<Point> locations;
+
+    for (size_t i = 0; i < levels; i++)
+    {
+        double scale = conf_out[i].scale;
+        Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
+        GpuMat smaller_img;
+
+        if (sz == img.size())
+            smaller_img = img;
+        else
+        {
+            smaller_img.create(sz, img.type());
+            switch (img.type()) {
+                case CV_8UC1: hog::resize_8UC1(img, smaller_img); break;
+                case CV_8UC4: hog::resize_8UC4(img, smaller_img); break;
+            }
+        }
+
+        computeConfidence(smaller_img, locations, hit_threshold, win_stride, padding, conf_out[i].locations, conf_out[i].confidences);
+
+        Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale));
+        for (size_t j = 0; j < locations.size(); j++)
+            all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size));
+    }
+    found_locations.assign(all_candidates.begin(), all_candidates.end());
+    groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/);
+}
+
 void
cv::gpu::HOGDescriptor::detect(const GpuMat& img, vector<Point>& hits, double hit_threshold, Size win_stride, Size padding)
 {
diff --git a/modules/objdetect/include/opencv2/objdetect/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect/objdetect.hpp
index a754238da..a39be7444 100644
--- a/modules/objdetect/include/opencv2/objdetect/objdetect.hpp
+++ b/modules/objdetect/include/opencv2/objdetect/objdetect.hpp
@@ -491,6 +491,17 @@ protected:
 
 //////////////// HOG (Histogram-of-Oriented-Gradients) Descriptor and Object Detector //////////////
 
+// struct for detection region of interest (ROI)
+struct DetectionROI
+{
+   // scale(size) of the bounding box
+   double scale;
+   // set of requested locations to be evaluated
+   vector<cv::Point> locations;
+   // vector that will contain confidence values for each location
+   vector<double> confidences;
+};
+
 struct CV_EXPORTS_W HOGDescriptor
 {
 public:
@@ -583,6 +594,23 @@ public:
     CV_PROP bool gammaCorrection;
     CV_PROP vector<float> svmDetector;
     CV_PROP int nlevels;
+
+
+   // evaluate specified ROI and return confidence value for each location
+   virtual void detectROI(const cv::Mat& img, const vector<cv::Point> &locations,
+                          CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
+                          double hitThreshold = 0, cv::Size winStride = Size(),
+                          cv::Size padding = Size()) const;
+
+   // evaluate specified ROI and return confidence value for each location in multiple scales
+   virtual void detectMultiScaleROI(const cv::Mat& img,
+                                    CV_OUT std::vector<cv::Rect>& foundLocations,
+                                    std::vector<DetectionROI>& locations,
+                                    double hitThreshold = 0,
+                                    int groupThreshold = 0) const;
+
+   // read/parse Dalal's alt model file
+   void readALTModel(std::string modelfile);
 };
 
 
diff --git a/modules/objdetect/src/hog.cpp b/modules/objdetect/src/hog.cpp
index d96386d32..485334cc3 100644
--- a/modules/objdetect/src/hog.cpp
+++ b/modules/objdetect/src/hog.cpp
@@ -39,7 +39,7 @@
 // the use of this software, even if advised of the possibility of such damage.
//
 //M*/
-
+#include <cstdio>
 #include "precomp.hpp"
 #include <iterator>
 #ifdef HAVE_IPP
@@ -2382,4 +2382,259 @@ vector<float> HOGDescriptor::getDaimlerPeopleDetector()
     return vector<float>(detector, detector + sizeof(detector)/sizeof(detector[0]));
 }
 
+// Loop body for parallel_for: runs detectROI on every DetectionROI level of a
+// range (on the image resized by that level's scale) and appends the hits to
+// *vec as rectangles in original-image coordinates.
+struct HOGConfInvoker
+{
+    HOGConfInvoker( const HOGDescriptor* _hog, const Mat& _img,
+                    double _hitThreshold, Size _padding,
+                    std::vector<DetectionROI>* locs,
+                    ConcurrentRectVector* _vec )
+    {
+        hog = _hog;
+        img = _img;
+        hitThreshold = _hitThreshold;
+        padding = _padding;
+        locations = locs;
+        vec = _vec;
+    }
+
+    void operator()( const BlockedRange& range ) const
+    {
+        int i, i1 = range.begin(), i2 = range.end();
+
+        // Size the scratch buffer for the largest resized image in this range.
+        // The scales are caller-supplied, so the first level need not give the
+        // largest image.
+        Size maxSz(0, 0);
+        for( i = i1; i < i2; i++ )
+        {
+            double scale = (*locations)[i].scale;
+            maxSz.width = std::max(maxSz.width, cvCeil(img.cols/scale));
+            maxSz.height = std::max(maxSz.height, cvCeil(img.rows/scale));
+        }
+        Mat smallerImgBuf(maxSz, img.type());
+        vector<Point> dets;
+
+        for( i = i1; i < i2; i++ )
+        {
+            DetectionROI& roi = (*locations)[i];
+            double scale = roi.scale;
+
+            Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale));
+            Mat smallerImg(sz, img.type(), smallerImgBuf.data);
+
+            if( sz == img.size() )
+                smallerImg = Mat(sz, img.type(), img.data, img.step);
+            else
+                resize(img, smallerImg, sz);
+
+            hog->detectROI(smallerImg, roi.locations, dets, roi.confidences, hitThreshold, Size(), padding);
+            Size scaledWinSize = Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale));
+            for( size_t j = 0; j < dets.size(); j++ )
+                vec->push_back(Rect(cvRound(dets[j].x*scale),
+                                    cvRound(dets[j].y*scale),
+                                    scaledWinSize.width, scaledWinSize.height));
+        }
+    }
+
+    const HOGDescriptor* hog;
+    Mat img;
+    double hitThreshold;
+    std::vector<DetectionROI>* locations;
+    Size padding;
+    ConcurrentRectVector* vec;
+};
+
+// Scores the linear SVM at each requested window position. confidences gets one
+// value per entry of locations (-10.0 for windows that do not fit inside the
+// padded image); foundLocations gets the positions scoring >= hitThreshold.
+// Both outputs are cleared first; nothing is evaluated without a detector or locations.
+void HOGDescriptor::detectROI(const cv::Mat& img, const vector<cv::Point> &locations,
+                              CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences,
+                              double hitThreshold, cv::Size winStride,
+                              cv::Size padding) const
+{
+    foundLocations.clear();
+    confidences.clear();
+
+    if( svmDetector.empty() || locations.empty() )
+        return;
+
+    if( winStride == Size() )
+        winStride = cellSize;
+
+    Size cacheStride(gcd(winStride.width, blockStride.width),
+                     gcd(winStride.height, blockStride.height));
+
+    size_t nwindows = locations.size();
+    padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);
+    padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);
+
+    HOGCache cache(this, img, padding, padding, true, cacheStride);
+    const HOGCache::BlockData* blockData = &cache.blockData[0];
+
+    int nblocks = cache.nblocks.area();
+    int blockHistogramSize = cache.blockHistogramSize;
+    size_t dsize = getDescriptorSize();
+
+    double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0;
+    vector<float> blockHist(blockHistogramSize);
+
+    for( size_t i = 0; i < nwindows; i++ )
+    {
+        Point pt0 = locations[i];
+        if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
+            pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )
+        {
+            // out of image
+            confidences.push_back(-10.0);
+            continue;
+        }
+
+        double s = rho;
+        const float* svmVec = &svmDetector[0];
+        int j, k;
+
+        for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
+        {
+            const HOGCache::BlockData& bj = blockData[j];
+            Point pt = pt0 + bj.imgOffset;
+            // need to divide this into 4 parts!
+            const float* vec = cache.getBlock(pt, &blockHist[0]);
+            for( k = 0; k <= blockHistogramSize - 4; k += 4 )
+                s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +
+                     vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];
+            for( ; k < blockHistogramSize; k++ )
+                s += vec[k]*svmVec[k];
+        }
+        confidences.push_back(s);
+
+        if( s >= hitThreshold )
+            foundLocations.push_back(pt0);
+    }
+}
+
+// Runs detectROI for every DetectionROI level in parallel (padding fixed to
+// 8x8), collects the hits as rectangles in original-image coordinates and
+// merges them with groupRectangles(groupThreshold, 0.2).
+void HOGDescriptor::detectMultiScaleROI(const cv::Mat& img,
+                                        CV_OUT std::vector<cv::Rect>& foundLocations,
+                                        std::vector<DetectionROI>& locations,
+                                        double hitThreshold,
+                                        int groupThreshold) const
+{
+    ConcurrentRectVector allCandidates;
+
+    parallel_for(BlockedRange(0, (int)locations.size()),
+                 HOGConfInvoker(this, img, hitThreshold, Size(8, 8), &locations, &allCandidates));
+
+    foundLocations.resize(allCandidates.size());
+    std::copy(allCandidates.begin(), allCandidates.end(), foundLocations.begin());
+    cv::groupRectangles(foundLocations, groupThreshold, 0.2);
+}
+
+// Throws a cv::Exception (CV_StsError) carrying msg, tagged as raised by readALTModel.
+static void altModelError(const char* msg, int line)
+{
+    throw Exception(CV_StsError, std::string(msg), std::string("readALTModel"),
+                    std::string(__FILE__), line);
+}
+
+// Reads exactly count items of size bytes each; a short read raises altModelError.
+static void altModelRead(void* dst, size_t size, size_t count, FILE* fp, int line)
+{
+    if( fread(dst, size, count, fp) != count )
+        altModelError("unexpected end of model file", line);
+}
+
+// Loads a linear SVM from a binary SVMlight-format model file and installs it
+// through setSVMDetector(). Throws cv::Exception (CV_StsError) when the file
+// cannot be opened, is truncated, carries an unexpected version tag/number or
+// was not trained with kernel type 0 (linear).
+void HOGDescriptor::readALTModel(std::string modelfile)
+{
+    // Closes the file on every exit path, including exceptions.
+    struct FileGuard
+    {
+        explicit FileGuard(FILE* _fp) : fp(_fp) {}
+        ~FileGuard() { if( fp ) fclose(fp); }
+        FILE* fp;
+    };
+
+    // read model from SVMlight format..
+    FILE* modelfl = fopen(modelfile.c_str(), "rb");
+    if( modelfl == NULL )
+        altModelError("file not exist", __LINE__);
+    FileGuard guard(modelfl);
+
+    char version_buffer[10];
+    if( fread(version_buffer, sizeof(char), sizeof(version_buffer), modelfl) != sizeof(version_buffer) )
+        altModelError("version?", __LINE__);
+    // The tag is compared as a C string, so make sure it is terminated.
+    version_buffer[sizeof(version_buffer) - 1] = '\0';
+    if( strcmp(version_buffer, "V6.01") != 0 )
+        altModelError("version doesnot match", __LINE__);
+
+    /* read version number */
+    int version = 0;
+    altModelRead(&version, sizeof(int), 1, modelfl, __LINE__);
+    if( version < 200 )
+        altModelError("version doesnot match", __LINE__);
+
+    int kernel_type = 0;
+    altModelRead(&kernel_type, sizeof(int), 1, modelfl, __LINE__);
+
+    {// ignore these
+        int poly_degree;
+        altModelRead(&poly_degree, sizeof(int), 1, modelfl, __LINE__);
+        double rbf_gamma, coef_lin, coef_const;
+        altModelRead(&rbf_gamma, sizeof(double), 1, modelfl, __LINE__);
+        altModelRead(&coef_lin, sizeof(double), 1, modelfl, __LINE__);
+        altModelRead(&coef_const, sizeof(double), 1, modelfl, __LINE__);
+
+        int l = 0;
+        altModelRead(&l, sizeof(int), 1, modelfl, __LINE__);
+        if( l < 0 )
+            altModelError("invalid custom field length", __LINE__);
+        // Skip the l custom bytes in fixed-size chunks instead of allocating l bytes.
+        char skipped[256];
+        while( l > 0 )
+        {
+            size_t chunk = std::min((size_t)l, sizeof(skipped));
+            altModelRead(skipped, sizeof(char), chunk, modelfl, __LINE__);
+            l -= (int)chunk;
+        }
+    }
+
+    int totwords = 0;
+    altModelRead(&totwords, sizeof(int), 1, modelfl, __LINE__);
+    if( totwords < 0 )
+        altModelError("invalid totwords", __LINE__);
+
+    {// ignore these
+        int totdoc, sv_num;
+        altModelRead(&totdoc, sizeof(int), 1, modelfl, __LINE__);
+        altModelRead(&sv_num, sizeof(int), 1, modelfl, __LINE__);
+    }
+
+    double linearbias = 0;
+    altModelRead(&linearbias, sizeof(double), 1, modelfl, __LINE__);
+
+    if( kernel_type != 0 ) /* only the linear kernel is handled */
+        altModelError("unsupported kernel type", __LINE__);
+
+    /* linear kernel: the file holds totwords + 1 weights, the last one is not used */
+    std::vector<double> linearwt((size_t)totwords + 1);
+    altModelRead(&linearwt[0], sizeof(double), linearwt.size(), modelfl, __LINE__);
+
+    std::vector<float> detector;
+    detector.reserve(linearwt.size());
+    for( int i = 0; i < totwords; i++ )
+        detector.push_back((float)linearwt[i]);
+    detector.push_back((float)-linearbias);
+    setSVMDetector(detector);
+}
+
 }