diff --git a/doc/tutorials/core/adding_images/adding_images.rst b/doc/tutorials/core/adding_images/adding_images.rst index 40e2a0d44..e3135693d 100644 --- a/doc/tutorials/core/adding_images/adding_images.rst +++ b/doc/tutorials/core/adding_images/adding_images.rst @@ -53,8 +53,8 @@ As usual, after the not-so-lengthy explanation, let's go to the code: std::cout<<"* Enter alpha [0-1]: "; std::cin>>input; - /// We use the alpha provided by the user iff it is between 0 and 1 - if( alpha >= 0 && alpha <= 1 ) + /// We use the alpha provided by the user if it is between 0 and 1 + if( input >= 0.0 && input <= 1.0 ) { alpha = input; } /// Read image ( same size, same type ) diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index c4eb0041d..a7e4fd90b 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -57,180 +57,6 @@ using namespace cv; using namespace cv::ocl; using namespace std; -#if !defined (HAVE_OPENCL) - -/* arithmetics */ -void cv::ocl::add(const oclMat &, const oclMat &, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::add(const oclMat &, const oclMat &, oclMat &, const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::add(const oclMat &, const Scalar &, oclMat &, const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::subtract(const oclMat &, const oclMat &, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::subtract(const oclMat &, const oclMat &, oclMat &, const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::subtract(const oclMat &, const Scalar &, oclMat &, const oclMat & ) -{ - throw_nogpu(); -} -void cv::ocl::subtract(const Scalar &, const oclMat &, oclMat &, const oclMat & ) -{ - throw_nogpu(); -} -void cv::ocl::multiply(const oclMat &, const oclMat &, oclMat &, double) -{ - throw_nogpu(); -} -void cv::ocl::divide(const oclMat &, const oclMat &, oclMat &, double) -{ - throw_nogpu(); -} -void cv::ocl::divide(double, const oclMat &, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::absdiff(const oclMat &, const oclMat &, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::absdiff(const oclMat &, const Scalar &, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::compare(const oclMat &, const oclMat &, oclMat & , int) -{ - throw_nogpu(); -} -void cv::ocl::meanStdDev(const oclMat &, Scalar &, Scalar &) -{ - throw_nogpu(); -} -double cv::ocl::norm(const oclMat &, int) -{ - throw_nogpu(); - return 0.0; -} -double cv::ocl::norm(const oclMat &, const oclMat &, int) -{ - throw_nogpu(); - return 0.0; -} -void cv::ocl::flip(const oclMat &, oclMat &, int) -{ - throw_nogpu(); -} -Scalar cv::ocl::sum(const oclMat &) -{ - throw_nogpu(); - return Scalar(); -} -void cv::ocl::minMax(const oclMat &, double *, double *, const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::minMaxLoc(const oclMat &, double *, double *, Point *, Point *, const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::LUT(const oclMat &, const Mat &, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::exp(const oclMat &, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::log(const oclMat &, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::magnitude(const oclMat &, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::phase(const oclMat &, const oclMat &, oclMat &, bool) -{ - throw_nogpu(); -} -void cv::ocl::cartToPolar(const oclMat &, const oclMat &, oclMat &, oclMat &, bool) -{ - throw_nogpu(); -} -void cv::ocl::polarToCart(const oclMat &, const oclMat &, oclMat &, oclMat &, bool) -{ - throw_nogpu(); -} -void cv::ocl::transpose(const oclMat &, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::pow(const oclMat &, double, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2, double beta, double gama, oclMat &dst) -{ - throw_nogpu(); -} -void cv::ocl::magnitudeSqr(const oclMat &src1, const oclMat &src2, oclMat &dst) -{ - throw_nogpu(); -} - -/* bit wise operations */ -void cv::ocl::bitwise_not(const oclMat &, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::bitwise_or(const oclMat &, const oclMat &, oclMat &, const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::bitwise_and(const oclMat &, const oclMat &, oclMat &, const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::bitwise_and(const oclMat &, const Scalar &, oclMat &, const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::bitwise_xor(const oclMat &, const oclMat &, oclMat &, const oclMat &) -{ - throw_nogpu(); -} -cv::ocl::oclMat cv::ocl::operator ~ (const oclMat &) -{ - throw_nogpu(); - return oclMat(); -} -cv::ocl::oclMat cv::ocl::operator | (const oclMat &, const oclMat &) -{ - throw_nogpu(); - return oclMat(); -} -cv::ocl::oclMat cv::ocl::operator & (const oclMat &, const oclMat &) -{ - throw_nogpu(); - return oclMat(); -} -cv::ocl::oclMat cv::ocl::operator ^ (const oclMat &, const oclMat &) -{ - throw_nogpu(); - return oclMat(); -} - -#else /* !defined (HAVE_OPENCL) */ namespace cv { namespace ocl @@ -2477,5 +2303,3 @@ void cv::ocl::pow(const oclMat &x, double p, oclMat &y) arithmetic_pow_run(x, p, y, kernelName, &arithm_pow); } - -#endif /* !defined (HAVE_OPENCL) */ diff --git a/modules/ocl/src/binarycaching.hpp b/modules/ocl/src/binarycaching.hpp index 479487843..0ec565f88 100644 --- a/modules/ocl/src/binarycaching.hpp +++ b/modules/ocl/src/binarycaching.hpp @@ -50,17 +50,6 @@ using namespace std; using std::cout; using std::endl; -#if !defined (HAVE_OPENCL) -namespace cv -{ - namespace ocl - { - //nothing - }//namespace ocl -}//namespace cv - -#else /* !defined (HAVE_OPENCL) */ - namespace cv { namespace ocl @@ -99,4 +88,3 @@ namespace cv }//namespace ocl }//namespace cv -#endif diff --git a/modules/ocl/src/blend.cpp b/modules/ocl/src/blend.cpp index 75463b807..5eead4766 100644 --- a/modules/ocl/src/blend.cpp +++ b/modules/ocl/src/blend.cpp @@ -50,13 +50,6 @@ using namespace cv; using namespace cv::ocl; using namespace std; -#if !defined (HAVE_OPENCL) -void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &weights1, const oclMat &weights2, - oclMat &result) -{ - throw_nogpu(); -} -#else namespace cv { namespace ocl @@ -97,5 +90,4 @@ void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat & openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth); } -} -#endif \ No newline at end of file +} \ No newline at end of file diff --git a/modules/ocl/src/brute_force_matcher.cpp b/modules/ocl/src/brute_force_matcher.cpp index 9fe14d214..c81e342d3 100644 --- a/modules/ocl/src/brute_force_matcher.cpp +++ b/modules/ocl/src/brute_force_matcher.cpp @@ -51,136 +51,6 @@ using namespace cv; using namespace cv::ocl; using namespace std; -#if !defined (HAVE_OPENCL) -cv::ocl::BruteForceMatcher_OCL_base::BruteForceMatcher_OCL_base(DistType) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::add(const vector &) -{ - throw_nogpu(); -} -const vector &cv::ocl::BruteForceMatcher_OCL_base::getTrainDescriptors() const -{ - throw_nogpu(); - return trainDescCollection; -} -void cv::ocl::BruteForceMatcher_OCL_base::clear() -{ - throw_nogpu(); -} -bool cv::ocl::BruteForceMatcher_OCL_base::empty() const -{ - throw_nogpu(); - return true; -} -bool cv::ocl::BruteForceMatcher_OCL_base::isMaskSupported() const -{ - throw_nogpu(); - return true; -} -void cv::ocl::BruteForceMatcher_OCL_base::matchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &, const oclMat &, vector &) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &, const Mat &, vector &) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &, const oclMat &, vector &, const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::makeGpuCollection(oclMat &, oclMat &, const vector &) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::matchCollection(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::matchDownload(const oclMat &, const oclMat &, const oclMat &, vector &) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::matchConvert(const Mat &, const Mat &, const Mat &, vector &) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::match(const oclMat &, vector &, const vector &) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::knnMatchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, int, const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::knnMatchDownload(const oclMat &, const oclMat &, vector< vector > &, bool) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::knnMatchConvert(const Mat &, const Mat &, vector< vector > &, bool) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &, const oclMat &, vector< vector > &, int, const oclMat &, bool) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Collection(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Download(const oclMat &, const oclMat &, const oclMat &, vector< vector > &, bool) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch2Convert(const Mat &, const Mat &, const Mat &, vector< vector > &, bool) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::knnMatch(const oclMat &, vector< vector > &, int, const vector &, bool) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchSingle(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &, float, const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &, const oclMat &, const oclMat &, vector< vector > &, bool) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &, const Mat &, const Mat &, vector< vector > &, bool) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &, const oclMat &, vector< vector > &, float, const oclMat &, bool) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchCollection(const oclMat &, oclMat &, oclMat &, oclMat &, oclMat &, float, const vector &) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchDownload(const oclMat &, const oclMat &, const oclMat &, const oclMat &, vector< vector > &, bool) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatchConvert(const Mat &, const Mat &, const Mat &, const Mat &, vector< vector > &, bool) -{ - throw_nogpu(); -} -void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &, vector< vector > &, float, const vector &, bool) -{ - throw_nogpu(); -} -#else /* !defined (HAVE_OPENCL) */ - using namespace std; namespace cv { @@ -1826,8 +1696,4 @@ void cv::ocl::BruteForceMatcher_OCL_base::radiusMatch(const oclMat &query, vecto oclMat trainIdx, imgIdx, distance, nMatches; radiusMatchCollection(query, trainIdx, imgIdx, distance, nMatches, maxDistance, masks); radiusMatchDownload(trainIdx, imgIdx, distance, nMatches, matches, compactResult); -} - -#endif - - +} \ No newline at end of file diff --git a/modules/ocl/src/build_warps.cpp b/modules/ocl/src/build_warps.cpp index b41077ecd..c4a092993 100644 --- a/modules/ocl/src/build_warps.cpp +++ b/modules/ocl/src/build_warps.cpp @@ -49,21 +49,6 @@ using namespace cv; using namespace cv::ocl; using namespace std; -#if !defined (HAVE_OPENCL) -void cv::ocl::buildWarpPlaneMaps(Size, Rect, const Mat &, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &) -{ - throw_nogpu(); -} -void cv::ocl::buildWarpCylindricalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &) -{ - throw_nogpu(); -} -void cv::ocl::buildWarpSphericalMaps(Size, Rect, const Mat &, const Mat &, float, oclMat &, oclMat &, Stream &) -{ - throw_nogpu(); -} -#else - namespace cv { namespace ocl @@ -275,6 +260,3 @@ void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, o size_t localThreads[3] = {32, 8, 1}; openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1); } - - -#endif // HAVE_OPENCL diff --git a/modules/ocl/src/canny.cpp b/modules/ocl/src/canny.cpp index 250108908..4b872a1bc 100644 --- a/modules/ocl/src/canny.cpp +++ b/modules/ocl/src/canny.cpp @@ -51,25 +51,6 @@ using namespace cv; using namespace cv::ocl; using namespace std; -#if !defined (HAVE_OPENCL) -void cv::ocl::Canny(const oclMat &image, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false) -{ - throw_nogpu(); -} -void cv::ocl::Canny(const oclMat &image, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false) -{ - throw_nogpu(); -} -void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false) -{ - throw_nogpu(); -} -void cv::ocl::Canny(const oclMat &dx, const oclMat &dy, CannyBuf &buf, oclMat &edges, double low_thresh, double high_thresh, bool L2gradient = false) -{ - throw_nogpu(); -} -#else - namespace cv { namespace ocl @@ -426,5 +407,3 @@ void canny::getEdges_gpu(oclMat &map, oclMat &dst, int rows, int cols) openCLExecuteKernel2(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1); } - -#endif // HAVE_OPENCL diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp index ab613e051..e14bd2756 100644 --- a/modules/ocl/src/color.cpp +++ b/modules/ocl/src/color.cpp @@ -49,18 +49,6 @@ using namespace cv; using namespace cv::ocl; -#if !defined (HAVE_OPENCL) - -void cv::ocl::cvtColor(const oclMat &, oclMat &, int, int) -{ - throw_nogpu(); -} -void cv::ocl::cvtColor(const oclMat &, oclMat &, int, int, const Stream &) -{ - throw_nogpu(); -} - -#else /* !defined (HAVE_OPENCL) */ #ifndef CV_DESCALE #define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n)) #endif @@ -289,5 +277,3 @@ void cv::ocl::cvtColor(const oclMat &src, oclMat &dst, int code, int dcn) { cvtColor_caller(src, dst, code, dcn); } - -#endif /* !defined (HAVE_OPENCL) */ diff --git a/modules/ocl/src/columnsum.cpp b/modules/ocl/src/columnsum.cpp index 06c871d50..037ff234e 100644 --- a/modules/ocl/src/columnsum.cpp +++ b/modules/ocl/src/columnsum.cpp @@ -50,16 +50,6 @@ using namespace cv; using namespace cv::ocl; using namespace std; - -#if !defined(HAVE_OPENCL) - -void cv::ocl::columnSum(const oclMat &src, oclMat &dst) -{ - throw_nogpu(); -} - -#else /*!HAVE_OPENCL */ - namespace cv { namespace ocl @@ -92,5 +82,4 @@ void cv::ocl::columnSum(const oclMat &src, oclMat &dst) openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth()); -} -#endif \ No newline at end of file +} \ No newline at end of file diff --git a/modules/ocl/src/error.cpp b/modules/ocl/src/error.cpp index 007c013cf..e854e70cd 100644 --- a/modules/ocl/src/error.cpp +++ b/modules/ocl/src/error.cpp @@ -46,12 +46,6 @@ using namespace cv; using namespace cv::ocl; -#if !defined (HAVE_OPENCL) - -// do nothing - -#else /* !defined (HAVE_OPENCL) */ - namespace cv { namespace ocl @@ -194,5 +188,3 @@ namespace cv } } } - -#endif diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp index cbecaec79..300ae60a7 100644 --- a/modules/ocl/src/fft.cpp +++ b/modules/ocl/src/fft.cpp @@ -49,12 +49,7 @@ using namespace cv; using namespace cv::ocl; using namespace std; -#if !defined HAVE_OPENCL -void cv::ocl::dft(const oclMat &, oclMat &, Size , int ) -{ - throw_nogpu(); -} -#elif !defined HAVE_CLAMDFFT +#if !defined HAVE_CLAMDFFT void cv::ocl::dft(const oclMat&, oclMat&, Size, int) { CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented"); diff --git a/modules/ocl/src/filtering.cpp b/modules/ocl/src/filtering.cpp index 5420021b1..5173dba37 100644 --- a/modules/ocl/src/filtering.cpp +++ b/modules/ocl/src/filtering.cpp @@ -54,121 +54,6 @@ using namespace std; using namespace cv; using namespace cv::ocl; -#if !defined (HAVE_OPENCL) - -Ptr cv::ocl::getBoxFilter_GPU(int, int, const Size &, Point, int) -{ - throw_nogpu(); - return Ptr(0); -} - -Ptr cv::ocl::createBoxFilter_GPU(int, int, const Size &, const Point &, int) -{ - throw_nogpu(); - return Ptr(0); -} - -Ptr cv::ocl::createFilter2D_GPU(const Ptr) -{ - throw_nogpu(); - return Ptr(0); -} - -Ptr cv::ocl::createSeparableLinearFilter_GPU(int, int, const Mat &, const Mat &, const Point &) -{ - throw_nogpu(); - return Ptr(0); -} - -Ptr cv::ocl::getLinearRowFilter_GPU(int, int, const Mat &, int) -{ - throw_nogpu(); - return Ptr(0); -} - -Ptr cv::ocl::getLinearColumnFilter_GPU(int, int, const Mat &, int) -{ - throw_nogpu(); - return Ptr(0); -} - -Ptr cv::ocl::createGaussianFilter_GPU(int, Size, double, double) -{ - throw_nogpu(); - return Ptr(0); -} - - -Ptr cv::ocl::getLinearFilter_GPU(int, int, const Mat &, const Size &, Point) -{ - throw_nogpu(); - return Ptr(0); -} - -Ptr cv::ocl::createLinearFilter_GPU(int, int, const Mat &, const Point &) -{ - throw_nogpu(); - return Ptr(0); -} - -Ptr cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType) -{ - throw_nogpu(); - return Ptr(0); -} - -void cv::ocl::boxFilter(const oclMat &, oclMat &, int, Size, Point, int) -{ - throw_nogpu(); -} - -void cv::ocl::sepFilter2D(const oclMat &, oclMat &, int, const Mat &, const Mat &, Point) -{ - throw_nogpu(); -} - -void cv::ocl::Sobel(const oclMat &, oclMat &, int, int, int, int, double) -{ - throw_nogpu(); -} - -void cv::ocl::Scharr(const oclMat &, oclMat &, int, int, int, double) -{ - throw_nogpu(); -} - -void cv::ocl::GaussianBlur(const oclMat &, oclMat &, Size, double, double) -{ - throw_nogpu(); -} - -void cv::ocl::filter2D(const oclMat &, oclMat &, int, const Mat &, Point) -{ - throw_nogpu(); -} - -void cv::ocl::Laplacian(const oclMat &, oclMat &, int, int, double) -{ - throw_nogpu(); -} - -void cv::ocl::erode(const oclMat &, oclMat &, const Mat &, Point, int) -{ - throw_nogpu(); -} - -void cv::ocl::dilate(const oclMat &, oclMat &, const Mat &, Point, int) -{ - throw_nogpu(); -} - -void cv::ocl::morphologyEx(const oclMat &, oclMat &, int, const Mat &, Point, int) -{ - throw_nogpu(); -} - -#else /* !defined (HAVE_OPENCL) */ - //helper routines namespace cv { @@ -1705,5 +1590,3 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si Ptr f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype); f->apply(src, dst); } - -#endif /* !defined (HAVE_OPENCL) */ diff --git a/modules/ocl/src/gemm.cpp b/modules/ocl/src/gemm.cpp index 2f11e2b36..be7e79cce 100644 --- a/modules/ocl/src/gemm.cpp +++ b/modules/ocl/src/gemm.cpp @@ -46,24 +46,14 @@ #include #include "precomp.hpp" -#ifdef HAVE_CLAMDBLAS - -#include "clAmdBlas.h" - -#if !defined HAVE_OPENCL -void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, - const oclMat &src3, double beta, oclMat &dst, int flags) -{ - throw_nogpu(); -} -#elif !defined HAVE_CLAMDBLAS -void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, - const oclMat &src3, double beta, oclMat &dst, int flags) +#if !defined HAVE_CLAMDBLAS +void cv::ocl::gemm(const oclMat&, const oclMat&, double, + const oclMat&, double, oclMat&, int) { CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented"); } #else - +#include "clAmdBlas.h" using namespace cv; void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, @@ -168,4 +158,3 @@ void cv::ocl::gemm(const oclMat &src1, const oclMat &src2, double alpha, clAmdBlasTeardown(); } #endif -#endif diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp index 5c9b75bf5..506dc6b0c 100644 --- a/modules/ocl/src/haar.cpp +++ b/modules/ocl/src/haar.cpp @@ -47,15 +47,10 @@ // //M*/ -/* Haar features calculation */ -//#define EMU - #include "precomp.hpp" #include #include -#ifdef EMU -#include "runCL.h" -#endif + using namespace cv; using namespace cv::ocl; using namespace std; @@ -926,7 +921,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS if( gimg.cols < minSize.width || gimg.rows < minSize.height ) CV_Error(CV_StsError, "Image too small"); - if( (flags & CV_HAAR_SCALE_IMAGE) && gimg.clCxt->impl->devName.find("Intel(R) HD Graphics") == string::npos ) + if( (flags & CV_HAAR_SCALE_IMAGE) ) { CvSize winSize0 = cascade->orig_window_size; //float scalefactor = 1.1f; @@ -1114,30 +1109,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS args.push_back ( make_pair(sizeof(cl_int4) , (void *)&p )); args.push_back ( make_pair(sizeof(cl_int4) , (void *)&pq )); args.push_back ( make_pair(sizeof(cl_float) , (void *)&correction )); - /* - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&nodebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&pixelstep)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&p)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int4), (void *)&pq)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_float), (void *)&correction));*/ - //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&n)); - //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&grpnumperline)); - //openCLSafeCall(clSetKernelArg(kernel,argcount++,sizeof(cl_int),(void*)&totalgrp)); - // openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - - // openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue)); openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascade", globalThreads, localThreads, args, -1, -1); //t = (double)cvGetTickCount() - t; //printf( "detection time = %g ms\n", t/((double)cvGetTickFrequency()*1000.) ); @@ -1258,13 +1230,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS scaleinfo[i].factor = factor; int startnodenum = nodenum * i; float factor2 = (float)factor; - /* - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&nodebuffer)); - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_mem), (void *)&newnodebuffer)); - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&factor2)); - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_float), (void *)&correction[i])); - openCLSafeCall(clSetKernelArg(kernel2, argcounts++, sizeof(cl_int), (void *)&startnodenum)); - */ vector > args1; args1.push_back ( make_pair(sizeof(cl_mem) , (void *)&nodebuffer )); @@ -1298,22 +1263,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS correctionbuffer = openCLCreateBuffer(gsum.clCxt, CL_MEM_READ_ONLY, sizeof(cl_float) * loopcount); openCLSafeCall(clEnqueueWriteBuffer(gsum.clCxt->impl->clCmdQueue, correctionbuffer, 1, 0, sizeof(cl_float)*loopcount, correction, 0, NULL, NULL)); //int argcount = 0; - /*openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&stagebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&scaleinfobuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&newnodebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsum.data)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&gsqsum.data)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&candidatebuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&step)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&loopcount)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&endstage)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&startnode)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&splitnode)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&pbuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_mem), (void *)&correctionbuffer)); - openCLSafeCall(clSetKernelArg(kernel, argcount++, sizeof(cl_int), (void *)&nodenum));*/ vector > args; args.push_back ( make_pair(sizeof(cl_mem) , (void *)&stagebuffer )); @@ -1335,8 +1284,6 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS openCLExecuteKernel(gsum.clCxt, &haarobjectdetect_scaled2, "gpuRunHaarClassifierCascade_scaled2", globalThreads, localThreads, args, -1, -1); - //openCLSafeCall(clEnqueueNDRangeKernel(gsum.clCxt->impl->clCmdQueue, kernel, 2, NULL, globalThreads, localThreads, 0, NULL, NULL)); - //openCLSafeCall(clFinish(gsum.clCxt->impl->clCmdQueue)); //openCLSafeCall(clEnqueueReadBuffer(gsum.clCxt->clCmdQueue,candidatebuffer,1,0,4*sizeof(int)*outputsz,candidate,0,NULL,NULL)); candidate = (int *)clEnqueueMapBuffer(gsum.clCxt->impl->clCmdQueue, candidatebuffer, 1, CL_MAP_READ, 0, 4 * sizeof(int), 0, 0, 0, &status); @@ -1407,204 +1354,10 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS } -// static CvHaarClassifierCascade * gpuLoadCascadeCART( const char **input_cascade, int n, CvSize orig_window_size ) -// { -// int i; -// CvHaarClassifierCascade *cascade = gpuCreateHaarClassifierCascade(n); -// cascade->orig_window_size = orig_window_size; - -// for( i = 0; i < n; i++ ) -// { -// int j, count, l; -// float threshold = 0; -// const char *stage = input_cascade[i]; -// int dl = 0; - -// /* tree links */ -// int parent = -1; -// int next = -1; - -// sscanf( stage, "%d%n", &count, &dl ); -// stage += dl; - -// assert( count > 0 ); -// cascade->stage_classifier[i].count = count; -// cascade->stage_classifier[i].classifier = -// (CvHaarClassifier *)cvAlloc( count * sizeof(cascade->stage_classifier[i].classifier[0])); - -// for( j = 0; j < count; j++ ) -// { -// CvHaarClassifier *classifier = cascade->stage_classifier[i].classifier + j; -// int k, rects = 0; -// char str[100]; - -// sscanf( stage, "%d%n", &classifier->count, &dl ); -// stage += dl; - -// classifier->haar_feature = (CvHaarFeature *) cvAlloc( -// classifier->count * ( sizeof( *classifier->haar_feature ) + -// sizeof( *classifier->threshold ) + -// sizeof( *classifier->left ) + -// sizeof( *classifier->right ) ) + -// (classifier->count + 1) * sizeof( *classifier->alpha ) ); -// classifier->threshold = (float *) (classifier->haar_feature + classifier->count); -// classifier->left = (int *) (classifier->threshold + classifier->count); -// classifier->right = (int *) (classifier->left + classifier->count); -// classifier->alpha = (float *) (classifier->right + classifier->count); - -// for( l = 0; l < classifier->count; l++ ) -// { -// sscanf( stage, "%d%n", &rects, &dl ); -// stage += dl; - -// assert( rects >= 2 && rects <= CV_HAAR_FEATURE_MAX ); - -// for( k = 0; k < rects; k++ ) -// { -// CvRect r; -// int band = 0; -// sscanf( stage, "%d%d%d%d%d%f%n", -// &r.x, &r.y, &r.width, &r.height, &band, -// &(classifier->haar_feature[l].rect[k].weight), &dl ); -// stage += dl; -// classifier->haar_feature[l].rect[k].r = r; -// } -// sscanf( stage, "%s%n", str, &dl ); -// stage += dl; - -// classifier->haar_feature[l].tilted = strncmp( str, "tilted", 6 ) == 0; - -// for( k = rects; k < CV_HAAR_FEATURE_MAX; k++ ) -// { -// memset( classifier->haar_feature[l].rect + k, 0, -// sizeof(classifier->haar_feature[l].rect[k]) ); -// } - -// sscanf( stage, "%f%d%d%n", &(classifier->threshold[l]), -// &(classifier->left[l]), -// &(classifier->right[l]), &dl ); -// stage += dl; -// } -// for( l = 0; l <= classifier->count; l++ ) -// { -// sscanf( stage, "%f%n", &(classifier->alpha[l]), &dl ); -// stage += dl; -// } -// } - -// sscanf( stage, "%f%n", &threshold, &dl ); -// stage += dl; - -// cascade->stage_classifier[i].threshold = threshold; - -// /* load tree links */ -// if( sscanf( stage, "%d%d%n", &parent, &next, &dl ) != 2 ) -// { -// parent = i - 1; -// next = -1; -// } -// stage += dl; - -// cascade->stage_classifier[i].parent = parent; -// cascade->stage_classifier[i].next = next; -// cascade->stage_classifier[i].child = -1; - -// if( parent != -1 && cascade->stage_classifier[parent].child == -1 ) -// { -// cascade->stage_classifier[parent].child = i; -// } -// } - -// return cascade; -// } - #ifndef _MAX_PATH #define _MAX_PATH 1024 #endif -// static CvHaarClassifierCascade * gpuLoadHaarClassifierCascade( const char *directory, CvSize orig_window_size ) -// { -// const char **input_cascade = 0; -// CvHaarClassifierCascade *cascade = 0; - -// int i, n; -// const char *slash; -// char name[_MAX_PATH]; -// int size = 0; -// char *ptr = 0; - -// if( !directory ) -// CV_Error( CV_StsNullPtr, "Null path is passed" ); - -// n = (int)strlen(directory) - 1; -// slash = directory[n] == '\\' || directory[n] == '/' ? "" : "/"; - -// /* try to read the classifier from directory */ -// for( n = 0; ; n++ ) -// { -// sprintf( name, "%s%s%d/AdaBoostCARTHaarClassifier.txt", directory, slash, n ); -// FILE *f = fopen( name, "rb" ); -// if( !f ) -// break; -// fseek( f, 0, SEEK_END ); -// size += ftell( f ) + 1; -// fclose(f); -// } - -// if( n == 0 && slash[0] ) -// return (CvHaarClassifierCascade *)cvLoad( directory ); - -// if( n == 0 ) -// CV_Error( CV_StsBadArg, "Invalid path" ); - -// size += (n + 1) * sizeof(char *); -// input_cascade = (const char **)cvAlloc( size ); -// ptr = (char *)(input_cascade + n + 1); - -// for( i = 0; i < n; i++ ) -// { -// sprintf( name, "%s/%d/AdaBoostCARTHaarClassifier.txt", directory, i ); -// FILE *f = fopen( name, "rb" ); -// if( !f ) -// CV_Error( CV_StsError, "" ); -// fseek( f, 0, SEEK_END ); -// size = ftell( f ); -// fseek( f, 0, SEEK_SET ); -// CV_Assert((size_t)size == fread( ptr, 1, size, f )); -// fclose(f); -// input_cascade[i] = ptr; -// ptr += size; -// *ptr++ = '\0'; -// } - -// input_cascade[n] = 0; -// cascade = gpuLoadCascadeCART( input_cascade, n, orig_window_size ); - -// if( input_cascade ) -// cvFree( &input_cascade ); - -// return cascade; -// } - - -// static void gpuReleaseHaarClassifierCascade( CvHaarClassifierCascade **_cascade ) -// { -// if( _cascade && *_cascade ) -// { -// int i, j; -// CvHaarClassifierCascade *cascade = *_cascade; - -// for( i = 0; i < cascade->count; i++ ) -// { -// for( j = 0; j < cascade->stage_classifier[i].count; j++ ) -// cvFree( &cascade->stage_classifier[i].classifier[j].haar_feature ); -// cvFree( &cascade->stage_classifier[i].classifier ); -// } -// gpuReleaseHidHaarClassifierCascade( (GpuHidHaarClassifierCascade **)&cascade->hid_cascade ); -// cvFree( _cascade ); -// } -// } - /****************************************************************************************\ * Persistence functions * @@ -1627,937 +1380,11 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS #define ICV_HAAR_PARENT_NAME "parent" #define ICV_HAAR_NEXT_NAME "next" -// static int gpuIsHaarClassifier( const void *struct_ptr ) -// { -// return CV_IS_HAAR_CLASSIFIER( struct_ptr ); -// } - -// static void * gpuReadHaarClassifier( CvFileStorage *fs, CvFileNode *node ) -// { -// CvHaarClassifierCascade *cascade = NULL; - -// char buf[256]; -// CvFileNode *seq_fn = NULL; /* sequence */ -// CvFileNode *fn = NULL; -// CvFileNode *stages_fn = NULL; -// CvSeqReader stages_reader; -// int n; -// int i, j, k, l; -// int parent, next; - -// stages_fn = cvGetFileNodeByName( fs, node, ICV_HAAR_STAGES_NAME ); -// if( !stages_fn || !CV_NODE_IS_SEQ( stages_fn->tag) ) -// CV_Error( CV_StsError, "Invalid stages node" ); - -// n = stages_fn->data.seq->total; -// cascade = gpuCreateHaarClassifierCascade(n); - -// /* read size */ -// seq_fn = cvGetFileNodeByName( fs, node, ICV_HAAR_SIZE_NAME ); -// if( !seq_fn || !CV_NODE_IS_SEQ( seq_fn->tag ) || seq_fn->data.seq->total != 2 ) -// CV_Error( CV_StsError, "size node is not a valid sequence." ); -// fn = (CvFileNode *) cvGetSeqElem( seq_fn->data.seq, 0 ); -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 ) -// CV_Error( CV_StsError, "Invalid size node: width must be positive integer" ); -// cascade->orig_window_size.width = fn->data.i; -// fn = (CvFileNode *) cvGetSeqElem( seq_fn->data.seq, 1 ); -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 ) -// CV_Error( CV_StsError, "Invalid size node: height must be positive integer" ); -// cascade->orig_window_size.height = fn->data.i; - -// cvStartReadSeq( stages_fn->data.seq, &stages_reader ); -// for( i = 0; i < n; ++i ) -// { -// CvFileNode *stage_fn; -// CvFileNode *trees_fn; -// CvSeqReader trees_reader; - -// stage_fn = (CvFileNode *) stages_reader.ptr; -// if( !CV_NODE_IS_MAP( stage_fn->tag ) ) -// { -// sprintf( buf, "Invalid stage %d", i ); -// CV_Error( CV_StsError, buf ); -// } - -// trees_fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_TREES_NAME ); -// if( !trees_fn || !CV_NODE_IS_SEQ( trees_fn->tag ) -// || trees_fn->data.seq->total <= 0 ) -// { -// sprintf( buf, "Trees node is not a valid sequence. (stage %d)", i ); -// CV_Error( CV_StsError, buf ); -// } - -// cascade->stage_classifier[i].classifier = -// (CvHaarClassifier *) cvAlloc( trees_fn->data.seq->total -// * sizeof( cascade->stage_classifier[i].classifier[0] ) ); -// for( j = 0; j < trees_fn->data.seq->total; ++j ) -// { -// cascade->stage_classifier[i].classifier[j].haar_feature = NULL; -// } -// cascade->stage_classifier[i].count = trees_fn->data.seq->total; - -// cvStartReadSeq( trees_fn->data.seq, &trees_reader ); -// for( j = 0; j < trees_fn->data.seq->total; ++j ) -// { -// CvFileNode *tree_fn; -// CvSeqReader tree_reader; -// CvHaarClassifier *classifier; -// int last_idx; - -// classifier = &cascade->stage_classifier[i].classifier[j]; -// tree_fn = (CvFileNode *) trees_reader.ptr; -// if( !CV_NODE_IS_SEQ( tree_fn->tag ) || tree_fn->data.seq->total <= 0 ) -// { -// sprintf( buf, "Tree node is not a valid sequence." -// " (stage %d, tree %d)", i, j ); -// CV_Error( CV_StsError, buf ); -// } - -// classifier->count = tree_fn->data.seq->total; -// classifier->haar_feature = (CvHaarFeature *) cvAlloc( -// classifier->count * ( sizeof( *classifier->haar_feature ) + -// sizeof( *classifier->threshold ) + -// sizeof( *classifier->left ) + -// sizeof( *classifier->right ) ) + -// (classifier->count + 1) * sizeof( *classifier->alpha ) ); -// classifier->threshold = (float *) (classifier->haar_feature + classifier->count); -// classifier->left = (int *) (classifier->threshold + classifier->count); -// classifier->right = (int *) (classifier->left + classifier->count); -// classifier->alpha = (float *) (classifier->right + classifier->count); - -// cvStartReadSeq( tree_fn->data.seq, &tree_reader ); -// for( k = 0, last_idx = 0; k < tree_fn->data.seq->total; ++k ) -// { -// CvFileNode *node_fn; -// CvFileNode *feature_fn; -// CvFileNode *rects_fn; -// CvSeqReader rects_reader; - -// node_fn = (CvFileNode *) tree_reader.ptr; -// if( !CV_NODE_IS_MAP( node_fn->tag ) ) -// { -// sprintf( buf, "Tree node %d is not a valid map. (stage %d, tree %d)", -// k, i, j ); -// CV_Error( CV_StsError, buf ); -// } -// feature_fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_FEATURE_NAME ); -// if( !feature_fn || !CV_NODE_IS_MAP( feature_fn->tag ) ) -// { -// sprintf( buf, "Feature node is not a valid map. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// rects_fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_RECTS_NAME ); -// if( !rects_fn || !CV_NODE_IS_SEQ( rects_fn->tag ) -// || rects_fn->data.seq->total < 1 -// || rects_fn->data.seq->total > CV_HAAR_FEATURE_MAX ) -// { -// sprintf( buf, "Rects node is not a valid sequence. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// cvStartReadSeq( rects_fn->data.seq, &rects_reader ); -// for( l = 0; l < rects_fn->data.seq->total; ++l ) -// { -// CvFileNode *rect_fn; -// CvRect r; - -// rect_fn = (CvFileNode *) rects_reader.ptr; -// if( !CV_NODE_IS_SEQ( rect_fn->tag ) || rect_fn->data.seq->total != 5 ) -// { -// sprintf( buf, "Rect %d is not a valid sequence. " -// "(stage %d, tree %d, node %d)", l, i, j, k ); -// CV_Error( CV_StsError, buf ); -// } - -// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 0 ); -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 ) -// { -// sprintf( buf, "x coordinate must be non-negative integer. " -// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); -// CV_Error( CV_StsError, buf ); -// } -// r.x = fn->data.i; -// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 1 ); -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i < 0 ) -// { -// sprintf( buf, "y coordinate must be non-negative integer. " -// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); -// CV_Error( CV_StsError, buf ); -// } -// r.y = fn->data.i; -// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 2 ); -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 -// || r.x + fn->data.i > cascade->orig_window_size.width ) -// { -// sprintf( buf, "width must be positive integer and " -// "(x + width) must not exceed window width. " -// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); -// CV_Error( CV_StsError, buf ); -// } -// r.width = fn->data.i; -// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 3 ); -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= 0 -// || r.y + fn->data.i > cascade->orig_window_size.height ) -// { -// sprintf( buf, "height must be positive integer and " -// "(y + height) must not exceed window height. " -// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); -// CV_Error( CV_StsError, buf ); -// } -// r.height = fn->data.i; -// fn = CV_SEQ_ELEM( rect_fn->data.seq, CvFileNode, 4 ); -// if( !CV_NODE_IS_REAL( fn->tag ) ) -// { -// sprintf( buf, "weight must be real number. " -// "(stage %d, tree %d, node %d, rect %d)", i, j, k, l ); -// CV_Error( CV_StsError, buf ); -// } - -// classifier->haar_feature[k].rect[l].weight = (float) fn->data.f; -// classifier->haar_feature[k].rect[l].r = r; - -// CV_NEXT_SEQ_ELEM( sizeof( *rect_fn ), rects_reader ); -// } /* for each rect */ -// for( l = rects_fn->data.seq->total; l < CV_HAAR_FEATURE_MAX; ++l ) -// { -// classifier->haar_feature[k].rect[l].weight = 0; -// classifier->haar_feature[k].rect[l].r = cvRect( 0, 0, 0, 0 ); -// } - -// fn = cvGetFileNodeByName( fs, feature_fn, ICV_HAAR_TILTED_NAME); -// if( !fn || !CV_NODE_IS_INT( fn->tag ) ) -// { -// sprintf( buf, "tilted must be 0 or 1. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// classifier->haar_feature[k].tilted = ( fn->data.i != 0 ); -// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_THRESHOLD_NAME); -// if( !fn || !CV_NODE_IS_REAL( fn->tag ) ) -// { -// sprintf( buf, "threshold must be real number. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// classifier->threshold[k] = (float) fn->data.f; -// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_LEFT_NODE_NAME); -// if( fn ) -// { -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k -// || fn->data.i >= tree_fn->data.seq->total ) -// { -// sprintf( buf, "left node must be valid node number. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// /* left node */ -// classifier->left[k] = fn->data.i; -// } -// else -// { -// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_LEFT_VAL_NAME ); -// if( !fn ) -// { -// sprintf( buf, "left node or left value must be specified. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// if( !CV_NODE_IS_REAL( fn->tag ) ) -// { -// sprintf( buf, "left value must be real number. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// /* left value */ -// if( last_idx >= classifier->count + 1 ) -// { -// sprintf( buf, "Tree structure is broken: too many values. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// classifier->left[k] = -last_idx; -// classifier->alpha[last_idx++] = (float) fn->data.f; -// } -// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_RIGHT_NODE_NAME); -// if( fn ) -// { -// if( !CV_NODE_IS_INT( fn->tag ) || fn->data.i <= k -// || fn->data.i >= tree_fn->data.seq->total ) -// { -// sprintf( buf, "right node must be valid node number. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// /* right node */ -// classifier->right[k] = fn->data.i; -// } -// else -// { -// fn = cvGetFileNodeByName( fs, node_fn, ICV_HAAR_RIGHT_VAL_NAME ); -// if( !fn ) -// { -// sprintf( buf, "right node or right value must be specified. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// if( !CV_NODE_IS_REAL( fn->tag ) ) -// { -// sprintf( buf, "right value must be real number. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// /* right value */ -// if( last_idx >= classifier->count + 1 ) -// { -// sprintf( buf, "Tree structure is broken: too many values. " -// "(stage %d, tree %d, node %d)", i, j, k ); -// CV_Error( CV_StsError, buf ); -// } -// classifier->right[k] = -last_idx; -// classifier->alpha[last_idx++] = (float) fn->data.f; -// } - -// CV_NEXT_SEQ_ELEM( sizeof( *node_fn ), tree_reader ); -// } /* for each node */ -// if( last_idx != classifier->count + 1 ) -// { -// sprintf( buf, "Tree structure is broken: too few values. " -// "(stage %d, tree %d)", i, j ); -// CV_Error( CV_StsError, buf ); -// } - -// CV_NEXT_SEQ_ELEM( sizeof( *tree_fn ), trees_reader ); -// } /* for each tree */ - -// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_STAGE_THRESHOLD_NAME); -// if( !fn || !CV_NODE_IS_REAL( fn->tag ) ) -// { -// sprintf( buf, "stage threshold must be real number. (stage %d)", i ); -// CV_Error( CV_StsError, buf ); -// } -// cascade->stage_classifier[i].threshold = (float) fn->data.f; - -// parent = i - 1; -// next = -1; - -// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_PARENT_NAME ); -// if( !fn || !CV_NODE_IS_INT( fn->tag ) -// || fn->data.i < -1 || fn->data.i >= cascade->count ) -// { -// sprintf( buf, "parent must be integer number. (stage %d)", i ); -// CV_Error( CV_StsError, buf ); -// } -// parent = fn->data.i; -// fn = cvGetFileNodeByName( fs, stage_fn, ICV_HAAR_NEXT_NAME ); -// if( !fn || !CV_NODE_IS_INT( fn->tag ) -// || fn->data.i < -1 || fn->data.i >= cascade->count ) -// { -// sprintf( buf, "next must be integer number. (stage %d)", i ); -// CV_Error( CV_StsError, buf ); -// } -// next = fn->data.i; - -// cascade->stage_classifier[i].parent = parent; -// cascade->stage_classifier[i].next = next; -// cascade->stage_classifier[i].child = -1; - -// if( parent != -1 && cascade->stage_classifier[parent].child == -1 ) -// { -// cascade->stage_classifier[parent].child = i; -// } - -// CV_NEXT_SEQ_ELEM( sizeof( *stage_fn ), stages_reader ); -// } /* for each stage */ - -// return cascade; -// } - -// static void gpuWriteHaarClassifier( CvFileStorage *fs, const char *name, const void *struct_ptr, -// CvAttrList attributes ) -// { -// int i, j, k, l; -// char buf[256]; -// const CvHaarClassifierCascade *cascade = (const CvHaarClassifierCascade *) struct_ptr; - -// /* TODO: parameters check */ - -// cvStartWriteStruct( fs, name, CV_NODE_MAP, CV_TYPE_NAME_HAAR, attributes ); - -// cvStartWriteStruct( fs, ICV_HAAR_SIZE_NAME, CV_NODE_SEQ | CV_NODE_FLOW ); -// cvWriteInt( fs, NULL, cascade->orig_window_size.width ); -// cvWriteInt( fs, NULL, cascade->orig_window_size.height ); -// cvEndWriteStruct( fs ); /* size */ - -// cvStartWriteStruct( fs, ICV_HAAR_STAGES_NAME, CV_NODE_SEQ ); -// for( i = 0; i < cascade->count; ++i ) -// { -// cvStartWriteStruct( fs, NULL, CV_NODE_MAP ); -// sprintf( buf, "stage %d", i ); -// cvWriteComment( fs, buf, 1 ); - -// cvStartWriteStruct( fs, ICV_HAAR_TREES_NAME, CV_NODE_SEQ ); - -// for( j = 0; j < cascade->stage_classifier[i].count; ++j ) -// { -// CvHaarClassifier *tree = &cascade->stage_classifier[i].classifier[j]; - -// cvStartWriteStruct( fs, NULL, CV_NODE_SEQ ); -// sprintf( buf, "tree %d", j ); -// cvWriteComment( fs, buf, 1 ); - -// for( k = 0; k < tree->count; ++k ) -// { -// CvHaarFeature *feature = &tree->haar_feature[k]; - -// cvStartWriteStruct( fs, NULL, CV_NODE_MAP ); -// if( k ) -// { -// sprintf( buf, "node %d", k ); -// } -// else -// { -// sprintf( buf, "root node" ); -// } -// cvWriteComment( fs, buf, 1 ); - -// cvStartWriteStruct( fs, ICV_HAAR_FEATURE_NAME, CV_NODE_MAP ); - -// cvStartWriteStruct( fs, ICV_HAAR_RECTS_NAME, CV_NODE_SEQ ); -// for( l = 0; l < CV_HAAR_FEATURE_MAX && feature->rect[l].r.width != 0; ++l ) -// { -// cvStartWriteStruct( fs, NULL, CV_NODE_SEQ | CV_NODE_FLOW ); -// cvWriteInt( fs, NULL, feature->rect[l].r.x ); -// cvWriteInt( fs, NULL, feature->rect[l].r.y ); -// cvWriteInt( fs, NULL, feature->rect[l].r.width ); -// cvWriteInt( fs, NULL, feature->rect[l].r.height ); -// cvWriteReal( fs, NULL, feature->rect[l].weight ); -// cvEndWriteStruct( fs ); /* rect */ -// } -// cvEndWriteStruct( fs ); /* rects */ -// cvWriteInt( fs, ICV_HAAR_TILTED_NAME, feature->tilted ); -// cvEndWriteStruct( fs ); /* feature */ - -// cvWriteReal( fs, ICV_HAAR_THRESHOLD_NAME, tree->threshold[k]); - -// if( tree->left[k] > 0 ) -// { -// cvWriteInt( fs, ICV_HAAR_LEFT_NODE_NAME, tree->left[k] ); -// } -// else -// { -// cvWriteReal( fs, ICV_HAAR_LEFT_VAL_NAME, -// tree->alpha[-tree->left[k]] ); -// } - -// if( tree->right[k] > 0 ) -// { -// cvWriteInt( fs, ICV_HAAR_RIGHT_NODE_NAME, tree->right[k] ); -// } -// else -// { -// cvWriteReal( fs, ICV_HAAR_RIGHT_VAL_NAME, -// tree->alpha[-tree->right[k]] ); -// } - -// cvEndWriteStruct( fs ); /* split */ -// } - -// cvEndWriteStruct( fs ); /* tree */ -// } - -// cvEndWriteStruct( fs ); /* trees */ - -// cvWriteReal( fs, ICV_HAAR_STAGE_THRESHOLD_NAME, cascade->stage_classifier[i].threshold); -// cvWriteInt( fs, ICV_HAAR_PARENT_NAME, cascade->stage_classifier[i].parent ); -// cvWriteInt( fs, ICV_HAAR_NEXT_NAME, cascade->stage_classifier[i].next ); - -// cvEndWriteStruct( fs ); /* stage */ -// } /* for each stage */ - -// cvEndWriteStruct( fs ); /* stages */ -// cvEndWriteStruct( fs ); /* root */ -// } - -// static void * gpuCloneHaarClassifier( const void *struct_ptr ) -// { -// CvHaarClassifierCascade *cascade = NULL; - -// int i, j, k, n; -// const CvHaarClassifierCascade *cascade_src = -// (const CvHaarClassifierCascade *) struct_ptr; - -// n = cascade_src->count; -// cascade = gpuCreateHaarClassifierCascade(n); -// cascade->orig_window_size = cascade_src->orig_window_size; - -// for( i = 0; i < n; ++i ) -// { -// cascade->stage_classifier[i].parent = cascade_src->stage_classifier[i].parent; -// cascade->stage_classifier[i].next = cascade_src->stage_classifier[i].next; -// cascade->stage_classifier[i].child = cascade_src->stage_classifier[i].child; -// cascade->stage_classifier[i].threshold = cascade_src->stage_classifier[i].threshold; - -// cascade->stage_classifier[i].count = 0; -// cascade->stage_classifier[i].classifier = -// (CvHaarClassifier *) cvAlloc( cascade_src->stage_classifier[i].count -// * sizeof( cascade->stage_classifier[i].classifier[0] ) ); - -// cascade->stage_classifier[i].count = cascade_src->stage_classifier[i].count; - -// for( j = 0; j < cascade->stage_classifier[i].count; ++j ) -// cascade->stage_classifier[i].classifier[j].haar_feature = NULL; - -// for( j = 0; j < cascade->stage_classifier[i].count; ++j ) -// { -// const CvHaarClassifier *classifier_src = -// &cascade_src->stage_classifier[i].classifier[j]; -// CvHaarClassifier *classifier = -// &cascade->stage_classifier[i].classifier[j]; - -// classifier->count = classifier_src->count; -// classifier->haar_feature = (CvHaarFeature *) cvAlloc( -// classifier->count * ( sizeof( *classifier->haar_feature ) + -// sizeof( *classifier->threshold ) + -// sizeof( *classifier->left ) + -// sizeof( *classifier->right ) ) + -// (classifier->count + 1) * sizeof( *classifier->alpha ) ); -// classifier->threshold = (float *) (classifier->haar_feature + classifier->count); -// classifier->left = (int *) (classifier->threshold + classifier->count); -// classifier->right = (int *) (classifier->left + classifier->count); -// classifier->alpha = (float *) (classifier->right + classifier->count); -// for( k = 0; k < classifier->count; ++k ) -// { -// classifier->haar_feature[k] = classifier_src->haar_feature[k]; -// classifier->threshold[k] = classifier_src->threshold[k]; -// classifier->left[k] = classifier_src->left[k]; -// classifier->right[k] = classifier_src->right[k]; -// classifier->alpha[k] = classifier_src->alpha[k]; -// } -// classifier->alpha[classifier->count] = -// classifier_src->alpha[classifier->count]; -// } -// } - -// return cascade; -// } - -#if 0 -CvType haar_type( CV_TYPE_NAME_HAAR, gpuIsHaarClassifier, - (CvReleaseFunc)gpuReleaseHaarClassifierCascade, - gpuReadHaarClassifier, gpuWriteHaarClassifier, - gpuCloneHaarClassifier ); - - -namespace cv +static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, CvPoint pt, int start_stage */) { - -HaarClassifierCascade::HaarClassifierCascade() {} -HaarClassifierCascade::HaarClassifierCascade(const String &filename) -{ - load(filename); -} - -bool HaarClassifierCascade::load(const String &filename) -{ - cascade = Ptr((CvHaarClassifierCascade *)cvLoad(filename.c_str(), 0, 0, 0)); - return (CvHaarClassifierCascade *)cascade != 0; -} - -void HaarClassifierCascade::detectMultiScale( const Mat &image, - Vector &objects, double scaleFactor, - int minNeighbors, int flags, - Size minSize ) -{ - MemStorage storage(cvCreateMemStorage(0)); - CvMat _image = image; - CvSeq *_objects = gpuHaarDetectObjects( &_image, cascade, storage, scaleFactor, - minNeighbors, flags, minSize ); - Seq(_objects).copyTo(objects); -} - -int HaarClassifierCascade::runAt(Point pt, int startStage, int) const -{ - return gpuRunHaarClassifierCascade(cascade, pt, startStage); -} - -void HaarClassifierCascade::setImages( const Mat &sum, const Mat &sqsum, - const Mat &tilted, double scale ) -{ - CvMat _sum = sum, _sqsum = sqsum, _tilted = tilted; - gpuSetImagesForHaarClassifierCascade( cascade, &_sum, &_sqsum, &_tilted, scale ); -} - -} -#endif - - - - - - - - - - - - - - - -/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////reserved functios////////////////////////////////////////////////////////////////////////// -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - -/*#if CV_SSE2 -# if CV_SSE4 || defined __SSE4__ -# include -# else -# define _mm_blendv_pd(a, b, m) _mm_xor_pd(a, _mm_and_pd(_mm_xor_pd(b, a), m)) -# define _mm_blendv_ps(a, b, m) _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(b, a), m)) -# endif -#if defined CV_ICC -# define CV_HAAR_USE_SSE 1 -#endif -#endif*/ - - -/* -CV_IMPL void -gpuSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade, -const CvArr* _sum, -const CvArr* _sqsum, -const CvArr* _tilted_sum, -double scale ) -{ -CvMat sum_stub, *sum = (CvMat*)_sum; -CvMat sqsum_stub, *sqsum = (CvMat*)_sqsum; -CvMat tilted_stub, *tilted = (CvMat*)_tilted_sum; -GpuHidHaarClassifierCascade* cascade; -int coi0 = 0, coi1 = 0; -int i; -int datasize; -int totalclassifier; -CvRect equRect; -double weight_scale; -int rows,cols; - -if( !CV_IS_HAAR_CLASSIFIER(_cascade) ) -CV_Error( !_cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier pointer" ); - -if( scale <= 0 ) -CV_Error( CV_StsOutOfRange, "Scale must be positive" ); - -sum = cvGetMat( sum, &sum_stub, &coi0 ); -sqsum = cvGetMat( sqsum, &sqsum_stub, &coi1 ); - -if( coi0 || coi1 ) -CV_Error( CV_BadCOI, "COI is not supported" ); - -if( !CV_ARE_SIZES_EQ( sum, sqsum )) -CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" ); - -if( CV_MAT_TYPE(sqsum->type) != CV_64FC1 || -CV_MAT_TYPE(sum->type) != CV_32SC1 ) -CV_Error( CV_StsUnsupportedFormat, -"Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" ); - -if( !_cascade->hid_cascade ) -gpuCreateHidHaarClassifierCascade(_cascade,&datasize,&totalclassifier); - -cascade =(GpuHidHaarClassifierCascade *)_cascade->hid_cascade; - -if( cascade->has_tilted_features ) -{ -tilted = cvGetMat( tilted, &tilted_stub, &coi1 ); - -if( CV_MAT_TYPE(tilted->type) != CV_32SC1 ) -CV_Error( CV_StsUnsupportedFormat, -"Only (32s, 64f, 32s) combination of (sum,sqsum,tilted_sum) formats is allowed" ); - -if( sum->step != tilted->step ) -CV_Error( CV_StsUnmatchedSizes, -"Sum and tilted_sum must have the same stride (step, widthStep)" ); - -if( !CV_ARE_SIZES_EQ( sum, tilted )) -CV_Error( CV_StsUnmatchedSizes, "All integral images must have the same size" ); -//cascade->tilted = *tilted; -} - -_cascade->scale = scale; -_cascade->real_window_size.width = cvRound( _cascade->orig_window_size.width * scale ); -_cascade->real_window_size.height = cvRound( _cascade->orig_window_size.height * scale ); - -//cascade->sum = *sum; -//cascade->sqsum = *sqsum; - -equRect.x = equRect.y = cvRound(scale); -equRect.width = cvRound((_cascade->orig_window_size.width-2)*scale); -equRect.height = cvRound((_cascade->orig_window_size.height-2)*scale); -weight_scale = 1./(equRect.width*equRect.height); -cascade->inv_window_area = weight_scale; - -cascade->p0 = sum_elem_ptr(*sum, equRect.y, equRect.x); -cascade->p1 = sum_elem_ptr(*sum, equRect.y, equRect.x + equRect.width ); -cascade->p2 = sum_elem_ptr(*sum, equRect.y + equRect.height, equRect.x ); -cascade->p3 = sum_elem_ptr(*sum, equRect.y + equRect.height, -equRect.x + equRect.width ); -*/ -/* rows=sum->rows; -cols=sum->cols; -cascade->p0 = equRect.y*cols + equRect.x; -cascade->p1 = equRect.y*cols + equRect.x + equRect.width; -cascade->p2 = (equRect.y + equRect.height) * cols + equRect.x; -cascade->p3 = (equRect.y + equRect.height) * cols + equRect.x + equRect.width ; -*/ -/* -cascade->pq0 = sqsum_elem_ptr(*sqsum, equRect.y, equRect.x); -cascade->pq1 = sqsum_elem_ptr(*sqsum, equRect.y, equRect.x + equRect.width ); -cascade->pq2 = sqsum_elem_ptr(*sqsum, equRect.y + equRect.height, equRect.x ); -cascade->pq3 = sqsum_elem_ptr(*sqsum, equRect.y + equRect.height, -equRect.x + equRect.width ); -*/ -/* init pointers in haar features according to real window size and -given image pointers */ -/* for( i = 0; i < _cascade->count; i++ ) -{ -int j, k, l; -for( j = 0; j < cascade->stage_classifier[i].count; j++ ) -{ -for( l = 0; l < cascade->stage_classifier[i].classifier[j].count; l++ ) -{ -CvHaarFeature* feature = -&_cascade->stage_classifier[i].classifier[j].haar_feature[l]; -*/ /* GpuHidHaarClassifier* classifier = -cascade->stage_classifier[i].classifier + j; */ -//GpuHidHaarFeature* hidfeature = -// &cascade->stage_classifier[i].classifier[j].node[l].feature; -/* double sum0 = 0, area0 = 0; -CvRect r[3]; - -int base_w = -1, base_h = -1; -int new_base_w = 0, new_base_h = 0; -int kx, ky; -int flagx = 0, flagy = 0; -int x0 = 0, y0 = 0; -int nr; -*/ -/* align blocks */ -/* for( k = 0; k < CV_HAAR_FEATURE_MAX; k++ ) -{ -//if( !hidfeature->rect[k].p0 ) -// break; -r[k] = feature->rect[k].r; -base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) ); -base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) ); -base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) ); -base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) ); -} - -nr = k; - -base_w += 1; -base_h += 1; -kx = r[0].width / base_w; -ky = r[0].height / base_h; - -if( kx <= 0 ) -{ -flagx = 1; -new_base_w = cvRound( r[0].width * scale ) / kx; -x0 = cvRound( r[0].x * scale ); -} - -if( ky <= 0 ) -{ -flagy = 1; -new_base_h = cvRound( r[0].height * scale ) / ky; -y0 = cvRound( r[0].y * scale ); -} - -for( k = 0; k < nr; k++ ) -{ -CvRect tr; -double correction_ratio; - -if( flagx ) -{ -tr.x = (r[k].x - r[0].x) * new_base_w / base_w + x0; -tr.width = r[k].width * new_base_w / base_w; -} -else -{ -tr.x = cvRound( r[k].x * scale ); -tr.width = cvRound( r[k].width * scale ); -} - -if( flagy ) -{ -tr.y = (r[k].y - r[0].y) * new_base_h / base_h + y0; -tr.height = r[k].height * new_base_h / base_h; -} -else -{ -tr.y = cvRound( r[k].y * scale ); -tr.height = cvRound( r[k].height * scale ); -} - -#if CV_ADJUST_WEIGHTS -{ -// RAINER START -const float orig_feature_size = (float)(feature->rect[k].r.width)*feature->rect[k].r.height; -const float orig_norm_size = (float)(_cascade->orig_window_size.width)*(_cascade->orig_window_size.height); -const float feature_size = float(tr.width*tr.height); -//const float normSize = float(equRect.width*equRect.height); -float target_ratio = orig_feature_size / orig_norm_size; -//float isRatio = featureSize / normSize; -//correctionRatio = targetRatio / isRatio / normSize; -correction_ratio = target_ratio / feature_size; -// RAINER END -} -#else -correction_ratio = weight_scale * (!feature->tilted ? 1 : 0.5); -#endif - -if( !feature->tilted ) -{ -hidfeature->rect[k].p0 = tr.y * rows + tr.x; -hidfeature->rect[k].p1 = tr.y * rows + tr.x + tr.width; -hidfeature->rect[k].p2 = (tr.y + tr.height) * rows + tr.x; -hidfeature->rect[k].p3 = (tr.y + tr.height) * rows + tr.x + tr.width; - -} -else -{ -hidfeature->rect[k].p2 = (tr.y + tr.width) * rows + tr.x + tr.width; -hidfeature->rect[k].p3 = (tr.y + tr.width + tr.height) * rows + tr.x + tr.width - tr.height; -hidfeature->rect[k].p0 = tr.y*rows + tr.x; -hidfeature->rect[k].p1 = (tr.y + tr.height) * rows + tr.x - tr.height; - -} - -//hidfeature->rect[k].weight = (float)(feature->rect[k].weight * correction_ratio); - -if( k == 0 ) -area0 = tr.width * tr.height; -else -;// sum0 += hidfeature->rect[k].weight * tr.width * tr.height; -} - -//hidfeature->rect[0].weight = (float)(-sum0/area0);*/ -// } /* l */ -// } /* j */ -// } -//} -/* -CV_INLINE -double gpuEvalHidHaarClassifier( GpuHidHaarClassifier *classifier, -double variance_norm_factor, -size_t p_offset ) -{ - - int idx = 0; - do - { - GpuHidHaarTreeNode* node = classifier->node + idx; - double t = node->threshold * variance_norm_factor; - - double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; - sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; - - if( node->feature.rect[2].p0 ) - sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight; - - idx = sum < t ? node->left : node->right; - } - while( idx > 0 ); - return classifier->alpha[-idx]; - - return 0.; -} - - -*/ -static int gpuRunHaarClassifierCascade( /*const CvHaarClassifierCascade *_cascade, -CvPoint pt, int start_stage */) -{ - /* - int result = -1; - - int p_offset, pq_offset; - int i, j; - double mean, variance_norm_factor; - GpuHidHaarClassifierCascade* cascade; - - if( !CV_IS_HAAR_CLASSIFIER(_cascade) ) - CV_Error( !_cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid cascade pointer" ); - - cascade = (GpuHidHaarClassifierCascade*) _cascade->hid_cascade; - if( !cascade ) - CV_Error( CV_StsNullPtr, "Hidden cascade has not been created.\n" - "Use gpuSetImagesForHaarClassifierCascade" ); - - if( pt.x < 0 || pt.y < 0 || - pt.x + _cascade->real_window_size.width >= cascade->sum.width-2 || - pt.y + _cascade->real_window_size.height >= cascade->sum.height-2 ) - return -1; - - p_offset = pt.y * (cascade->sum.step/sizeof(sumtype)) + pt.x; - pq_offset = pt.y * (cascade->sqsum.step/sizeof(sqsumtype)) + pt.x; - mean = calc_sum(*cascade,p_offset)*cascade->inv_window_area; - variance_norm_factor = cascade->pq0[pq_offset] - cascade->pq1[pq_offset] - - cascade->pq2[pq_offset] + cascade->pq3[pq_offset]; - variance_norm_factor = variance_norm_factor*cascade->inv_window_area - mean*mean; - if( variance_norm_factor >= 0. ) - variance_norm_factor = sqrt(variance_norm_factor); - else - variance_norm_factor = 1.; - - - if( cascade->is_stump_based ) - { - for( i = start_stage; i < cascade->count; i++ ) - { - double stage_sum = 0; - - if( cascade->stage_classifier[i].two_rects ) - { - for( j = 0; j < cascade->stage_classifier[i].count; j++ ) - { - GpuHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; - GpuHidHaarTreeNode* node = classifier->node; - double t = node->threshold*variance_norm_factor; - double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; - sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; - stage_sum += classifier->alpha[sum >= t]; - } - } - else - { - for( j = 0; j < cascade->stage_classifier[i].count; j++ ) - { - GpuHidHaarClassifier* classifier = cascade->stage_classifier[i].classifier + j; - GpuHidHaarTreeNode* node = classifier->node; - double t = node->threshold*variance_norm_factor; - double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; - sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; - if( node->feature.rect[2].p0 ) - sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight; - - stage_sum += classifier->alpha[sum >= t]; - } - } - - if( stage_sum < cascade->stage_classifier[i].threshold ) - return -i; - } - } - */ return 1; } - namespace cv { namespace ocl @@ -2671,78 +1498,3 @@ struct gpuHaarDetectObjects_ScaleCascade_Invoker } } - -/* -typedef struct _ALIGNED_ON(128) GpuHidHaarFeature -{ -struct _ALIGNED_ON(32) -{ -int p0 _ALIGNED_ON(4); -int p1 _ALIGNED_ON(4); -int p2 _ALIGNED_ON(4); -int p3 _ALIGNED_ON(4); -float weight _ALIGNED_ON(4); -} -rect[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(32); -} -GpuHidHaarFeature; - - -typedef struct _ALIGNED_ON(128) GpuHidHaarTreeNode -{ -int left _ALIGNED_ON(4); -int right _ALIGNED_ON(4); -float threshold _ALIGNED_ON(4); -int p0[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16); -int p1[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16); -int p2[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16); -int p3[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16); -float weight[CV_HAAR_FEATURE_MAX] _ALIGNED_ON(16); -float alpha[2] _ALIGNED_ON(8); -// GpuHidHaarFeature feature __attribute__((aligned (128))); -} -GpuHidHaarTreeNode; - - -typedef struct _ALIGNED_ON(32) GpuHidHaarClassifier -{ -int count _ALIGNED_ON(4); -//CvHaarFeature* orig_feature; -GpuHidHaarTreeNode* node _ALIGNED_ON(8); -float* alpha _ALIGNED_ON(8); -} -GpuHidHaarClassifier; - - -typedef struct _ALIGNED_ON(64) __attribute__((aligned (64))) GpuHidHaarStageClassifier -{ -int count _ALIGNED_ON(4); -float threshold _ALIGNED_ON(4); -int two_rects _ALIGNED_ON(4); -GpuHidHaarClassifier* classifier _ALIGNED_ON(8); -struct GpuHidHaarStageClassifier* next _ALIGNED_ON(8); -struct GpuHidHaarStageClassifier* child _ALIGNED_ON(8); -struct GpuHidHaarStageClassifier* parent _ALIGNED_ON(8); -} -GpuHidHaarStageClassifier; - - -typedef struct _ALIGNED_ON(64) GpuHidHaarClassifierCascade -{ -int count _ALIGNED_ON(4); -int is_stump_based _ALIGNED_ON(4); -int has_tilted_features _ALIGNED_ON(4); -int is_tree _ALIGNED_ON(4); -int pq0 _ALIGNED_ON(4); -int pq1 _ALIGNED_ON(4); -int pq2 _ALIGNED_ON(4); -int pq3 _ALIGNED_ON(4); -int p0 _ALIGNED_ON(4); -int p1 _ALIGNED_ON(4); -int p2 _ALIGNED_ON(4); -int p3 _ALIGNED_ON(4); -float inv_window_area _ALIGNED_ON(4); -// GpuHidHaarStageClassifier* stage_classifier __attribute__((aligned (8))); -}GpuHidHaarClassifierCascade; -*/ -/* End of file. */ diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp index 7eca4fe0f..59062ae49 100644 --- a/modules/ocl/src/hog.cpp +++ b/modules/ocl/src/hog.cpp @@ -49,69 +49,6 @@ using namespace cv; using namespace cv::ocl; using namespace std; -#if !defined (HAVE_OPENCL) - -cv::ocl::HOGDescriptor::HOGDescriptor(Size, Size, Size, Size, int, double, double, bool, int) -{ - throw_nogpu(); -} -size_t cv::ocl::HOGDescriptor::getDescriptorSize() const -{ - throw_nogpu(); - return 0; -} -size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const -{ - throw_nogpu(); - return 0; -} -double cv::ocl::HOGDescriptor::getWinSigma() const -{ - throw_nogpu(); - return 0; -} -bool cv::ocl::HOGDescriptor::checkDetectorSize() const -{ - throw_nogpu(); - return false; -} -void cv::ocl::HOGDescriptor::setSVMDetector(const vector &) -{ - throw_nogpu(); -} -void cv::ocl::HOGDescriptor::detect(const oclMat &, vector &, double, Size, Size) -{ - throw_nogpu(); -} -void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &, vector &, double, Size, Size, double, int) -{ - throw_nogpu(); -} -void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &, Size, oclMat &, int) -{ - throw_nogpu(); -} -std::vector cv::ocl::HOGDescriptor::getDefaultPeopleDetector() -{ - throw_nogpu(); - return std::vector(); -} -std::vector cv::ocl::HOGDescriptor::getPeopleDetector48x96() -{ - throw_nogpu(); - return std::vector(); -} -std::vector cv::ocl::HOGDescriptor::getPeopleDetector64x128() -{ - throw_nogpu(); - return std::vector(); -} - -#else #define CELL_WIDTH 8 #define CELL_HEIGHT 8 @@ -1895,5 +1832,3 @@ void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz openCLExecuteKernel2(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); } - -#endif diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index f7d0c4394..8fbada1d3 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -59,62 +59,6 @@ using namespace cv; using namespace cv::ocl; using namespace std; -#if !defined (HAVE_OPENCL) - - -void cv::ocl::meanShiftFiltering(const oclMat &, oclMat &, int, int, TermCriteria) -{ - throw_nogpu(); -} -void cv::ocl::meanShiftProc(const oclMat &, oclMat &, oclMat &, int, int, TermCriteria) -{ - throw_nogpu(); -} -double cv::ocl::threshold(const oclMat &, oclMat &, double, int) -{ - throw_nogpu(); - return 0.0; -} -void cv::ocl::resize(const oclMat &, oclMat &, Size, double, double, int) -{ - throw_nogpu(); -} -void cv::ocl::remap(const oclMat &, oclMat &, oclMat &, oclMat &, int, int , const Scalar &) -{ - throw_nogpu(); -} - -void cv::ocl::copyMakeBorder(const oclMat &, oclMat &, int, int, int, int, const Scalar &) -{ - throw_nogpu(); -} -void cv::ocl::warpAffine(const oclMat &, oclMat &, const Mat &, Size, int) -{ - throw_nogpu(); -} -void cv::ocl::warpPerspective(const oclMat &, oclMat &, const Mat &, Size, int) -{ - throw_nogpu(); -} -void cv::ocl::integral(const oclMat &, oclMat &, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::calcHist(const oclMat &, oclMat &hist) -{ - throw_nogpu(); -} -void cv::ocl::bilateralFilter(const oclMat &, oclMat &, int, double, double, int) -{ - throw_nogpu(); -} -void cv::ocl::convolve(const oclMat &, const oclMat &, oclMat &) -{ - throw_nogpu(); -} - -#else /* !defined (HAVE_OPENCL) */ - namespace cv { namespace ocl @@ -1696,4 +1640,3 @@ void cv::ocl::convolve(const oclMat &x, const oclMat &t, oclMat &y) convolve_run(x, t, y, kernelName, &imgproc_convolve); } -#endif /* !defined (HAVE_OPENCL) */ diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index eba92a7de..5d4adfceb 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -61,82 +61,6 @@ using std::endl; //#define AMD_DOUBLE_DIFFER -#if !defined (HAVE_OPENCL) - -namespace cv -{ - namespace ocl - { - - cl_device_id getDevice() - { - throw_nogpu(); - return 0; - } - - void getComputeCapability(cl_device_id, int &major, int &minor) - { - throw_nogpu(); - } - - void openCLMallocPitch(Context * /*clCxt*/, void ** /*dev_ptr*/, size_t * /*pitch*/, - size_t /*widthInBytes*/, size_t /*height*/) - { - throw_nogpu(); - } - - void openCLMemcpy2D(Context * /*clCxt*/, void * /*dst*/, size_t /*dpitch*/, - const void * /*src*/, size_t /*spitch*/, - size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/) - { - throw_nogpu(); - } - - void openCLCopyBuffer2D(Context * /*clCxt*/, void * /*dst*/, size_t /*dpitch*/, - const void * /*src*/, size_t /*spitch*/, - size_t /*width*/, size_t /*height*/, enum openCLMemcpyKind /*kind*/) - { - throw_nogpu(); - } - - cl_mem openCLCreateBuffer(Context *, size_t, size_t) - { - throw_nogpu(); - } - - void openCLReadBuffer(Context *, cl_mem, void *, size_t) - { - throw_nogpu(); - } - - void openCLFree(void * /*devPtr*/) - { - throw_nogpu(); - } - - cl_kernel openCLGetKernelFromSource(const Context * /*clCxt*/, - const char ** /*fileName*/, string /*kernelName*/) - { - throw_nogpu(); - } - - void openCLVerifyKernel(const Context * /*clCxt*/, cl_kernel /*kernel*/, size_t * /*blockSize*/, - size_t * /*globalThreads*/, size_t * /*localThreads*/) - { - throw_nogpu(); - } - - cl_mem load_constant(cl_context context, cl_command_queue command_queue, const void *value, - const size_t size) - { - throw_nogpu(); - } - - }//namespace ocl -}//namespace cv - -#else /* !defined (HAVE_OPENCL) */ - namespace cv { namespace ocl @@ -1013,4 +937,3 @@ namespace cv }//namespace ocl }//namespace cv -#endif diff --git a/modules/ocl/src/interpolate_frames.cpp b/modules/ocl/src/interpolate_frames.cpp index d6b402093..6b9f53b85 100644 --- a/modules/ocl/src/interpolate_frames.cpp +++ b/modules/ocl/src/interpolate_frames.cpp @@ -50,17 +50,6 @@ using namespace std; using namespace cv; using namespace cv::ocl; - -#if !defined (HAVE_OPENCL) -void cv::ocl::interpolateFrames(const oclMat &frame0, const oclMat &frame1, - const oclMat &fu, const oclMat &fv, - const oclMat &bu, const oclMat &bv, - float pos, oclMat &newFrame, oclMat &buf) -{ - throw_nogpu(); -} -#else - namespace cv { namespace ocl @@ -311,5 +300,4 @@ void interpolate::bindImgTex(const oclMat &img, cl_mem &texture) clEnqueueCopyBufferToImage(img.clCxt->impl->clCmdQueue, (cl_mem)img.data, texture, 0, origin, region, 0, NULL, 0); openCLSafeCall(err); } -#endif//(HAVE_OPENCL) diff --git a/modules/ocl/src/kernels/haarobjectdetect.cl b/modules/ocl/src/kernels/haarobjectdetect.cl index 95cfa63c1..7835b4bcc 100644 --- a/modules/ocl/src/kernels/haarobjectdetect.cl +++ b/modules/ocl/src/kernels/haarobjectdetect.cl @@ -9,6 +9,7 @@ // Niko Li, newlife20080214@gmail.com // Wang Weiyan, wangweiyanster@gmail.com // Jia Haipeng, jiahaipeng95@gmail.com +// Nathan, liujun@multicorewareinc.com // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -47,14 +48,14 @@ typedef float sqsumtype; typedef struct __attribute__((aligned (128))) GpuHidHaarFeature { struct __attribute__((aligned (32))) - { - int p0 __attribute__((aligned (4))); - int p1 __attribute__((aligned (4))); - int p2 __attribute__((aligned (4))); - int p3 __attribute__((aligned (4))); - float weight __attribute__((aligned (4))); - } - rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned (32))); +{ + int p0 __attribute__((aligned (4))); + int p1 __attribute__((aligned (4))); + int p2 __attribute__((aligned (4))); + int p3 __attribute__((aligned (4))); + float weight __attribute__((aligned (4))); +} +rect[CV_HAAR_FEATURE_MAX] __attribute__((aligned (32))); } GpuHidHaarFeature; @@ -108,31 +109,31 @@ typedef struct __attribute__((aligned (64))) GpuHidHaarClassifierCascade int p2 __attribute__((aligned (4))); int p3 __attribute__((aligned (4))); float inv_window_area __attribute__((aligned (4))); -}GpuHidHaarClassifierCascade; +} GpuHidHaarClassifierCascade; __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCascade(//constant GpuHidHaarClassifierCascade * cascade, - global GpuHidHaarStageClassifier * stagecascadeptr, - global int4 * info, - global GpuHidHaarTreeNode * nodeptr, - global const int * restrict sum1, - global const float * restrict sqsum1, - global int4 * candidate, - const int pixelstep, - const int loopcount, - const int start_stage, - const int split_stage, - const int end_stage, - const int startnode, - const int splitnode, - const int4 p, - const int4 pq, - const float correction - //const int width, - //const int height, - //const int grpnumperline, - //const int totalgrp - ) + global GpuHidHaarStageClassifier * stagecascadeptr, + global int4 * info, + global GpuHidHaarTreeNode * nodeptr, + global const int * restrict sum1, + global const float * restrict sqsum1, + global int4 * candidate, + const int pixelstep, + const int loopcount, + const int start_stage, + const int split_stage, + const int end_stage, + const int startnode, + const int splitnode, + const int4 p, + const int4 pq, + const float correction + //const int width, + //const int height, + //const int grpnumperline, + //const int totalgrp +) { int grpszx = get_local_size(0); int grpszy = get_local_size(1); @@ -184,7 +185,7 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa __global const int * sum = sum1 + imgoff; __global const float * sqsum = sqsum1 + imgoff; - for(int grploop=grpidx;grploop=0.f ? sqrt(variance_norm_factor) : 1.f; @@ -270,19 +271,19 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa info2.z +=lcl_off; float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] - - lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x; + lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x; classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] - - lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y; + lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y; //if((info3.z - info3.x) && (!stageinfo.z)) //{ - info3.x +=lcl_off; - info3.z +=lcl_off; - classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] - - lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z; + info3.x +=lcl_off; + info3.z +=lcl_off; + classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] - + lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z; //} stage_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x; nodecounter++; @@ -299,12 +300,13 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa } barrier(CLK_LOCAL_MEM_FENCE); int queuecount = lclcount[0]; + barrier(CLK_LOCAL_MEM_FENCE); nodecounter = splitnode; - for(int stageloop = split_stage; stageloop< end_stage && queuecount>0;stageloop++) + for(int stageloop = split_stage; stageloop< end_stage && queuecount>0; stageloop++) { - //barrier(CLK_LOCAL_MEM_FENCE); + //barrier(CLK_LOCAL_MEM_FENCE); //if(lcl_id == 0) - lclcount[0]=0; + lclcount[0]=0; barrier(CLK_LOCAL_MEM_FENCE); int2 stageinfo = *(global int2*)(stagecascadeptr+stageloop); @@ -316,70 +318,73 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa int lcl_compute_win_id = (lcl_id >>(6-perfscale)); int lcl_loops = (stageinfo.x + lcl_compute_win -1) >> (6-perfscale); int lcl_compute_id = lcl_id - (lcl_compute_win_id << (6-perfscale)); - for(int queueloop=0;queueloop>16),readwidth,temp_coord & 0xffff); - //barrier(CLK_LOCAL_MEM_FENCE); - if(lcl_compute_win_id < queuecount) { - - int tempnodecounter = lcl_compute_id; - float part_sum = 0.f; - for(int lcl_loop=0;lcl_loopp[0][0])); - int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0])); - int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0])); - float4 w = *(__global float4*)(&(currentnodeptr->weight[0])); - float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0])); - float nodethreshold = w.w * variance_norm_factor; + int tempnodecounter = lcl_compute_id; + float part_sum = 0.f; + for(int lcl_loop=0; lcl_loopp[0][0])); + int4 info2 = *(__global int4*)(&(currentnodeptr->p[1][0])); + int4 info3 = *(__global int4*)(&(currentnodeptr->p[2][0])); + float4 w = *(__global float4*)(&(currentnodeptr->weight[0])); + float2 alpha2 = *(__global float2*)(&(currentnodeptr->alpha[0])); + float nodethreshold = w.w * variance_norm_factor; - float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] - - lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x; + info1.x +=queue_pixel; + info1.z +=queue_pixel; + info2.x +=queue_pixel; + info2.z +=queue_pixel; + + float classsum = (lcldata[mad24(info1.y,readwidth,info1.x)] - lcldata[mad24(info1.y,readwidth,info1.z)] - + lcldata[mad24(info1.w,readwidth,info1.x)] + lcldata[mad24(info1.w,readwidth,info1.z)]) * w.x; - classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] - - lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y; - //if((info3.z - info3.x) && (!stageinfo.z)) - //{ + classsum += (lcldata[mad24(info2.y,readwidth,info2.x)] - lcldata[mad24(info2.y,readwidth,info2.z)] - + lcldata[mad24(info2.w,readwidth,info2.x)] + lcldata[mad24(info2.w,readwidth,info2.z)]) * w.y; + //if((info3.z - info3.x) && (!stageinfo.z)) + //{ info3.x +=queue_pixel; info3.z +=queue_pixel; classsum += (lcldata[mad24(info3.y,readwidth,info3.x)] - lcldata[mad24(info3.y,readwidth,info3.z)] - - lcldata[mad24(info3.w,readwidth,info3.x)] + lcldata[mad24(info3.w,readwidth,info3.z)]) * w.z; - //} - part_sum += classsum >= nodethreshold ? alpha2.y : alpha2.x; - tempnodecounter +=lcl_compute_win; - }//end for(int lcl_loop=0;lcl_loop= nodethreshold ? alpha2.y : alpha2.x; + tempnodecounter +=lcl_compute_win; + }//end for(int lcl_loop=0;lcl_loop= stagethreshold && (lcl_compute_id==0)) + { + int queueindex = atomic_inc(lclcount); + lcloutindex[queueindex<<1] = temp_coord; + lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor); + } + lcl_compute_win_id +=(1<= stagethreshold && (lcl_compute_id==0)) - { - int queueindex = atomic_inc(lclcount); - lcloutindex[queueindex<<1] = temp_coord; - lcloutindex[(queueindex<<1)+1] = as_int(variance_norm_factor); - } - lcl_compute_win_id +=(1<0;stageloop++) //barrier(CLK_LOCAL_MEM_FENCE); @@ -420,139 +425,139 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa - /* - if(stagecascade->two_rects) - { - #pragma unroll - for( n = 0; n < stagecascade->count; n++ ) - { - t1 = *(node + counter); - t = t1.threshold * variance_norm_factor; - classsum = calc_sum1(t1,p_offset,0) * t1.weight[0]; - - classsum += calc_sum1(t1, p_offset,1) * t1.weight[1]; - stage_sum += classsum >= t ? t1.alpha[1]:t1.alpha[0]; - - counter++; - } - } - else - { - #pragma unroll - for( n = 0; n < stagecascade->count; n++ ) - { - t = node[counter].threshold*variance_norm_factor; - classsum = calc_sum1(node[counter],p_offset,0) * node[counter].weight[0]; - classsum += calc_sum1(node[counter],p_offset,1) * node[counter].weight[1]; - - if( node[counter].p0[2] ) - classsum += calc_sum1(node[counter],p_offset,2) * node[counter].weight[2]; - - stage_sum += classsum >= t ? node[counter].alpha[1]:node[counter].alpha[0];// modify - - counter++; - } - } - */ - /* -__kernel void gpuRunHaarClassifierCascade_ScaleWindow( - constant GpuHidHaarClassifierCascade * _cascade, - global GpuHidHaarStageClassifier * stagecascadeptr, - //global GpuHidHaarClassifier * classifierptr, - global GpuHidHaarTreeNode * nodeptr, - global int * sum, - global float * sqsum, - global int * _candidate, - int pixel_step, - int cols, - int rows, - int start_stage, - int end_stage, - //int counts, - int nodenum, - int ystep, - int detect_width, - //int detect_height, - int loopcount, - int outputstep) - //float scalefactor) +/* +if(stagecascade->two_rects) { - unsigned int x1 = get_global_id(0); - unsigned int y1 = get_global_id(1); - int p_offset; - int m, n; - int result; - int counter; - float mean, variance_norm_factor; - for(int i=0;icount; n++ ) { - constant GpuHidHaarClassifierCascade * cascade = _cascade + i; - global int * candidate = _candidate + i*outputstep; - int window_width = cascade->p1 - cascade->p0; - int window_height = window_width; - result = 1; - counter = 0; - unsigned int x = mul24(x1,ystep); - unsigned int y = mul24(y1,ystep); - if((x < cols - window_width - 1) && (y < rows - window_height -1)) - { - global GpuHidHaarStageClassifier *stagecascade = stagecascadeptr +cascade->count*i+ start_stage; - //global GpuHidHaarClassifier *classifier = classifierptr; - global GpuHidHaarTreeNode *node = nodeptr + nodenum*i; + t1 = *(node + counter); + t = t1.threshold * variance_norm_factor; + classsum = calc_sum1(t1,p_offset,0) * t1.weight[0]; - p_offset = mad24(y, pixel_step, x);// modify + classsum += calc_sum1(t1, p_offset,1) * t1.weight[1]; + stage_sum += classsum >= t ? t1.alpha[1]:t1.alpha[0]; - mean = (*(sum + p_offset + (int)cascade->p0) - *(sum + p_offset + (int)cascade->p1) - - *(sum + p_offset + (int)cascade->p2) + *(sum + p_offset + (int)cascade->p3)) - *cascade->inv_window_area; - - variance_norm_factor = *(sqsum + p_offset + cascade->p0) - *(sqsum + cascade->p1 + p_offset) - - *(sqsum + p_offset + cascade->p2) + *(sqsum + cascade->p3 + p_offset); - variance_norm_factor = variance_norm_factor * cascade->inv_window_area - mean * mean; - variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1;//modify - - // if( cascade->is_stump_based ) - //{ - for( m = start_stage; m < end_stage; m++ ) - { - float stage_sum = 0.f; - float t, classsum; - GpuHidHaarTreeNode t1; - - //#pragma unroll - for( n = 0; n < stagecascade->count; n++ ) - { - t1 = *(node + counter); - t = t1.threshold * variance_norm_factor; - classsum = calc_sum1(t1, p_offset ,0) * t1.weight[0] + calc_sum1(t1, p_offset ,1) * t1.weight[1]; - - if((t1.p0[2]) && (!stagecascade->two_rects)) - classsum += calc_sum1(t1, p_offset, 2) * t1.weight[2]; - - stage_sum += classsum >= t ? t1.alpha[1] : t1.alpha[0];// modify - counter++; - } - - if (stage_sum < stagecascade->threshold) - { - result = 0; - break; - } - - stagecascade++; - - } - if(result) - { - candidate[4 * (y1 * detect_width + x1)] = x; - candidate[4 * (y1 * detect_width + x1) + 1] = y; - candidate[4 * (y1 * detect_width + x1)+2] = window_width; - candidate[4 * (y1 * detect_width + x1) + 3] = window_height; - } - //} - } + counter++; } } +else +{ + #pragma unroll + for( n = 0; n < stagecascade->count; n++ ) + { + t = node[counter].threshold*variance_norm_factor; + classsum = calc_sum1(node[counter],p_offset,0) * node[counter].weight[0]; + classsum += calc_sum1(node[counter],p_offset,1) * node[counter].weight[1]; + + if( node[counter].p0[2] ) + classsum += calc_sum1(node[counter],p_offset,2) * node[counter].weight[2]; + + stage_sum += classsum >= t ? node[counter].alpha[1]:node[counter].alpha[0];// modify + + counter++; + } +} +*/ +/* +__kernel void gpuRunHaarClassifierCascade_ScaleWindow( + constant GpuHidHaarClassifierCascade * _cascade, + global GpuHidHaarStageClassifier * stagecascadeptr, + //global GpuHidHaarClassifier * classifierptr, + global GpuHidHaarTreeNode * nodeptr, + global int * sum, + global float * sqsum, + global int * _candidate, + int pixel_step, + int cols, + int rows, + int start_stage, + int end_stage, + //int counts, + int nodenum, + int ystep, + int detect_width, + //int detect_height, + int loopcount, + int outputstep) + //float scalefactor) +{ +unsigned int x1 = get_global_id(0); +unsigned int y1 = get_global_id(1); +int p_offset; +int m, n; +int result; +int counter; +float mean, variance_norm_factor; +for(int i=0;ip1 - cascade->p0; +int window_height = window_width; +result = 1; +counter = 0; +unsigned int x = mul24(x1,ystep); +unsigned int y = mul24(y1,ystep); +if((x < cols - window_width - 1) && (y < rows - window_height -1)) +{ +global GpuHidHaarStageClassifier *stagecascade = stagecascadeptr +cascade->count*i+ start_stage; +//global GpuHidHaarClassifier *classifier = classifierptr; +global GpuHidHaarTreeNode *node = nodeptr + nodenum*i; + +p_offset = mad24(y, pixel_step, x);// modify + +mean = (*(sum + p_offset + (int)cascade->p0) - *(sum + p_offset + (int)cascade->p1) - + *(sum + p_offset + (int)cascade->p2) + *(sum + p_offset + (int)cascade->p3)) + *cascade->inv_window_area; + +variance_norm_factor = *(sqsum + p_offset + cascade->p0) - *(sqsum + cascade->p1 + p_offset) - + *(sqsum + p_offset + cascade->p2) + *(sqsum + cascade->p3 + p_offset); +variance_norm_factor = variance_norm_factor * cascade->inv_window_area - mean * mean; +variance_norm_factor = variance_norm_factor >=0.f ? sqrt(variance_norm_factor) : 1;//modify + +// if( cascade->is_stump_based ) +//{ +for( m = start_stage; m < end_stage; m++ ) +{ +float stage_sum = 0.f; +float t, classsum; +GpuHidHaarTreeNode t1; + +//#pragma unroll +for( n = 0; n < stagecascade->count; n++ ) +{ + t1 = *(node + counter); + t = t1.threshold * variance_norm_factor; + classsum = calc_sum1(t1, p_offset ,0) * t1.weight[0] + calc_sum1(t1, p_offset ,1) * t1.weight[1]; + + if((t1.p0[2]) && (!stagecascade->two_rects)) + classsum += calc_sum1(t1, p_offset, 2) * t1.weight[2]; + + stage_sum += classsum >= t ? t1.alpha[1] : t1.alpha[0];// modify + counter++; +} + +if (stage_sum < stagecascade->threshold) +{ + result = 0; + break; +} + +stagecascade++; + +} +if(result) +{ + candidate[4 * (y1 * detect_width + x1)] = x; + candidate[4 * (y1 * detect_width + x1) + 1] = y; + candidate[4 * (y1 * detect_width + x1)+2] = window_width; + candidate[4 * (y1 * detect_width + x1) + 3] = window_height; +} +//} +} +} +} */ diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp index 8a2705646..ab867d4d3 100644 --- a/modules/ocl/src/match_template.cpp +++ b/modules/ocl/src/match_template.cpp @@ -51,12 +51,6 @@ using namespace cv; using namespace cv::ocl; using namespace std; -#if !defined (HAVE_OPENCL) -void cv::ocl::matchTemplate(const oclMat &, const oclMat &, oclMat &) -{ - throw_nogpu(); -} -#else //helper routines namespace cv { @@ -498,4 +492,3 @@ void cv::ocl::matchTemplate(const oclMat &image, const oclMat &templ, oclMat &re CV_Assert(caller); caller(image, templ, result, buf); } -#endif // diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index 7b90218d9..f0e65f9cc 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -58,60 +58,6 @@ using namespace std; //////////////////////////////// oclMat //////////////////////////////// //////////////////////////////////////////////////////////////////////// -#if !defined (HAVE_OPENCL) - -namespace cv -{ - namespace ocl - { - void oclMat::upload(const Mat & /*m*/) - { - throw_nogpu(); - } - void oclMat::download(cv::Mat & /*m*/) const - { - throw_nogpu(); - } - void oclMat::copyTo( oclMat & /*m*/ ) const - { - throw_nogpu(); - } - void oclMat::copyTo( oclMat & /*m*/, const oclMat &/* mask */) const - { - throw_nogpu(); - } - void oclMat::convertTo( oclMat & /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const - { - throw_nogpu(); - } - oclMat &oclMat::operator = (const Scalar & /*s*/) - { - throw_nogpu(); - return *this; - } - oclMat &oclMat::setTo(const Scalar & /*s*/, const oclMat & /*mask*/) - { - throw_nogpu(); - return *this; - } - oclMat oclMat::reshape(int /*new_cn*/, int /*new_rows*/) const - { - throw_nogpu(); - return oclMat(); - } - void oclMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/) - { - throw_nogpu(); - } - void oclMat::release() - { - throw_nogpu(); - } - } -} - -#else /* !defined (HAVE_OPENCL) */ - //helper routines namespace cv { @@ -1045,4 +991,3 @@ oclMat& cv::ocl::oclMat::operator/=( const oclMat& m ) divide(*this, m, *this); return *this; } -#endif /* !defined (HAVE_OPENCL) */ diff --git a/modules/ocl/src/mcwutil.hpp b/modules/ocl/src/mcwutil.hpp index 8db61f163..d1986b93b 100644 --- a/modules/ocl/src/mcwutil.hpp +++ b/modules/ocl/src/mcwutil.hpp @@ -47,9 +47,6 @@ #define _OPENCV_MCWUTIL_ #include "precomp.hpp" - -#if defined (HAVE_OPENCL) - using namespace std; namespace cv @@ -76,5 +73,5 @@ namespace cv }//namespace ocl }//namespace cv -#endif // HAVE_OPENCL + #endif //_OPENCV_MCWUTIL_ diff --git a/modules/ocl/src/mssegmentation.cpp b/modules/ocl/src/mssegmentation.cpp index 48bc96f41..300265bc2 100644 --- a/modules/ocl/src/mssegmentation.cpp +++ b/modules/ocl/src/mssegmentation.cpp @@ -44,23 +44,6 @@ #include "precomp.hpp" -#if !defined(HAVE_OPENCL) - -namespace cv -{ - namespace ocl - { - - void meanShiftSegmentation(const oclMat &, Mat &, int, int, int, TermCriteria) - { - throw_nogpu(); - } - - } -} - -#else - using namespace std; // Auxiliray stuff @@ -411,4 +394,3 @@ namespace cv } } -#endif // #if !defined (HAVE_OPENCL) diff --git a/modules/ocl/src/pyrlk.cpp b/modules/ocl/src/pyrlk.cpp index 87c536ec9..d4dbfd506 100644 --- a/modules/ocl/src/pyrlk.cpp +++ b/modules/ocl/src/pyrlk.cpp @@ -52,13 +52,6 @@ using namespace std; using namespace cv; using namespace cv::ocl; -#if !defined (HAVE_OPENCL) - -void cv::ocl::PyrLKOpticalFlow::sparse(const oclMat &, const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat &) { } -void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &, const oclMat &, oclMat &, oclMat &, oclMat *) { } - -#else /* !defined (HAVE_OPENCL) */ - namespace cv { namespace ocl @@ -862,5 +855,3 @@ void cv::ocl::PyrLKOpticalFlow::dense(const oclMat &prevImg, const oclMat &nextI clFinish(prevImg.clCxt->impl->clCmdQueue); } - -#endif /* !defined (HAVE_CUDA) */ diff --git a/modules/ocl/src/pyrup.cpp b/modules/ocl/src/pyrup.cpp index ebd3535b6..fa7f73d4c 100644 --- a/modules/ocl/src/pyrup.cpp +++ b/modules/ocl/src/pyrup.cpp @@ -54,13 +54,6 @@ using namespace cv; using namespace cv::ocl; using namespace std; -#ifndef HAVE_OPENCL -void cv::ocl::pyrUp(const oclMat &, GpuMat &, oclMat &) -{ - throw_nogpu(); -} -#else - namespace cv { namespace ocl @@ -93,5 +86,4 @@ namespace cv openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth()); } } -}; -#endif // HAVE_OPENCL \ No newline at end of file +} \ No newline at end of file diff --git a/modules/ocl/src/split_merge.cpp b/modules/ocl/src/split_merge.cpp index b071ff870..e7aad4382 100644 --- a/modules/ocl/src/split_merge.cpp +++ b/modules/ocl/src/split_merge.cpp @@ -58,34 +58,6 @@ using std::endl; ///////////////// oclMat merge and split /////////////////////////////// //////////////////////////////////////////////////////////////////////// -#if !defined (HAVE_OPENCL) - -namespace cv -{ - namespace ocl - { - void cv::ocl::merge(const oclMat *src_mat, size_t count, oclMat &dst_mat) - { - throw_nogpu(); - } - void cv::ocl::merge(const vector &src_mat, oclMat &dst_mat) - { - throw_nogpu(); - } - - void cv::ocl::split(const oclMat &src, oclMat *dst) - { - throw_nogpu(); - } - void cv::ocl::split(const oclMat &src, vector &dst) - { - throw_nogpu(); - } - } -} - -#else /* !defined (HAVE_OPENCL) */ - namespace cv { namespace ocl @@ -417,4 +389,3 @@ void cv::ocl::split(const oclMat &src, vector &dst) if(src.oclchannels() > 0) split_merge::split(src, &dst[0]); } -#endif /* !defined (HAVE_OPENCL) */ diff --git a/modules/ocl/src/surf.cpp b/modules/ocl/src/surf.cpp index 71a7aacd3..65dc86d16 100644 --- a/modules/ocl/src/surf.cpp +++ b/modules/ocl/src/surf.cpp @@ -50,59 +50,6 @@ using namespace cv; using namespace cv::ocl; using namespace std; -#if !defined (HAVE_OPENCL) - -cv::ocl::SURF_OCL::SURF_OCL() -{ - throw_nogpu(); -} -cv::ocl::SURF_OCL::SURF_OCL(double, int, int, bool, float, bool) -{ - throw_nogpu(); -} -int cv::ocl::SURF_OCL::descriptorSize() const -{ - throw_nogpu(); - return 0; -} -void cv::ocl::SURF_OCL::uploadKeypoints(const vector &, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::SURF_OCL::downloadKeypoints(const oclMat &, vector &) -{ - throw_nogpu(); -} -void cv::ocl::SURF_OCL::downloadDescriptors(const oclMat &, vector &) -{ - throw_nogpu(); -} -void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, oclMat &) -{ - throw_nogpu(); -} -void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, oclMat &, oclMat &, bool) -{ - throw_nogpu(); -} -void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector &) -{ - throw_nogpu(); -} -void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector &, oclMat &, bool) -{ - throw_nogpu(); -} -void cv::ocl::SURF_OCL::operator()(const oclMat &, const oclMat &, vector &, vector &, bool) -{ - throw_nogpu(); -} -void cv::ocl::SURF_OCL::releaseMemory() -{ - throw_nogpu(); -} - -#else /* !defined (HAVE_OPENCL) */ namespace cv { namespace ocl @@ -755,5 +702,3 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const } } -#endif // /* !defined (HAVE_OPENCL) */ - diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index 5bf08c80e..9cc6c1c89 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ b/modules/ocl/test/test_imgproc.cpp @@ -791,8 +791,8 @@ TEST_P(WarpAffine, Mat) { static const double coeffs[2][3] = { - {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, - {sin(3.14 / 6), cos(3.14 / 6), -100.0} + {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0}, + {sin(CV_PI / 6), cos(CV_PI / 6), -100.0} }; Mat M(2, 3, CV_64F, (void *)coeffs); diff --git a/modules/ocl/test/test_match_template.cpp b/modules/ocl/test/test_match_template.cpp index 3892513b4..c948e1d53 100644 --- a/modules/ocl/test/test_match_template.cpp +++ b/modules/ocl/test/test_match_template.cpp @@ -44,7 +44,7 @@ #include "precomp.hpp" -//#define PERF_TEST 0 + #ifdef HAVE_OPENCL //////////////////////////////////////////////////////////////////////////////// // MatchTemplate @@ -97,15 +97,7 @@ TEST_P(MatchTemplate8U, Accuracy) cv::Mat mat_dst; dst.download(mat_dst); - EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss); - -#ifdef PERF_TEST - { - P_TEST_FULL( {}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {}); - P_TEST_FULL( {}, {cv::matchTemplate(image, templ, dst_gold, method);}, {}); - } -#endif // PERF_TEST } PARAM_TEST_CASE(MatchTemplate32F, cv::Size, TemplateSize, Channels, TemplateMethod) @@ -144,17 +136,6 @@ TEST_P(MatchTemplate32F, Accuracy) dst.download(mat_dst); EXPECT_MAT_NEAR(dst_gold, mat_dst, templ_size.area() * 1e-1, sss); - -#ifdef PERF_TEST - { - std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; - std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl; - std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl; - std::cout << "Channels: " << cn << std::endl; - P_TEST_FULL( {}, {cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method);}, {}); - P_TEST_FULL( {}, {cv::matchTemplate(image, templ, dst_gold, method);}, {}); - } -#endif // PERF_TEST } INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U,