diff --git a/modules/ocl/doc/feature_detection_and_description.rst b/modules/ocl/doc/feature_detection_and_description.rst index b93d32f1a..77d3f7ab7 100644 --- a/modules/ocl/doc/feature_detection_and_description.rst +++ b/modules/ocl/doc/feature_detection_and_description.rst @@ -647,3 +647,138 @@ Returns block descriptors computed for the whole image. * **DESCR_FORMAT_COL_BY_COL** - Column-major order. The function is mainly used to learn the classifier. + + + +ocl::ORB_OCL +-------------- +.. ocv:class:: ocl::ORB_OCL + +Class for extracting ORB features and descriptors from an image. :: + + class ORB_OCL + { + public: + enum + { + X_ROW = 0, + Y_ROW, + RESPONSE_ROW, + ANGLE_ROW, + OCTAVE_ROW, + SIZE_ROW, + ROWS_COUNT + }; + + enum + { + DEFAULT_FAST_THRESHOLD = 20 + }; + + explicit ORB_OCL(int nFeatures = 500, float scaleFactor = 1.2f, + int nLevels = 8, int edgeThreshold = 31, + int firstLevel = 0, int WTA_K = 2, + int scoreType = 0, int patchSize = 31); + + void operator()(const oclMat& image, const oclMat& mask, + std::vector& keypoints); + void operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints); + + void operator()(const oclMat& image, const oclMat& mask, + std::vector& keypoints, oclMat& descriptors); + void operator()(const oclMat& image, const oclMat& mask, + oclMat& keypoints, oclMat& descriptors); + + void downloadKeyPoints(oclMat& d_keypoints, std::vector& keypoints); + + void convertKeyPoints(Mat& d_keypoints, std::vector& keypoints); + + int descriptorSize() const; + int descriptorType() const; + int defaultNorm() const; + + void setFastParams(int threshold, bool nonmaxSupression = true); + + void release(); + + bool blurForDescriptor; + }; + +The class implements ORB feature detection and description algorithm. + + + +ocl::ORB_OCL::ORB_OCL +------------------------ +Constructor. + +.. ocv:function:: ocl::ORB_OCL::ORB_OCL(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31, int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31) + + :param nfeatures: The maximum number of features to retain. + + :param scaleFactor: Pyramid decimation ratio, greater than 1. ``scaleFactor==2`` means the classical pyramid, where each next level has 4x less pixels than the previous, but such a big scale factor will degrade feature matching scores dramatically. On the other hand, too close to 1 scale factor will mean that to cover certain scale range you will need more pyramid levels and so the speed will suffer. + + :param nlevels: The number of pyramid levels. The smallest level will have linear size equal to ``input_image_linear_size/pow(scaleFactor, nlevels)``. + + :param edgeThreshold: This is size of the border where the features are not detected. It should roughly match the ``patchSize`` parameter. + + :param firstLevel: It should be 0 in the current implementation. + + :param WTA_K: The number of points that produce each element of the oriented BRIEF descriptor. The default value 2 means the BRIEF where we take a random point pair and compare their brightnesses, so we get 0/1 response. Other possible values are 3 and 4. For example, 3 means that we take 3 random points (of course, those point coordinates are random, but they are generated from the pre-defined seed, so each element of BRIEF descriptor is computed deterministically from the pixel rectangle), find point of maximum brightness and output index of the winner (0, 1 or 2). 
Such output will occupy 2 bits, and therefore it will need a special variant of Hamming distance, denoted as ``NORM_HAMMING2`` (2 bits per bin). When ``WTA_K=4``, we take 4 random points to compute each bin (that will also occupy 2 bits with possible values 0, 1, 2 or 3). + + :param scoreType: The default HARRIS_SCORE means that the Harris algorithm is used to rank features (the score is written to ``KeyPoint::score`` and is used to retain the best ``nfeatures`` features); FAST_SCORE is an alternative value of the parameter that produces slightly less stable keypoints, but it is a little faster to compute. + + :param patchSize: Size of the patch used by the oriented BRIEF descriptor. Of course, on smaller pyramid layers the perceived image area covered by a feature will be larger. + + + +ocl::ORB_OCL::operator() +-------------------------- +Detects keypoints and computes descriptors for them. + +.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints) + +.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints) + +.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints, oclMat& descriptors) + +.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints, oclMat& descriptors) + + :param image: Input 8-bit grayscale image. + + :param mask: Optional input mask that marks the regions where we should detect features. + + :param keypoints: The input/output vector of keypoints. Can be stored both in host and device memory. For device memory: + + * ``X_ROW`` contains the horizontal coordinate of the i'th feature. + * ``Y_ROW`` contains the vertical coordinate of the i'th feature. + * ``RESPONSE_ROW`` contains the response of the i'th feature. + * ``ANGLE_ROW`` contains the orientation of the i'th feature. + * ``OCTAVE_ROW`` contains the octave of the i'th feature. + * ``SIZE_ROW`` contains the size of the i'th feature. + + :param descriptors: Computed descriptors. If ``blurForDescriptor`` is true, the image is blurred before the descriptors are computed. + + + +ocl::ORB_OCL::downloadKeyPoints +--------------------------------- +Downloads keypoints from device to host memory. + +.. ocv:function:: static void ocl::ORB_OCL::downloadKeyPoints( const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints ) + + + +ocl::ORB_OCL::convertKeyPoints +-------------------------------- +Converts keypoints from OCL representation to a vector of ``KeyPoint``. + +.. ocv:function:: static void ocl::ORB_OCL::convertKeyPoints( const Mat& d_keypoints, std::vector<KeyPoint>& keypoints ) + + + +ocl::ORB_OCL::release +----------------------- +Releases inner buffer memory. + +.. ocv:function:: void ocl::ORB_OCL::release() diff --git a/modules/ocl/include/opencv2/ocl.hpp b/modules/ocl/include/opencv2/ocl.hpp index 542dbeb0b..357f87b6e 100644 --- a/modules/ocl/include/opencv2/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl.hpp @@ -1513,6 +1513,110 @@ namespace cv int nonmaxSupressionOCL(oclMat& keypoints); }; + ////////////////////////////////// ORB Descriptor Extractor ////////////////////////////////// + class CV_EXPORTS ORB_OCL + { + public: + enum + { + X_ROW = 0, + Y_ROW, + RESPONSE_ROW, + ANGLE_ROW, + OCTAVE_ROW, + SIZE_ROW, + ROWS_COUNT + }; + + enum + { + DEFAULT_FAST_THRESHOLD = 20 + }; + + //! 
Constructor + explicit ORB_OCL(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31, + int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31); + + //! Compute the ORB features on an image + //! image - the image to compute the features (supports only CV_8UC1 images) + //! mask - the mask to apply + //! keypoints - the resulting keypoints + void operator ()(const oclMat& image, const oclMat& mask, std::vector& keypoints); + void operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints); + + //! Compute the ORB features and descriptors on an image + //! image - the image to compute the features (supports only CV_8UC1 images) + //! mask - the mask to apply + //! keypoints - the resulting keypoints + //! descriptors - descriptors array + void operator ()(const oclMat& image, const oclMat& mask, std::vector& keypoints, oclMat& descriptors); + void operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints, oclMat& descriptors); + + //! download keypoints from device to host memory + static void downloadKeyPoints(const oclMat& d_keypoints, std::vector& keypoints); + //! convert keypoints to KeyPoint vector + static void convertKeyPoints(const Mat& d_keypoints, std::vector& keypoints); + + //! returns the descriptor size in bytes + inline int descriptorSize() const { return kBytes; } + inline int descriptorType() const { return CV_8U; } + inline int defaultNorm() const { return NORM_HAMMING; } + + inline void setFastParams(int threshold, bool nonmaxSupression = true) + { + fastDetector_.threshold = threshold; + fastDetector_.nonmaxSupression = nonmaxSupression; + } + + //! release temporary buffer's memory + void release(); + + //! if true, image will be blurred before descriptors calculation + bool blurForDescriptor; + + private: + enum { kBytes = 32 }; + + void buildScalePyramids(const oclMat& image, const oclMat& mask); + + void computeKeyPointsPyramid(); + + void computeDescriptors(oclMat& descriptors); + + void mergeKeyPoints(oclMat& keypoints); + + int nFeatures_; + float scaleFactor_; + int nLevels_; + int edgeThreshold_; + int firstLevel_; + int WTA_K_; + int scoreType_; + int patchSize_; + + // The number of desired features per scale + std::vector n_features_per_level_; + + // Points to compute BRIEF descriptors from + oclMat pattern_; + + std::vector imagePyr_; + std::vector maskPyr_; + + oclMat buf_; + + std::vector keyPointsPyr_; + std::vector keyPointsCount_; + + FAST_OCL fastDetector_; + + Ptr blurFilter; + + oclMat d_keypoints_; + + oclMat uMax_; + }; + /////////////////////////////// PyrLKOpticalFlow ///////////////////////////////////// class CV_EXPORTS PyrLKOpticalFlow diff --git a/modules/ocl/perf/perf_orb.cpp b/modules/ocl/perf/perf_orb.cpp new file mode 100644 index 000000000..628a56090 --- /dev/null +++ b/modules/ocl/perf/perf_orb.cpp @@ -0,0 +1,103 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. 
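For illustration, here is a minimal usage sketch for the ``ORB_OCL`` class declared above; it is not taken from the patch, the image paths and feature count are placeholder assumptions, and a working OpenCL device is required. ::

    // Hedged usage sketch for ocl::ORB_OCL; file names and parameters are
    // illustrative assumptions, not values used anywhere in the patch.
    #include "opencv2/ocl.hpp"      // header extended by this patch
    #include "opencv2/opencv.hpp"   // imread, BFMatcher, KeyPoint, ...

    int main()
    {
        cv::Mat img1 = cv::imread("img1.png", cv::IMREAD_GRAYSCALE);
        cv::Mat img2 = cv::imread("img2.png", cv::IMREAD_GRAYSCALE);

        cv::ocl::ORB_OCL orb(2000);                      // retain up to 2000 features
        cv::ocl::oclMat d_img1(img1), d_img2(img2);      // upload images to the device
        cv::ocl::oclMat d_kp1, d_kp2, d_desc1, d_desc2;

        orb(d_img1, cv::ocl::oclMat(), d_kp1, d_desc1);  // detect + describe, no mask
        orb(d_img2, cv::ocl::oclMat(), d_kp2, d_desc2);

        std::vector<cv::KeyPoint> kp1, kp2;
        orb.downloadKeyPoints(d_kp1, kp1);               // device rows -> KeyPoint vector
        orb.downloadKeyPoints(d_kp2, kp2);

        cv::Mat desc1(d_desc1), desc2(d_desc2);          // download descriptors to host
        cv::BFMatcher matcher(orb.defaultNorm());        // NORM_HAMMING; use NORM_HAMMING2 for WTA_K 3 or 4
        std::vector<cv::DMatch> matches;
        matcher.match(desc1, desc2, matches);
        return 0;
    }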
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +// Authors: +// * Peter Andreas Entschev, peter@entschev.com +// +//M*/ + +#include "perf_precomp.hpp" + +using namespace perf; + +/////////////////// ORB /////////////////// + +typedef std::tr1::tuple Image_NFeatures_t; +typedef perf::TestBaseWithParam Image_NFeatures; + +PERF_TEST_P(Image_NFeatures, ORB, + testing::Combine(testing::Values("gpu/perf/aloe.png"), + testing::Values(4000))) +{ + declare.time(300.0); + + const Image_NFeatures_t params = GetParam(); + const std::string imgFile = std::tr1::get<0>(params); + const int nFeatures = std::tr1::get<1>(params); + + const cv::Mat img = imread(getDataPath(imgFile), cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(img.empty()); + + if (RUN_OCL_IMPL) + { + cv::ocl::ORB_OCL d_orb(nFeatures); + + const cv::ocl::oclMat d_img(img); + cv::ocl::oclMat d_keypoints, d_descriptors; + + TEST_CYCLE() d_orb(d_img, cv::ocl::oclMat(), d_keypoints, d_descriptors); + + std::vector ocl_keypoints; + d_orb.downloadKeyPoints(d_keypoints, ocl_keypoints); + + cv::Mat ocl_descriptors(d_descriptors); + + ocl_keypoints.resize(10); + ocl_descriptors = ocl_descriptors.rowRange(0, 10); + + sortKeyPoints(ocl_keypoints, ocl_descriptors); + + SANITY_CHECK_KEYPOINTS(ocl_keypoints, 1e-4); + SANITY_CHECK(ocl_descriptors); + } + else if (RUN_PLAIN_IMPL) + { + cv::ORB orb(nFeatures); + + std::vector cpu_keypoints; + cv::Mat cpu_descriptors; + + TEST_CYCLE() orb(img, cv::noArray(), cpu_keypoints, cpu_descriptors); + + SANITY_CHECK_KEYPOINTS(cpu_keypoints); + SANITY_CHECK(cpu_descriptors); + } + else + OCL_PERF_ELSE; +} diff --git a/modules/ocl/src/opencl/orb.cl b/modules/ocl/src/opencl/orb.cl new file mode 100644 index 000000000..36176021a --- /dev/null +++ b/modules/ocl/src/opencl/orb.cl @@ -0,0 +1,503 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. 
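The device-side keypoint matrix holds one row per attribute and one column per feature, following the ``X_ROW`` ... ``SIZE_ROW`` enum above. As a hedged sketch (not code from the patch), unpacking such a matrix on the host, after downloading it into a ``CV_32FC1`` ``cv::Mat`` with ``ROWS_COUNT`` rows, could look as follows; this is roughly what ``convertKeyPoints`` is expected to do. ::

    // Hypothetical helper, for illustration only: turn a downloaded ROWS_COUNT x N
    // keypoint matrix into cv::KeyPoint objects using the documented row layout.
    // Assumes the opencv2/ocl.hpp header from this patch is included.
    static void unpackKeyPoints(const cv::Mat& m, std::vector<cv::KeyPoint>& kps)
    {
        typedef cv::ocl::ORB_OCL O;
        kps.clear();
        for (int i = 0; i < m.cols; ++i)
        {
            cv::KeyPoint kp;
            kp.pt.x     = m.at<float>(O::X_ROW, i);
            kp.pt.y     = m.at<float>(O::Y_ROW, i);
            kp.response = m.at<float>(O::RESPONSE_ROW, i);
            kp.angle    = m.at<float>(O::ANGLE_ROW, i);
            kp.octave   = (int)m.at<float>(O::OCTAVE_ROW, i);
            kp.size     = m.at<float>(O::SIZE_ROW, i);
            kps.push_back(kp);
        }
    }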
+// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +// Authors: +// * Peter Andreas Entschev, peter@entschev.com +// +//M*/ + +#ifdef DOUBLE_SUPPORT +#ifdef cl_amd_fp64 +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#elif defined (cl_khr_fp64) +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#endif +#define CV_PI M_PI +#else +#define CV_PI M_PI_F +#endif + +#define X_ROW 0 +#define Y_ROW 1 +#define RESPONSE_ROW 2 +#define ANGLE_ROW 3 +#define OCTAVE_ROW 4 +#define SIZE_ROW 5 +#define ROWS_COUNT 6 + + +#ifdef CPU +void reduce_32(volatile __local int* smem, volatile int* val, int tid) +{ +#define op(A, B) (*A)+(B) + + smem[tid] = *val; + barrier(CLK_LOCAL_MEM_FENCE); + + for(int i = 16; i > 0; i >>= 1) + { + if(tid < i) + { + smem[tid] = *val = op(val, smem[tid + i]); + } + barrier(CLK_LOCAL_MEM_FENCE); + } +#undef op +} +#else +void reduce_32(volatile __local int* smem, volatile int* val, int tid) +{ +#define op(A, B) (*A)+(B) + + smem[tid] = *val; + barrier(CLK_LOCAL_MEM_FENCE); + +#ifndef WAVE_SIZE +#define WAVE_SIZE 1 +#endif + if (tid < 16) + { + smem[tid] = *val = op(val, smem[tid + 16]); +#if WAVE_SIZE < 16 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 8) + { +#endif + smem[tid] = *val = op(val, smem[tid + 8]); +#if WAVE_SIZE < 8 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 4) + { +#endif + smem[tid] = *val = op(val, smem[tid + 4]); +#if WAVE_SIZE < 4 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 2) + { +#endif + smem[tid] = *val = op(val, smem[tid + 2]); +#if WAVE_SIZE < 2 + } + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 1) + { +#endif + smem[tid] = *val = op(val, smem[tid + 1]); + } +#undef WAVE_SIZE +#undef op +} +#endif + +//////////////////////////////////////////////////////////////////////////////////////////////////////// +// HarrisResponses + +__kernel +void HarrisResponses(__global const uchar* img, + __global float* keypoints, + const int npoints, + const int blockSize, + const float harris_k, + const int img_step, + const int keypoints_step) +{ + __local int smem0[8 * 32]; + __local int smem1[8 * 32]; + __local int smem2[8 * 32]; + + const int ptidx = mad24(get_group_id(0), get_local_size(1), get_local_id(1)); + + if (ptidx < npoints) + { + const int pt_x = keypoints[mad24(keypoints_step, X_ROW, ptidx)]; + const int pt_y = keypoints[mad24(keypoints_step, Y_ROW, ptidx)]; + + const int r = blockSize / 2; + const int x0 = pt_x - r; + const int y0 = pt_y - r; + + int a = 0, b = 0, c = 0; + + for (int ind = get_local_id(0); ind < blockSize * blockSize; ind += get_local_size(0)) + { + const int i = ind / blockSize; + const int j = ind % blockSize; + + int center = mad24(y0+i, img_step, x0+j); + + int Ix = (img[center+1] - img[center-1]) * 2 + + (img[center-img_step+1] - img[center-img_step-1]) + + (img[center+img_step+1] - img[center+img_step-1]); + + int Iy = (img[center+img_step] - img[center-img_step]) * 2 + + (img[center+img_step-1] - img[center-img_step-1]) + + (img[center+img_step+1] - img[center-img_step+1]); + + a += Ix * Ix; + b += Iy * Iy; + c += Ix * Iy; + } + + __local int* srow0 = smem0 + get_local_id(1) * get_local_size(0); + __local int* srow1 = smem1 + get_local_id(1) * get_local_size(0); + __local int* srow2 = smem2 + get_local_id(1) * get_local_size(0); + + reduce_32(srow0, &a, get_local_id(0)); + reduce_32(srow1, &b, get_local_id(0)); + reduce_32(srow2, &c, get_local_id(0)); + + if (get_local_id(0) == 0) + { + float scale = (1 << 2) * blockSize * 255.0f; + scale = 1.0f / scale; + const float scale_sq_sq = scale * scale * scale * scale; + + float response = ((float)a * b - (float)c * c - harris_k * 
((float)a + b) * ((float)a + b)) * scale_sq_sq; + keypoints[mad24(keypoints_step, RESPONSE_ROW, ptidx)] = response; + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////// +// IC_Angle + +__kernel +void IC_Angle(__global const uchar* img, + __global float* keypoints_, + __global const int* u_max, + const int npoints, + const int half_k, + const int img_step, + const int keypoints_step) +{ + __local int smem0[8 * 32]; + __local int smem1[8 * 32]; + + __local int* srow0 = smem0 + get_local_id(1) * get_local_size(0); + __local int* srow1 = smem1 + get_local_id(1) * get_local_size(0); + + const int ptidx = mad24(get_group_id(0), get_local_size(1), get_local_id(1)); + + if (ptidx < npoints) + { + int m_01 = 0, m_10 = 0; + + const int pt_x = keypoints_[mad24(keypoints_step, X_ROW, ptidx)]; + const int pt_y = keypoints_[mad24(keypoints_step, Y_ROW, ptidx)]; + + // Treat the center line differently, v=0 + for (int u = get_local_id(0) - half_k; u <= half_k; u += get_local_size(0)) + m_10 += u * img[mad24(pt_y, img_step, pt_x+u)]; + + reduce_32(srow0, &m_10, get_local_id(0)); + + for (int v = 1; v <= half_k; ++v) + { + // Proceed over the two lines + int v_sum = 0; + int m_sum = 0; + const int d = u_max[v]; + + for (int u = get_local_id(0) - d; u <= d; u += get_local_size(0)) + { + int val_plus = img[mad24(pt_y+v, img_step, pt_x+u)]; + int val_minus = img[mad24(pt_y-v, img_step, pt_x+u)]; + + v_sum += (val_plus - val_minus); + m_sum += u * (val_plus + val_minus); + } + + reduce_32(srow0, &v_sum, get_local_id(0)); + reduce_32(srow1, &m_sum, get_local_id(0)); + + m_10 += m_sum; + m_01 += v * v_sum; + } + + if (get_local_id(0) == 0) + { + float kp_dir = atan2((float)m_01, (float)m_10); + kp_dir += (kp_dir < 0) * (2.0f * CV_PI); + kp_dir *= 180.0f / CV_PI; + + keypoints_[mad24(keypoints_step, ANGLE_ROW, ptidx)] = kp_dir; + } + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////// +// computeOrbDescriptor + +#define GET_VALUE(idx) \ + img[mad24(loc.y + (int)round(pattern[idx] * sina + pattern[pattern_step+idx] * cosa), img_step, \ + loc.x + (int)round(pattern[idx] * cosa - pattern[pattern_step+idx] * sina))] + +int calcOrbDescriptor_2(__global const uchar* img, + __global const int* pattern, + const int2 loc, + const float sina, + const float cosa, + const int i, + const int img_step, + const int pattern_step) +{ + pattern += 16 * i; + + int t0, t1, val; + + t0 = GET_VALUE(0); t1 = GET_VALUE(1); + val = t0 < t1; + + t0 = GET_VALUE(2); t1 = GET_VALUE(3); + val |= (t0 < t1) << 1; + + t0 = GET_VALUE(4); t1 = GET_VALUE(5); + val |= (t0 < t1) << 2; + + t0 = GET_VALUE(6); t1 = GET_VALUE(7); + val |= (t0 < t1) << 3; + + t0 = GET_VALUE(8); t1 = GET_VALUE(9); + val |= (t0 < t1) << 4; + + t0 = GET_VALUE(10); t1 = GET_VALUE(11); + val |= (t0 < t1) << 5; + + t0 = GET_VALUE(12); t1 = GET_VALUE(13); + val |= (t0 < t1) << 6; + + t0 = GET_VALUE(14); t1 = GET_VALUE(15); + val |= (t0 < t1) << 7; + + return val; +} + +int calcOrbDescriptor_3(__global const uchar* img, + __global const int* pattern, + const int2 loc, + const float sina, + const float cosa, + const int i, + const int img_step, + const int pattern_step) +{ + pattern += 12 * i; + + int t0, t1, t2, val; + + t0 = GET_VALUE(0); t1 = GET_VALUE(1); t2 = GET_VALUE(2); + val = t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0); + + t0 = GET_VALUE(3); t1 = GET_VALUE(4); t2 = GET_VALUE(5); + val |= (t2 > t1 ? (t2 > t0 ? 
2 : 0) : (t1 > t0)) << 2; + + t0 = GET_VALUE(6); t1 = GET_VALUE(7); t2 = GET_VALUE(8); + val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 4; + + t0 = GET_VALUE(9); t1 = GET_VALUE(10); t2 = GET_VALUE(11); + val |= (t2 > t1 ? (t2 > t0 ? 2 : 0) : (t1 > t0)) << 6; + + return val; +} + +int calcOrbDescriptor_4(__global const uchar* img, + __global const int* pattern, + const int2 loc, + const float sina, + const float cosa, + const int i, + const int img_step, + const int pattern_step) +{ + pattern += 16 * i; + + int t0, t1, t2, t3, k, val; + int a, b; + + t0 = GET_VALUE(0); t1 = GET_VALUE(1); + t2 = GET_VALUE(2); t3 = GET_VALUE(3); + a = 0, b = 2; + if( t1 > t0 ) t0 = t1, a = 1; + if( t3 > t2 ) t2 = t3, b = 3; + k = t0 > t2 ? a : b; + val = k; + + t0 = GET_VALUE(4); t1 = GET_VALUE(5); + t2 = GET_VALUE(6); t3 = GET_VALUE(7); + a = 0, b = 2; + if( t1 > t0 ) t0 = t1, a = 1; + if( t3 > t2 ) t2 = t3, b = 3; + k = t0 > t2 ? a : b; + val |= k << 2; + + t0 = GET_VALUE(8); t1 = GET_VALUE(9); + t2 = GET_VALUE(10); t3 = GET_VALUE(11); + a = 0, b = 2; + if( t1 > t0 ) t0 = t1, a = 1; + if( t3 > t2 ) t2 = t3, b = 3; + k = t0 > t2 ? a : b; + val |= k << 4; + + t0 = GET_VALUE(12); t1 = GET_VALUE(13); + t2 = GET_VALUE(14); t3 = GET_VALUE(15); + a = 0, b = 2; + if( t1 > t0 ) t0 = t1, a = 1; + if( t3 > t2 ) t2 = t3, b = 3; + k = t0 > t2 ? a : b; + val |= k << 6; + + return val; +} + +#undef GET_VALUE + +__kernel +void computeOrbDescriptor(__global const uchar* img, + __global const float* keypoints, + __global const int* pattern, + __global uchar* desc, + const int npoints, + const int dsize, + const int WTA_K, + const int offset, + const int img_step, + const int keypoints_step, + const int pattern_step, + const int desc_step) +{ + const int descidx = mad24(get_group_id(0), get_local_size(0), get_local_id(0)); + const int ptidx = mad24(get_group_id(1), get_local_size(1), get_local_id(1)); + + if (ptidx < npoints && descidx < dsize) + { + int2 loc = {(int)keypoints[mad24(keypoints_step, X_ROW, ptidx)], + (int)keypoints[mad24(keypoints_step, Y_ROW, ptidx)]}; + + float angle = keypoints[mad24(keypoints_step, ANGLE_ROW, ptidx)]; + angle *= (float)(CV_PI / 180.f); + + float sina = sin(angle); + float cosa = cos(angle); + + if (WTA_K == 2) + desc[mad24(ptidx+offset, desc_step, descidx)] = calcOrbDescriptor_2(img, pattern, loc, sina, cosa, descidx, img_step, pattern_step); + else if (WTA_K == 3) + desc[mad24(ptidx+offset, desc_step, descidx)] = calcOrbDescriptor_3(img, pattern, loc, sina, cosa, descidx, img_step, pattern_step); + else if (WTA_K == 4) + desc[mad24(ptidx+offset, desc_step, descidx)] = calcOrbDescriptor_4(img, pattern, loc, sina, cosa, descidx, img_step, pattern_step); + } +} + +//////////////////////////////////////////////////////////////////////////////////////////////////////// +// mergeLocation + +__kernel +void mergeLocation(__global const float* keypoints_in, + __global float* keypoints_out, + const int npoints, + const int offset, + const float scale, + const int octave, + const float size, + const int keypoints_in_step, + const int keypoints_out_step) +{ + //const int ptidx = blockIdx.x * blockDim.x + threadIdx.x; + const int ptidx = mad24(get_group_id(0), get_local_size(0), get_local_id(0)); + + if (ptidx < npoints) + { + float pt_x = keypoints_in[mad24(keypoints_in_step, X_ROW, ptidx)] * scale; + float pt_y = keypoints_in[mad24(keypoints_in_step, Y_ROW, ptidx)] * scale; + float response = keypoints_in[mad24(keypoints_in_step, RESPONSE_ROW, ptidx)]; + float angle = 
keypoints_in[mad24(keypoints_in_step, ANGLE_ROW, ptidx)]; + + keypoints_out[mad24(keypoints_out_step, X_ROW, ptidx+offset)] = pt_x; + keypoints_out[mad24(keypoints_out_step, Y_ROW, ptidx+offset)] = pt_y; + keypoints_out[mad24(keypoints_out_step, RESPONSE_ROW, ptidx+offset)] = response; + keypoints_out[mad24(keypoints_out_step, ANGLE_ROW, ptidx+offset)] = angle; + keypoints_out[mad24(keypoints_out_step, OCTAVE_ROW, ptidx+offset)] = (float)octave; + keypoints_out[mad24(keypoints_out_step, SIZE_ROW, ptidx+offset)] = size; + } +} + +__kernel +void convertRowsToChannels(__global const float* keypoints_in, + __global float* keypoints_out, + const int npoints, + const int keypoints_in_step, + const int keypoints_out_step) +{ + const int ptidx = mad24(get_group_id(0), get_local_size(0), get_local_id(0)); + + if (ptidx < npoints) + { + const int pt_x = keypoints_in[mad24(keypoints_in_step, X_ROW, ptidx)]; + const int pt_y = keypoints_in[mad24(keypoints_in_step, Y_ROW, ptidx)]; + + keypoints_out[ptidx*2] = pt_x; + keypoints_out[ptidx*2+1] = pt_y; + } +} + +__kernel +void convertChannelsToRows(__global const float* keypoints_pos, + __global const float* keypoints_resp, + __global float* keypoints_out, + const int npoints, + const int keypoints_pos_step, + const int keypoints_resp_step, + const int keypoints_out_step) +{ + const int ptidx = mad24(get_group_id(0), get_local_size(0), get_local_id(0)); + + if (ptidx < npoints) + { + const float pt_x = keypoints_pos[ptidx*2]; + const float pt_y = keypoints_pos[ptidx*2+1]; + const float resp = keypoints_resp[ptidx]; + + keypoints_out[mad24(keypoints_out_step, X_ROW, ptidx)] = pt_x; + keypoints_out[mad24(keypoints_out_step, Y_ROW, ptidx)] = pt_y; + keypoints_out[mad24(keypoints_out_step, RESPONSE_ROW, ptidx)] = resp; + } +} diff --git a/modules/ocl/src/orb.cpp b/modules/ocl/src/orb.cpp new file mode 100644 index 000000000..4bd022c8d --- /dev/null +++ b/modules/ocl/src/orb.cpp @@ -0,0 +1,916 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. 
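As a host-side reference for what the ``IC_Angle`` kernel above computes (the intensity-centroid orientation used by ORB), the following sketch mirrors the kernel's moment sums. It is illustrative only, not part of the patch, and assumes ``u_max[v]`` holds the half-width of the circular patch at vertical offset ``v``, as in the kernel. ::

    // Illustrative host-side counterpart of the IC_Angle kernel (not part of the
    // patch): accumulate the image moments m01 and m10 over a circular patch and
    // return the keypoint orientation in degrees, in [0, 360).
    // Assumes <cmath>, <vector> and the OpenCV core headers are included.
    static float icAngleReference(const cv::Mat& img, cv::Point pt,
                                  const std::vector<int>& u_max, int half_k)
    {
        int m_01 = 0, m_10 = 0;

        // Center line (v == 0) contributes to m10 only.
        for (int u = -half_k; u <= half_k; ++u)
            m_10 += u * img.at<uchar>(pt.y, pt.x + u);

        // Remaining lines, processed as symmetric pairs (+v, -v).
        for (int v = 1; v <= half_k; ++v)
        {
            int v_sum = 0;
            const int d = u_max[v];   // half-width of the circular patch at this v
            for (int u = -d; u <= d; ++u)
            {
                int val_plus  = img.at<uchar>(pt.y + v, pt.x + u);
                int val_minus = img.at<uchar>(pt.y - v, pt.x + u);
                v_sum += (val_plus - val_minus);
                m_10  += u * (val_plus + val_minus);
            }
            m_01 += v * v_sum;
        }

        float kp_dir = std::atan2((float)m_01, (float)m_10);   // radians in (-pi, pi]
        if (kp_dir < 0)
            kp_dir += (float)(2.0 * CV_PI);                    // wrap to [0, 2*pi)
        return kp_dir * (float)(180.0 / CV_PI);                // convert to degrees
    }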
+// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +// Authors: +// * Peter Andreas Entschev, peter@entschev.com +// +//M*/ + +#include "precomp.hpp" +#include "opencl_kernels.hpp" + +using namespace cv; +using namespace cv::ocl; + +namespace +{ + const float HARRIS_K = 0.04f; + const int DESCRIPTOR_SIZE = 32; + + const int bit_pattern_31_[256 * 4] = + { + 8,-3, 9,5/*mean (0), correlation (0)*/, + 4,2, 7,-12/*mean (1.12461e-05), correlation (0.0437584)*/, + -11,9, -8,2/*mean (3.37382e-05), correlation (0.0617409)*/, + 7,-12, 12,-13/*mean (5.62303e-05), correlation (0.0636977)*/, + 2,-13, 2,12/*mean (0.000134953), correlation (0.085099)*/, + 1,-7, 1,6/*mean (0.000528565), correlation (0.0857175)*/, + -2,-10, -2,-4/*mean (0.0188821), correlation (0.0985774)*/, + -13,-13, -11,-8/*mean (0.0363135), correlation (0.0899616)*/, + -13,-3, -12,-9/*mean (0.121806), correlation (0.099849)*/, + 10,4, 11,9/*mean (0.122065), correlation (0.093285)*/, + -13,-8, -8,-9/*mean (0.162787), correlation (0.0942748)*/, + -11,7, -9,12/*mean (0.21561), correlation (0.0974438)*/, + 7,7, 12,6/*mean (0.160583), correlation (0.130064)*/, + -4,-5, -3,0/*mean (0.228171), correlation (0.132998)*/, + -13,2, -12,-3/*mean (0.00997526), correlation (0.145926)*/, + -9,0, -7,5/*mean (0.198234), correlation (0.143636)*/, + 12,-6, 12,-1/*mean (0.0676226), correlation (0.16689)*/, + -3,6, -2,12/*mean (0.166847), correlation (0.171682)*/, + -6,-13, -4,-8/*mean (0.101215), correlation (0.179716)*/, + 11,-13, 12,-8/*mean (0.200641), correlation (0.192279)*/, + 4,7, 5,1/*mean (0.205106), correlation (0.186848)*/, + 5,-3, 10,-3/*mean (0.234908), correlation (0.192319)*/, + 3,-7, 6,12/*mean (0.0709964), correlation (0.210872)*/, + -8,-7, -6,-2/*mean (0.0939834), correlation (0.212589)*/, + -2,11, -1,-10/*mean (0.127778), correlation (0.20866)*/, + -13,12, -8,10/*mean (0.14783), correlation (0.206356)*/, + -7,3, -5,-3/*mean (0.182141), correlation (0.198942)*/, + -4,2, -3,7/*mean (0.188237), correlation (0.21384)*/, + -10,-12, -6,11/*mean (0.14865), correlation (0.23571)*/, + 5,-12, 6,-7/*mean (0.222312), correlation (0.23324)*/, + 5,-6, 7,-1/*mean (0.229082), correlation (0.23389)*/, + 1,0, 4,-5/*mean (0.241577), correlation (0.215286)*/, + 9,11, 11,-13/*mean (0.00338507), correlation (0.251373)*/, + 4,7, 4,12/*mean (0.131005), correlation (0.257622)*/, + 2,-1, 4,4/*mean (0.152755), correlation (0.255205)*/, + -4,-12, -2,7/*mean (0.182771), correlation (0.244867)*/, + -8,-5, -7,-10/*mean (0.186898), correlation (0.23901)*/, + 4,11, 9,12/*mean (0.226226), correlation (0.258255)*/, + 0,-8, 1,-13/*mean (0.0897886), correlation (0.274827)*/, + -13,-2, -8,2/*mean (0.148774), correlation (0.28065)*/, + -3,-2, -2,3/*mean (0.153048), correlation (0.283063)*/, + -6,9, -4,-9/*mean (0.169523), correlation (0.278248)*/, + 8,12, 10,7/*mean (0.225337), correlation (0.282851)*/, + 0,9, 1,3/*mean (0.226687), correlation (0.278734)*/, + 7,-5, 11,-10/*mean (0.00693882), correlation (0.305161)*/, + -13,-6, -11,0/*mean (0.0227283), correlation (0.300181)*/, + 10,7, 
12,1/*mean (0.125517), correlation (0.31089)*/, + -6,-3, -6,12/*mean (0.131748), correlation (0.312779)*/, + 10,-9, 12,-4/*mean (0.144827), correlation (0.292797)*/, + -13,8, -8,-12/*mean (0.149202), correlation (0.308918)*/, + -13,0, -8,-4/*mean (0.160909), correlation (0.310013)*/, + 3,3, 7,8/*mean (0.177755), correlation (0.309394)*/, + 5,7, 10,-7/*mean (0.212337), correlation (0.310315)*/, + -1,7, 1,-12/*mean (0.214429), correlation (0.311933)*/, + 3,-10, 5,6/*mean (0.235807), correlation (0.313104)*/, + 2,-4, 3,-10/*mean (0.00494827), correlation (0.344948)*/, + -13,0, -13,5/*mean (0.0549145), correlation (0.344675)*/, + -13,-7, -12,12/*mean (0.103385), correlation (0.342715)*/, + -13,3, -11,8/*mean (0.134222), correlation (0.322922)*/, + -7,12, -4,7/*mean (0.153284), correlation (0.337061)*/, + 6,-10, 12,8/*mean (0.154881), correlation (0.329257)*/, + -9,-1, -7,-6/*mean (0.200967), correlation (0.33312)*/, + -2,-5, 0,12/*mean (0.201518), correlation (0.340635)*/, + -12,5, -7,5/*mean (0.207805), correlation (0.335631)*/, + 3,-10, 8,-13/*mean (0.224438), correlation (0.34504)*/, + -7,-7, -4,5/*mean (0.239361), correlation (0.338053)*/, + -3,-2, -1,-7/*mean (0.240744), correlation (0.344322)*/, + 2,9, 5,-11/*mean (0.242949), correlation (0.34145)*/, + -11,-13, -5,-13/*mean (0.244028), correlation (0.336861)*/, + -1,6, 0,-1/*mean (0.247571), correlation (0.343684)*/, + 5,-3, 5,2/*mean (0.000697256), correlation (0.357265)*/, + -4,-13, -4,12/*mean (0.00213675), correlation (0.373827)*/, + -9,-6, -9,6/*mean (0.0126856), correlation (0.373938)*/, + -12,-10, -8,-4/*mean (0.0152497), correlation (0.364237)*/, + 10,2, 12,-3/*mean (0.0299933), correlation (0.345292)*/, + 7,12, 12,12/*mean (0.0307242), correlation (0.366299)*/, + -7,-13, -6,5/*mean (0.0534975), correlation (0.368357)*/, + -4,9, -3,4/*mean (0.099865), correlation (0.372276)*/, + 7,-1, 12,2/*mean (0.117083), correlation (0.364529)*/, + -7,6, -5,1/*mean (0.126125), correlation (0.369606)*/, + -13,11, -12,5/*mean (0.130364), correlation (0.358502)*/, + -3,7, -2,-6/*mean (0.131691), correlation (0.375531)*/, + 7,-8, 12,-7/*mean (0.160166), correlation (0.379508)*/, + -13,-7, -11,-12/*mean (0.167848), correlation (0.353343)*/, + 1,-3, 12,12/*mean (0.183378), correlation (0.371916)*/, + 2,-6, 3,0/*mean (0.228711), correlation (0.371761)*/, + -4,3, -2,-13/*mean (0.247211), correlation (0.364063)*/, + -1,-13, 1,9/*mean (0.249325), correlation (0.378139)*/, + 7,1, 8,-6/*mean (0.000652272), correlation (0.411682)*/, + 1,-1, 3,12/*mean (0.00248538), correlation (0.392988)*/, + 9,1, 12,6/*mean (0.0206815), correlation (0.386106)*/, + -1,-9, -1,3/*mean (0.0364485), correlation (0.410752)*/, + -13,-13, -10,5/*mean (0.0376068), correlation (0.398374)*/, + 7,7, 10,12/*mean (0.0424202), correlation (0.405663)*/, + 12,-5, 12,9/*mean (0.0942645), correlation (0.410422)*/, + 6,3, 7,11/*mean (0.1074), correlation (0.413224)*/, + 5,-13, 6,10/*mean (0.109256), correlation (0.408646)*/, + 2,-12, 2,3/*mean (0.131691), correlation (0.416076)*/, + 3,8, 4,-6/*mean (0.165081), correlation (0.417569)*/, + 2,6, 12,-13/*mean (0.171874), correlation (0.408471)*/, + 9,-12, 10,3/*mean (0.175146), correlation (0.41296)*/, + -8,4, -7,9/*mean (0.183682), correlation (0.402956)*/, + -11,12, -4,-6/*mean (0.184672), correlation (0.416125)*/, + 1,12, 2,-8/*mean (0.191487), correlation (0.386696)*/, + 6,-9, 7,-4/*mean (0.192668), correlation (0.394771)*/, + 2,3, 3,-2/*mean (0.200157), correlation (0.408303)*/, + 6,3, 11,0/*mean (0.204588), correlation (0.411762)*/, + 
3,-3, 8,-8/*mean (0.205904), correlation (0.416294)*/, + 7,8, 9,3/*mean (0.213237), correlation (0.409306)*/, + -11,-5, -6,-4/*mean (0.243444), correlation (0.395069)*/, + -10,11, -5,10/*mean (0.247672), correlation (0.413392)*/, + -5,-8, -3,12/*mean (0.24774), correlation (0.411416)*/, + -10,5, -9,0/*mean (0.00213675), correlation (0.454003)*/, + 8,-1, 12,-6/*mean (0.0293635), correlation (0.455368)*/, + 4,-6, 6,-11/*mean (0.0404971), correlation (0.457393)*/, + -10,12, -8,7/*mean (0.0481107), correlation (0.448364)*/, + 4,-2, 6,7/*mean (0.050641), correlation (0.455019)*/, + -2,0, -2,12/*mean (0.0525978), correlation (0.44338)*/, + -5,-8, -5,2/*mean (0.0629667), correlation (0.457096)*/, + 7,-6, 10,12/*mean (0.0653846), correlation (0.445623)*/, + -9,-13, -8,-8/*mean (0.0858749), correlation (0.449789)*/, + -5,-13, -5,-2/*mean (0.122402), correlation (0.450201)*/, + 8,-8, 9,-13/*mean (0.125416), correlation (0.453224)*/, + -9,-11, -9,0/*mean (0.130128), correlation (0.458724)*/, + 1,-8, 1,-2/*mean (0.132467), correlation (0.440133)*/, + 7,-4, 9,1/*mean (0.132692), correlation (0.454)*/, + -2,1, -1,-4/*mean (0.135695), correlation (0.455739)*/, + 11,-6, 12,-11/*mean (0.142904), correlation (0.446114)*/, + -12,-9, -6,4/*mean (0.146165), correlation (0.451473)*/, + 3,7, 7,12/*mean (0.147627), correlation (0.456643)*/, + 5,5, 10,8/*mean (0.152901), correlation (0.455036)*/, + 0,-4, 2,8/*mean (0.167083), correlation (0.459315)*/, + -9,12, -5,-13/*mean (0.173234), correlation (0.454706)*/, + 0,7, 2,12/*mean (0.18312), correlation (0.433855)*/, + -1,2, 1,7/*mean (0.185504), correlation (0.443838)*/, + 5,11, 7,-9/*mean (0.185706), correlation (0.451123)*/, + 3,5, 6,-8/*mean (0.188968), correlation (0.455808)*/, + -13,-4, -8,9/*mean (0.191667), correlation (0.459128)*/, + -5,9, -3,-3/*mean (0.193196), correlation (0.458364)*/, + -4,-7, -3,-12/*mean (0.196536), correlation (0.455782)*/, + 6,5, 8,0/*mean (0.1972), correlation (0.450481)*/, + -7,6, -6,12/*mean (0.199438), correlation (0.458156)*/, + -13,6, -5,-2/*mean (0.211224), correlation (0.449548)*/, + 1,-10, 3,10/*mean (0.211718), correlation (0.440606)*/, + 4,1, 8,-4/*mean (0.213034), correlation (0.443177)*/, + -2,-2, 2,-13/*mean (0.234334), correlation (0.455304)*/, + 2,-12, 12,12/*mean (0.235684), correlation (0.443436)*/, + -2,-13, 0,-6/*mean (0.237674), correlation (0.452525)*/, + 4,1, 9,3/*mean (0.23962), correlation (0.444824)*/, + -6,-10, -3,-5/*mean (0.248459), correlation (0.439621)*/, + -3,-13, -1,1/*mean (0.249505), correlation (0.456666)*/, + 7,5, 12,-11/*mean (0.00119208), correlation (0.495466)*/, + 4,-2, 5,-7/*mean (0.00372245), correlation (0.484214)*/, + -13,9, -9,-5/*mean (0.00741116), correlation (0.499854)*/, + 7,1, 8,6/*mean (0.0208952), correlation (0.499773)*/, + 7,-8, 7,6/*mean (0.0220085), correlation (0.501609)*/, + -7,-4, -7,1/*mean (0.0233806), correlation (0.496568)*/, + -8,11, -7,-8/*mean (0.0236505), correlation (0.489719)*/, + -13,6, -12,-8/*mean (0.0268781), correlation (0.503487)*/, + 2,4, 3,9/*mean (0.0323324), correlation (0.501938)*/, + 10,-5, 12,3/*mean (0.0399235), correlation (0.494029)*/, + -6,-5, -6,7/*mean (0.0420153), correlation (0.486579)*/, + 8,-3, 9,-8/*mean (0.0548021), correlation (0.484237)*/, + 2,-12, 2,8/*mean (0.0616622), correlation (0.496642)*/, + -11,-2, -10,3/*mean (0.0627755), correlation (0.498563)*/, + -12,-13, -7,-9/*mean (0.0829622), correlation (0.495491)*/, + -11,0, -10,-5/*mean (0.0843342), correlation (0.487146)*/, + 5,-3, 11,8/*mean (0.0929937), correlation (0.502315)*/, + 
-2,-13, -1,12/*mean (0.113327), correlation (0.48941)*/, + -1,-8, 0,9/*mean (0.132119), correlation (0.467268)*/, + -13,-11, -12,-5/*mean (0.136269), correlation (0.498771)*/, + -10,-2, -10,11/*mean (0.142173), correlation (0.498714)*/, + -3,9, -2,-13/*mean (0.144141), correlation (0.491973)*/, + 2,-3, 3,2/*mean (0.14892), correlation (0.500782)*/, + -9,-13, -4,0/*mean (0.150371), correlation (0.498211)*/, + -4,6, -3,-10/*mean (0.152159), correlation (0.495547)*/, + -4,12, -2,-7/*mean (0.156152), correlation (0.496925)*/, + -6,-11, -4,9/*mean (0.15749), correlation (0.499222)*/, + 6,-3, 6,11/*mean (0.159211), correlation (0.503821)*/, + -13,11, -5,5/*mean (0.162427), correlation (0.501907)*/, + 11,11, 12,6/*mean (0.16652), correlation (0.497632)*/, + 7,-5, 12,-2/*mean (0.169141), correlation (0.484474)*/, + -1,12, 0,7/*mean (0.169456), correlation (0.495339)*/, + -4,-8, -3,-2/*mean (0.171457), correlation (0.487251)*/, + -7,1, -6,7/*mean (0.175), correlation (0.500024)*/, + -13,-12, -8,-13/*mean (0.175866), correlation (0.497523)*/, + -7,-2, -6,-8/*mean (0.178273), correlation (0.501854)*/, + -8,5, -6,-9/*mean (0.181107), correlation (0.494888)*/, + -5,-1, -4,5/*mean (0.190227), correlation (0.482557)*/, + -13,7, -8,10/*mean (0.196739), correlation (0.496503)*/, + 1,5, 5,-13/*mean (0.19973), correlation (0.499759)*/, + 1,0, 10,-13/*mean (0.204465), correlation (0.49873)*/, + 9,12, 10,-1/*mean (0.209334), correlation (0.49063)*/, + 5,-8, 10,-9/*mean (0.211134), correlation (0.503011)*/, + -1,11, 1,-13/*mean (0.212), correlation (0.499414)*/, + -9,-3, -6,2/*mean (0.212168), correlation (0.480739)*/, + -1,-10, 1,12/*mean (0.212731), correlation (0.502523)*/, + -13,1, -8,-10/*mean (0.21327), correlation (0.489786)*/, + 8,-11, 10,-6/*mean (0.214159), correlation (0.488246)*/, + 2,-13, 3,-6/*mean (0.216993), correlation (0.50287)*/, + 7,-13, 12,-9/*mean (0.223639), correlation (0.470502)*/, + -10,-10, -5,-7/*mean (0.224089), correlation (0.500852)*/, + -10,-8, -8,-13/*mean (0.228666), correlation (0.502629)*/, + 4,-6, 8,5/*mean (0.22906), correlation (0.498305)*/, + 3,12, 8,-13/*mean (0.233378), correlation (0.503825)*/, + -4,2, -3,-3/*mean (0.234323), correlation (0.476692)*/, + 5,-13, 10,-12/*mean (0.236392), correlation (0.475462)*/, + 4,-13, 5,-1/*mean (0.236842), correlation (0.504132)*/, + -9,9, -4,3/*mean (0.236977), correlation (0.497739)*/, + 0,3, 3,-9/*mean (0.24314), correlation (0.499398)*/, + -12,1, -6,1/*mean (0.243297), correlation (0.489447)*/, + 3,2, 4,-8/*mean (0.00155196), correlation (0.553496)*/, + -10,-10, -10,9/*mean (0.00239541), correlation (0.54297)*/, + 8,-13, 12,12/*mean (0.0034413), correlation (0.544361)*/, + -8,-12, -6,-5/*mean (0.003565), correlation (0.551225)*/, + 2,2, 3,7/*mean (0.00835583), correlation (0.55285)*/, + 10,6, 11,-8/*mean (0.00885065), correlation (0.540913)*/, + 6,8, 8,-12/*mean (0.0101552), correlation (0.551085)*/, + -7,10, -6,5/*mean (0.0102227), correlation (0.533635)*/, + -3,-9, -3,9/*mean (0.0110211), correlation (0.543121)*/, + -1,-13, -1,5/*mean (0.0113473), correlation (0.550173)*/, + -3,-7, -3,4/*mean (0.0140913), correlation (0.554774)*/, + -8,-2, -8,3/*mean (0.017049), correlation (0.55461)*/, + 4,2, 12,12/*mean (0.01778), correlation (0.546921)*/, + 2,-5, 3,11/*mean (0.0224022), correlation (0.549667)*/, + 6,-9, 11,-13/*mean (0.029161), correlation (0.546295)*/, + 3,-1, 7,12/*mean (0.0303081), correlation (0.548599)*/, + 11,-1, 12,4/*mean (0.0355151), correlation (0.523943)*/, + -3,0, -3,6/*mean (0.0417904), correlation (0.543395)*/, 
+ 4,-11, 4,12/*mean (0.0487292), correlation (0.542818)*/, + 2,-4, 2,1/*mean (0.0575124), correlation (0.554888)*/, + -10,-6, -8,1/*mean (0.0594242), correlation (0.544026)*/, + -13,7, -11,1/*mean (0.0597391), correlation (0.550524)*/, + -13,12, -11,-13/*mean (0.0608974), correlation (0.55383)*/, + 6,0, 11,-13/*mean (0.065126), correlation (0.552006)*/, + 0,-1, 1,4/*mean (0.074224), correlation (0.546372)*/, + -13,3, -9,-2/*mean (0.0808592), correlation (0.554875)*/, + -9,8, -6,-3/*mean (0.0883378), correlation (0.551178)*/, + -13,-6, -8,-2/*mean (0.0901035), correlation (0.548446)*/, + 5,-9, 8,10/*mean (0.0949843), correlation (0.554694)*/, + 2,7, 3,-9/*mean (0.0994152), correlation (0.550979)*/, + -1,-6, -1,-1/*mean (0.10045), correlation (0.552714)*/, + 9,5, 11,-2/*mean (0.100686), correlation (0.552594)*/, + 11,-3, 12,-8/*mean (0.101091), correlation (0.532394)*/, + 3,0, 3,5/*mean (0.101147), correlation (0.525576)*/, + -1,4, 0,10/*mean (0.105263), correlation (0.531498)*/, + 3,-6, 4,5/*mean (0.110785), correlation (0.540491)*/, + -13,0, -10,5/*mean (0.112798), correlation (0.536582)*/, + 5,8, 12,11/*mean (0.114181), correlation (0.555793)*/, + 8,9, 9,-6/*mean (0.117431), correlation (0.553763)*/, + 7,-4, 8,-12/*mean (0.118522), correlation (0.553452)*/, + -10,4, -10,9/*mean (0.12094), correlation (0.554785)*/, + 7,3, 12,4/*mean (0.122582), correlation (0.555825)*/, + 9,-7, 10,-2/*mean (0.124978), correlation (0.549846)*/, + 7,0, 12,-2/*mean (0.127002), correlation (0.537452)*/, + -1,-6, 0,-11/*mean (0.127148), correlation (0.547401)*/ + }; + + void initializeOrbPattern(const Point* pattern0, Mat& pattern, int ntuples, int tupleSize, int poolSize) + { + RNG rng(0x12345678); + + pattern.create(2, ntuples * tupleSize, CV_32SC1); + pattern.setTo(Scalar::all(0)); + + int* pattern_x_ptr = pattern.ptr(0); + int* pattern_y_ptr = pattern.ptr(1); + + for (int i = 0; i < ntuples; i++) + { + for (int k = 0; k < tupleSize; k++) + { + for(;;) + { + int idx = rng.uniform(0, poolSize); + Point pt = pattern0[idx]; + + int k1; + for (k1 = 0; k1 < k; k1++) + if (pattern_x_ptr[tupleSize * i + k1] == pt.x && pattern_y_ptr[tupleSize * i + k1] == pt.y) + break; + + if (k1 == k) + { + pattern_x_ptr[tupleSize * i + k] = pt.x; + pattern_y_ptr[tupleSize * i + k] = pt.y; + break; + } + } + } + } + } + + void makeRandomPattern(int patchSize, Point* pattern, int npoints) + { + // we always start with a fixed seed, + // to make patterns the same on each run + RNG rng(0x34985739); + + for (int i = 0; i < npoints; i++) + { + pattern[i].x = rng.uniform(-patchSize / 2, patchSize / 2 + 1); + pattern[i].y = rng.uniform(-patchSize / 2, patchSize / 2 + 1); + } + } +} + +cv::ocl::ORB_OCL::ORB_OCL(int nFeatures, float scaleFactor, int nLevels, int edgeThreshold, int firstLevel, int WTA_K, int scoreType, int patchSize) : + nFeatures_(nFeatures), scaleFactor_(scaleFactor), nLevels_(nLevels), edgeThreshold_(edgeThreshold), firstLevel_(firstLevel), WTA_K_(WTA_K), + scoreType_(scoreType), patchSize_(patchSize), + fastDetector_(DEFAULT_FAST_THRESHOLD) +{ + CV_Assert(patchSize_ >= 2); + + // fill the extractors and descriptors for the corresponding scales + float factor = 1.0f / scaleFactor_; + float n_desired_features_per_scale = nFeatures_ * (1.0f - factor) / (1.0f - std::pow(factor, nLevels_)); + + n_features_per_level_.resize(nLevels_); + size_t sum_n_features = 0; + for (int level = 0; level < nLevels_ - 1; ++level) + { + n_features_per_level_[level] = cvRound(n_desired_features_per_scale); + sum_n_features += 
n_features_per_level_[level]; + n_desired_features_per_scale *= factor; + } + n_features_per_level_[nLevels_ - 1] = nFeatures - sum_n_features; + + // pre-compute the end of a row in a circular patch + int half_patch_size = patchSize_ / 2; + std::vector u_max(half_patch_size + 2); + for (int v = 0; v <= half_patch_size * std::sqrt(2.f) / 2 + 1; ++v) + u_max[v] = cvRound(std::sqrt(static_cast(half_patch_size * half_patch_size - v * v))); + + // Make sure we are symmetric + for (int v = half_patch_size, v_0 = 0; v >= half_patch_size * std::sqrt(2.f) / 2; --v) + { + while (u_max[v_0] == u_max[v_0 + 1]) + ++v_0; + u_max[v] = v_0; + ++v_0; + } + CV_Assert(u_max.size() < 32); + //cv::cuda::device::orb::loadUMax(&u_max[0], static_cast(u_max.size())); + uMax_ = oclMat(1, u_max.size(), CV_32SC1, &u_max[0]); + + // Calc pattern + const int npoints = 512; + Point pattern_buf[npoints]; + const Point* pattern0 = (const Point*)bit_pattern_31_; + if (patchSize_ != 31) + { + pattern0 = pattern_buf; + makeRandomPattern(patchSize_, pattern_buf, npoints); + } + + CV_Assert(WTA_K_ == 2 || WTA_K_ == 3 || WTA_K_ == 4); + + Mat h_pattern; + + if (WTA_K_ == 2) + { + h_pattern.create(2, npoints, CV_32SC1); + + int* pattern_x_ptr = h_pattern.ptr(0); + int* pattern_y_ptr = h_pattern.ptr(1); + + for (int i = 0; i < npoints; ++i) + { + pattern_x_ptr[i] = pattern0[i].x; + pattern_y_ptr[i] = pattern0[i].y; + } + } + else + { + int ntuples = descriptorSize() * 4; + initializeOrbPattern(pattern0, h_pattern, ntuples, WTA_K_, npoints); + } + + pattern_.upload(h_pattern); + + //blurFilter = ocl::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101); + blurFilter = ocl::createGaussianFilter_GPU(CV_8UC1, Size(7, 7), 2, 2, BORDER_REFLECT_101); + + blurForDescriptor = true; +} + +namespace +{ + inline float getScale(float scaleFactor, int firstLevel, int level) + { + return pow(scaleFactor, level - firstLevel); + } +} + +void cv::ocl::ORB_OCL::buildScalePyramids(const oclMat& image, const oclMat& mask) +{ + CV_Assert(image.type() == CV_8UC1); + CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size())); + + imagePyr_.resize(nLevels_); + maskPyr_.resize(nLevels_); + + for (int level = 0; level < nLevels_; ++level) + { + float scale = 1.0f / getScale(scaleFactor_, firstLevel_, level); + + Size sz(cvRound(image.cols * scale), cvRound(image.rows * scale)); + + ensureSizeIsEnough(sz, image.type(), imagePyr_[level]); + ensureSizeIsEnough(sz, CV_8UC1, maskPyr_[level]); + maskPyr_[level].setTo(Scalar::all(255)); + + // Compute the resized image + if (level != firstLevel_) + { + if (level < firstLevel_) + { + ocl::resize(image, imagePyr_[level], sz, 0, 0, INTER_LINEAR); + + if (!mask.empty()) + ocl::resize(mask, maskPyr_[level], sz, 0, 0, INTER_LINEAR); + } + else + { + ocl::resize(imagePyr_[level - 1], imagePyr_[level], sz, 0, 0, INTER_LINEAR); + + if (!mask.empty()) + { + ocl::resize(maskPyr_[level - 1], maskPyr_[level], sz, 0, 0, INTER_LINEAR); + ocl::threshold(maskPyr_[level], maskPyr_[level], 254, 0, THRESH_TOZERO); + } + } + } + else + { + image.copyTo(imagePyr_[level]); + + if (!mask.empty()) + mask.copyTo(maskPyr_[level]); + } + + // Filter keypoints by image border + ensureSizeIsEnough(sz, CV_8UC1, buf_); + buf_.setTo(Scalar::all(0)); + Rect inner(edgeThreshold_, edgeThreshold_, sz.width - 2 * edgeThreshold_, sz.height - 2 * edgeThreshold_); + buf_(inner).setTo(Scalar::all(255)); + + ocl::bitwise_and(maskPyr_[level], buf_, maskPyr_[level]); + } +} + +static void HarrisResponses_OCL(const 
oclMat& img, oclMat& keypoints, const int npoints, int blockSize, float harris_k) +{ + size_t localThreads[3] = {32, 8, 1}; + size_t globalThreads[3] = {divUp(npoints, localThreads[1]) * localThreads[1] * localThreads[0], + 1, + 1}; + + Context *clCxt = Context::getContext(); + String kernelName = "HarrisResponses"; + std::vector< std::pair > args; + + int imgStep = img.step / img.elemSize(); + int keypointsStep = keypoints.step / keypoints.elemSize(); + + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&npoints)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&blockSize)); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&harris_k)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&imgStep)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsStep)); + + bool is_cpu = isCpuDevice(); + if (is_cpu) + openCLExecuteKernel(clCxt, &orb, kernelName, globalThreads, localThreads, args, -1, -1, (char*)"-D CPU"); + else + { + cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &orb, kernelName); + int wave_size = (int)queryWaveFrontSize(kernel); + openCLSafeCall(clReleaseKernel(kernel)); + + std::string opt = format("-D WAVE_SIZE=%d", wave_size); + openCLExecuteKernel(Context::getContext(), &orb, kernelName, globalThreads, localThreads, args, -1, -1, opt.c_str()); + } +} + +static void IC_Angle_OCL(const oclMat& image, oclMat& keypoints, const oclMat& uMax, int npoints, int half_k) +{ + size_t localThreads[3] = {32, 8, 1}; + size_t globalThreads[3] = {divUp(npoints, localThreads[1]) * localThreads[1] * localThreads[0], + 1, + 1}; + + Context *clCxt = Context::getContext(); + String kernelName = "IC_Angle"; + std::vector< std::pair > args; + + int imageStep = image.step / image.elemSize(); + int keypointsStep = keypoints.step / keypoints.elemSize(); + + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&image.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&uMax.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&npoints)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&half_k)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&imageStep)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsStep)); + + bool is_cpu = isCpuDevice(); + if (is_cpu) + openCLExecuteKernel(clCxt, &orb, kernelName, globalThreads, localThreads, args, -1, -1, (char*)"-D CPU"); + else + { + cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &orb, kernelName); + int wave_size = (int)queryWaveFrontSize(kernel); + openCLSafeCall(clReleaseKernel(kernel)); + + std::string opt = format("-D WAVE_SIZE=%d", wave_size); + openCLExecuteKernel(Context::getContext(), &orb, kernelName, globalThreads, localThreads, args, -1, -1, opt.c_str()); + } +} + +static void convertRowsToChannels_OCL(const oclMat& keypointsIn, oclMat& keypointsOut, int npoints) +{ + size_t localThreads[3] = {256, 1, 1}; + size_t globalThreads[3] = {divUp(npoints, localThreads[0]) * localThreads[0], + 1, + 1}; + + Context *clCxt = Context::getContext(); + String kernelName = "convertRowsToChannels"; + std::vector< std::pair > args; + + int keypointsInStep = keypointsIn.step / keypointsIn.elemSize(); + int keypointsOutStep = keypointsOut.step / keypointsOut.elemSize(); + + 
args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypointsIn.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypointsOut.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&npoints)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsInStep)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsOutStep)); + + openCLExecuteKernel(clCxt, &orb, kernelName, globalThreads, localThreads, args, -1, -1); +} + +static void convertChannelsToRows_OCL(const oclMat& keypointsPos, const oclMat& keypointsResp, + oclMat& keypointsOut, int npoints) +{ + size_t localThreads[3] = {256, 1, 1}; + size_t globalThreads[3] = {divUp(npoints, localThreads[0]) * localThreads[0], + 1, + 1}; + + Context *clCxt = Context::getContext(); + String kernelName = "convertChannelsToRows"; + std::vector< std::pair > args; + + int keypointsPosStep = keypointsPos.step / keypointsResp.elemSize(); + int keypointsRespStep = keypointsResp.step / keypointsResp.elemSize(); + int keypointsOutStep = keypointsOut.step / keypointsOut.elemSize(); + + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypointsPos.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypointsResp.data)); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypointsOut.data)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&npoints)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsPosStep)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsRespStep)); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsOutStep)); + + openCLExecuteKernel(clCxt, &orb, kernelName, globalThreads, localThreads, args, -1, -1); +} + +void cv::ocl::ORB_OCL::computeKeyPointsPyramid() +{ + int half_patch_size = patchSize_ / 2; + + keyPointsPyr_.resize(nLevels_); + keyPointsCount_.resize(nLevels_); + + for (int level = 0; level < nLevels_; ++level) + { + keyPointsCount_[level] = fastDetector_.calcKeyPointsLocation(imagePyr_[level], maskPyr_[level]); + + if (keyPointsCount_[level] == 0) + continue; + + keyPointsCount_[level] = fastDetector_.getKeyPoints(keyPointsPyr_[level]); + + if (keyPointsCount_[level] == 0) + continue; + + int n_features = static_cast(n_features_per_level_[level]); + + if (scoreType_ == ORB::HARRIS_SCORE) + { + int featuresToIncrease = 2 * n_features - keyPointsPyr_[level].cols; + if (featuresToIncrease < 0) featuresToIncrease = 0; + + // Keeps more points than necessary as FAST does not give amazing corners + // and expands rows in the keypoint matrix to store angle, octave and size + copyMakeBorder(keyPointsPyr_[level], keyPointsPyr_[level], + 0, ROWS_COUNT-keyPointsPyr_[level].rows, + 0, featuresToIncrease, + BORDER_CONSTANT, 0.f); + + // Compute the Harris cornerness (better scoring than FAST) + HarrisResponses_OCL(imagePyr_[level], keyPointsPyr_[level], keyPointsCount_[level], 7, HARRIS_K); + } + else + { + // Expands rows in the keypoint matrix to store angle, octave and size + copyMakeBorder(keyPointsPyr_[level], keyPointsPyr_[level], + 0, ROWS_COUNT-keyPointsPyr_[level].rows, + 0, 0, + BORDER_CONSTANT, 0.f); + } + + + // To use sortByKey the keypoint locations have to be reorganized as one row and two channels, + // leaving the keys (responses) as a one row, one channel matrix. + // TODO: change this when sortByRow is implemented. 
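        // Note (illustrative, not from the patch): the block below packs the keypoint
        // (x, y) locations into a 1 x N two-channel matrix and the responses into a
        // 1 x N one-channel matrix so that ocl::sortByKey can order the points by
        // response; at most n_features of them are then written back into the
        // row-per-attribute layout by convertChannelsToRows_OCL before the
        // orientation is computed.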
+        oclMat keypointsResp, keypointsPos(1,keyPointsCount_[level],CV_32FC2);
+        keyPointsPyr_[level].row(RESPONSE_ROW).colRange(0,keyPointsCount_[level]).copyTo(keypointsResp);
+
+        convertRowsToChannels_OCL(keyPointsPyr_[level].rowRange(0,2), keypointsPos, keyPointsCount_[level]);
+        ocl::sortByKey(keypointsResp, keypointsPos, SORT_MERGE, true);
+
+        keyPointsCount_[level] = std::min(n_features,keyPointsCount_[level]);
+
+        // The data is then reorganized back to one channel, three rows (X_ROW, Y_ROW, RESPONSE_ROW)
+        convertChannelsToRows_OCL(keypointsPos, keypointsResp, keyPointsPyr_[level], keyPointsCount_[level]);
+
+        // Compute orientation
+        IC_Angle_OCL(imagePyr_[level], keyPointsPyr_[level], uMax_, keyPointsCount_[level], half_patch_size);
+    }
+}
+
+static void computeOrbDescriptor_OCL(const oclMat& img, const oclMat& keypoints, const oclMat& pattern,
+                                     oclMat& desc, const int npoints, const int dsize, const int WTA_K,
+                                     const int offset)
+{
+    size_t localThreads[3] = {32, 8, 1};
+    size_t globalThreads[3] = {divUp(dsize, localThreads[0]) * localThreads[0],
+                               divUp(npoints, localThreads[1]) * localThreads[1],
+                               1};
+
+    Context *clCxt = Context::getContext();
+    String kernelName = "computeOrbDescriptor";
+    std::vector< std::pair<size_t, const void *> > args;
+
+    int imgStep = img.step / img.elemSize();
+    int keypointsStep = keypoints.step / keypoints.elemSize();
+    int patternStep = pattern.step / pattern.elemSize();
+    int descStep = desc.step / desc.elemSize();
+
+    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&img.data));
+    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypoints.data));
+    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&pattern.data));
+    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&desc.data));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&npoints));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&dsize));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&WTA_K));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&offset));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&imgStep));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsStep));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&patternStep));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&descStep));
+
+    openCLExecuteKernel(clCxt, &orb, kernelName, globalThreads, localThreads, args, -1, -1);
+}
+
+void cv::ocl::ORB_OCL::computeDescriptors(oclMat& descriptors)
+{
+    int nAllkeypoints = 0;
+
+    for (int level = 0; level < nLevels_; ++level)
+        nAllkeypoints += keyPointsCount_[level];
+
+    if (nAllkeypoints == 0)
+    {
+        descriptors.release();
+        return;
+    }
+
+    ensureSizeIsEnough(nAllkeypoints, descriptorSize(), CV_8UC1, descriptors);
+
+    int offset = 0;
+
+    for (int level = 0; level < nLevels_; ++level)
+    {
+        if (keyPointsCount_[level] == 0)
+            continue;
+
+        if (blurForDescriptor)
+        {
+            // preprocess the resized image
+            ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_);
+            blurFilter->apply(imagePyr_[level], buf_);
+        }
+
+        computeOrbDescriptor_OCL(blurForDescriptor ?
+            buf_ : imagePyr_[level], keyPointsPyr_[level],
+            pattern_, descriptors, keyPointsCount_[level], descriptorSize(), WTA_K_, offset);
+
+        offset += keyPointsCount_[level];
+    }
+}
+
+static void mergeLocation_OCL(const oclMat& keypointsIn, oclMat& keypointsOut, const int npoints,
+                              const int offset, const float scale, const int octave, const float size)
+{
+    size_t localThreads[3] = {256, 1, 1};
+    size_t globalThreads[3] = {divUp(npoints, localThreads[0]) * localThreads[0],
+                               1,
+                               1};
+
+    Context *clCxt = Context::getContext();
+    String kernelName = "mergeLocation";
+    std::vector< std::pair<size_t, const void *> > args;
+
+    int keypointsInStep = keypointsIn.step / keypointsIn.elemSize();
+    int keypointsOutStep = keypointsOut.step / keypointsOut.elemSize();
+
+    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypointsIn.data));
+    args.push_back( std::make_pair( sizeof(cl_mem), (void *)&keypointsOut.data));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&npoints));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&offset));
+    args.push_back( std::make_pair( sizeof(cl_float), (void *)&scale));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&octave));
+    args.push_back( std::make_pair( sizeof(cl_float), (void *)&size));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsInStep));
+    args.push_back( std::make_pair( sizeof(cl_int), (void *)&keypointsOutStep));
+
+    openCLExecuteKernel(clCxt, &orb, kernelName, globalThreads, localThreads, args, -1, -1);
+}
+
+void cv::ocl::ORB_OCL::mergeKeyPoints(oclMat& keypoints)
+{
+    int nAllkeypoints = 0;
+
+    for (int level = 0; level < nLevels_; ++level)
+        nAllkeypoints += keyPointsCount_[level];
+
+    if (nAllkeypoints == 0)
+    {
+        keypoints.release();
+        return;
+    }
+
+    ensureSizeIsEnough(ROWS_COUNT, nAllkeypoints, CV_32FC1, keypoints);
+
+    int offset = 0;
+
+    for (int level = 0; level < nLevels_; ++level)
+    {
+        if (keyPointsCount_[level] == 0)
+            continue;
+
+        float sf = getScale(scaleFactor_, firstLevel_, level);
+
+        float locScale = level != firstLevel_ ?
+            sf : 1.0f;
+        float size = patchSize_ * sf;
+
+        mergeLocation_OCL(keyPointsPyr_[level], keypoints, keyPointsCount_[level], offset, locScale, level, size);
+
+        offset += keyPointsCount_[level];
+    }
+}
+
+void cv::ocl::ORB_OCL::downloadKeyPoints(const oclMat &d_keypoints, std::vector<KeyPoint>& keypoints)
+{
+    if (d_keypoints.empty())
+    {
+        keypoints.clear();
+        return;
+    }
+
+    Mat h_keypoints(d_keypoints);
+
+    convertKeyPoints(h_keypoints, keypoints);
+}
+
+void cv::ocl::ORB_OCL::convertKeyPoints(const Mat &d_keypoints, std::vector<KeyPoint>& keypoints)
+{
+    if (d_keypoints.empty())
+    {
+        keypoints.clear();
+        return;
+    }
+
+    CV_Assert(d_keypoints.type() == CV_32FC1 && d_keypoints.rows == ROWS_COUNT);
+
+    const float* x_ptr = d_keypoints.ptr<float>(X_ROW);
+    const float* y_ptr = d_keypoints.ptr<float>(Y_ROW);
+    const float* response_ptr = d_keypoints.ptr<float>(RESPONSE_ROW);
+    const float* angle_ptr = d_keypoints.ptr<float>(ANGLE_ROW);
+    const float* octave_ptr = d_keypoints.ptr<float>(OCTAVE_ROW);
+    const float* size_ptr = d_keypoints.ptr<float>(SIZE_ROW);
+
+    keypoints.resize(d_keypoints.cols);
+
+    for (int i = 0; i < d_keypoints.cols; ++i)
+    {
+        KeyPoint kp;
+
+        kp.pt.x = x_ptr[i];
+        kp.pt.y = y_ptr[i];
+        kp.response = response_ptr[i];
+        kp.angle = angle_ptr[i];
+        kp.octave = static_cast<int>(octave_ptr[i]);
+        kp.size = size_ptr[i];
+
+        keypoints[i] = kp;
+    }
+}
+
+void cv::ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints)
+{
+    buildScalePyramids(image, mask);
+    computeKeyPointsPyramid();
+    mergeKeyPoints(keypoints);
+}
+
+void cv::ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints, oclMat& descriptors)
+{
+    buildScalePyramids(image, mask);
+    computeKeyPointsPyramid();
+    computeDescriptors(descriptors);
+    mergeKeyPoints(keypoints);
+}
+
+void cv::ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints)
+{
+    (*this)(image, mask, d_keypoints_);
+    downloadKeyPoints(d_keypoints_, keypoints);
+}
+
+void cv::ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints, oclMat& descriptors)
+{
+    (*this)(image, mask, d_keypoints_, descriptors);
+    downloadKeyPoints(d_keypoints_, keypoints);
+}
+
+void cv::ocl::ORB_OCL::release()
+{
+    imagePyr_.clear();
+    maskPyr_.clear();
+
+    buf_.release();
+
+    keyPointsPyr_.clear();
+
+    fastDetector_.release();
+
+    d_keypoints_.release();
+
+    uMax_.release();
+}
diff --git a/modules/ocl/src/precomp.hpp b/modules/ocl/src/precomp.hpp
index 9cdb07aae..4cd700a16 100644
--- a/modules/ocl/src/precomp.hpp
+++ b/modules/ocl/src/precomp.hpp
@@ -72,6 +72,7 @@
 #include "opencv2/imgproc.hpp"
 #include "opencv2/objdetect/objdetect_c.h"
 #include "opencv2/ocl.hpp"
+#include "opencv2/features2d.hpp"
 #include "opencv2/core/utility.hpp"
 #include "opencv2/core/private.hpp"
diff --git a/modules/ocl/test/test_orb.cpp b/modules/ocl/test/test_orb.cpp
new file mode 100644
index 000000000..8df7e4862
--- /dev/null
+++ b/modules/ocl/test/test_orb.cpp
@@ -0,0 +1,138 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+// Authors:
+//    * Peter Andreas Entschev, peter@entschev.com
+//
+//M*/
+
+#include "test_precomp.hpp"
+
+#ifdef HAVE_OPENCL
+
+////////////////////////////////////////////////////////
+// ORB
+
+namespace
+{
+    IMPLEMENT_PARAM_CLASS(ORB_FeaturesCount, int)
+    IMPLEMENT_PARAM_CLASS(ORB_ScaleFactor, float)
+    IMPLEMENT_PARAM_CLASS(ORB_LevelsCount, int)
+    IMPLEMENT_PARAM_CLASS(ORB_EdgeThreshold, int)
+    IMPLEMENT_PARAM_CLASS(ORB_firstLevel, int)
+    IMPLEMENT_PARAM_CLASS(ORB_WTA_K, int)
+    IMPLEMENT_PARAM_CLASS(ORB_PatchSize, int)
+    IMPLEMENT_PARAM_CLASS(ORB_BlurForDescriptor, bool)
+}
+
+CV_ENUM(ORB_ScoreType, ORB::HARRIS_SCORE, ORB::FAST_SCORE)
+
+PARAM_TEST_CASE(ORB, ORB_FeaturesCount, ORB_ScaleFactor, ORB_LevelsCount, ORB_EdgeThreshold,
+                ORB_firstLevel, ORB_WTA_K, ORB_ScoreType, ORB_PatchSize, ORB_BlurForDescriptor)
+{
+    int nFeatures;
+    float scaleFactor;
+    int nLevels;
+    int edgeThreshold;
+    int firstLevel;
+    int WTA_K;
+    int scoreType;
+    int patchSize;
+    bool blurForDescriptor;
+
+    virtual void SetUp()
+    {
+        nFeatures = GET_PARAM(0);
+        scaleFactor = GET_PARAM(1);
+        nLevels = GET_PARAM(2);
+        edgeThreshold = GET_PARAM(3);
+        firstLevel = GET_PARAM(4);
+        WTA_K = GET_PARAM(5);
+        scoreType = GET_PARAM(6);
+        patchSize = GET_PARAM(7);
+        blurForDescriptor = GET_PARAM(8);
+    }
+};
+
+OCL_TEST_P(ORB, Accuracy)
+{
+    cv::Mat image = readImage("gpu/perf/aloe.png", cv::IMREAD_GRAYSCALE);
+    ASSERT_FALSE(image.empty());
+
+    cv::Mat mask(image.size(), CV_8UC1, cv::Scalar::all(1));
+    mask(cv::Range(0, image.rows / 2), cv::Range(0, image.cols / 2)).setTo(cv::Scalar::all(0));
+
+    cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);
+    cv::ocl::oclMat ocl_mask = cv::ocl::oclMat(mask);
+
+    cv::ocl::ORB_OCL orb(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType, patchSize);
+    orb.blurForDescriptor = blurForDescriptor;
+
+    std::vector<cv::KeyPoint> keypoints;
+    cv::ocl::oclMat descriptors;
+    orb(ocl_image, ocl_mask, keypoints, descriptors);
+
+    cv::ORB orb_gold(nFeatures, scaleFactor, nLevels, edgeThreshold, firstLevel, WTA_K, scoreType,
+                     patchSize);
+
+    std::vector<cv::KeyPoint> keypoints_gold;
+    cv::Mat descriptors_gold;
+    orb_gold(image, mask, keypoints_gold, descriptors_gold);
+
+    cv::BFMatcher matcher(cv::NORM_HAMMING);
+    std::vector<cv::DMatch> matches;
+    matcher.match(descriptors_gold, cv::Mat(descriptors), matches);
+
+    int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints, matches);
+    double matchedRatio = static_cast<double>(matchedCount) / keypoints.size();
+
+    EXPECT_GT(matchedRatio, 0.35);
+}
+
+INSTANTIATE_TEST_CASE_P(OCL_Features2D, ORB, testing::Combine(
+    testing::Values(ORB_FeaturesCount(1000)),
+    testing::Values(ORB_ScaleFactor(1.2f)),
+    testing::Values(ORB_LevelsCount(4), ORB_LevelsCount(8)),
+    testing::Values(ORB_EdgeThreshold(31)),
+    testing::Values(ORB_firstLevel(0), ORB_firstLevel(2)),
+    testing::Values(ORB_WTA_K(2), ORB_WTA_K(3), ORB_WTA_K(4)),
+    testing::Values(ORB_ScoreType(cv::ORB::HARRIS_SCORE)),
+    testing::Values(ORB_PatchSize(31), ORB_PatchSize(29)),
+    testing::Values(ORB_BlurForDescriptor(false), ORB_BlurForDescriptor(true))));
+
+#endif
diff --git a/modules/ocl/test/utility.cpp b/modules/ocl/test/utility.cpp
index 7d43b2adc..3195019ca 100644
--- a/modules/ocl/test/utility.cpp
+++ b/modules/ocl/test/utility.cpp
@@ -325,4 +325,42 @@ testing::AssertionResult assertKeyPointsEquals(const char* gold_expr, const char
     return ::testing::AssertionSuccess();
 }
 
+int getMatchedPointsCount(std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual)
+{
+    std::sort(actual.begin(), actual.end(), KeyPointLess());
+    std::sort(gold.begin(), gold.end(), KeyPointLess());
+
+    int validCount = 0;
+
+    size_t sz = std::min(gold.size(), actual.size());
+    for (size_t i = 0; i < sz; ++i)
+    {
+        const cv::KeyPoint& p1 = gold[i];
+        const cv::KeyPoint& p2 = actual[i];
+
+        if (keyPointsEquals(p1, p2))
+            ++validCount;
+    }
+
+    return validCount;
+}
+
+int getMatchedPointsCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches)
+{
+    int validCount = 0;
+
+    for (size_t i = 0; i < matches.size(); ++i)
+    {
+        const cv::DMatch& m = matches[i];
+
+        const cv::KeyPoint& p1 = keypoints1[m.queryIdx];
+        const cv::KeyPoint& p2 = keypoints2[m.trainIdx];
+
+        if (keyPointsEquals(p1, p2))
+            ++validCount;
+    }
+
+    return validCount;
+}
+
 } // namespace cvtest
diff --git a/modules/ocl/test/utility.hpp b/modules/ocl/test/utility.hpp
index ab1a52b7f..2659a5363 100644
--- a/modules/ocl/test/utility.hpp
+++ b/modules/ocl/test/utility.hpp
@@ -56,6 +56,8 @@ namespace cvtest {
 testing::AssertionResult assertKeyPointsEquals(const char* gold_expr, const char* actual_expr, std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual);
 #define ASSERT_KEYPOINTS_EQ(gold, actual) EXPECT_PRED_FORMAT2(assertKeyPointsEquals, gold, actual)
 
+CV_EXPORTS int getMatchedPointsCount(std::vector<cv::KeyPoint>& gold, std::vector<cv::KeyPoint>& actual);
+CV_EXPORTS int getMatchedPointsCount(const std::vector<cv::KeyPoint>& keypoints1, const std::vector<cv::KeyPoint>& keypoints2, const std::vector<cv::DMatch>& matches);
+
 void showDiff(const Mat& src, const Mat& gold, const Mat& actual, double eps, bool alwaysShow = false);
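
For orientation, a minimal host-side usage sketch of the class added by this patch follows (illustrative only, not part of the patch): the image path is a placeholder, a full mask is uploaded explicitly, and the header layout of the ocl module is assumed to match the 2.4-style includes used above. ::

    #include <vector>
    #include "opencv2/core.hpp"
    #include "opencv2/highgui.hpp"   // cv::imread; header location is an assumption
    #include "opencv2/ocl.hpp"

    int main()
    {
        // Load a grayscale image; the path is a placeholder.
        cv::Mat img = cv::imread("scene.png", cv::IMREAD_GRAYSCALE);
        if (img.empty())
            return -1;

        // Upload the image and an all-ones mask (detect everywhere) to the device.
        cv::ocl::oclMat d_img(img);
        cv::ocl::oclMat d_mask(cv::Mat(img.size(), CV_8UC1, cv::Scalar::all(1)));

        // nFeatures, scaleFactor, nLevels; the remaining parameters keep their defaults.
        cv::ocl::ORB_OCL orb(1000, 1.2f, 8);
        orb.blurForDescriptor = true;

        // Detect keypoints (downloaded to the host) and compute descriptors (kept on the device).
        std::vector<cv::KeyPoint> keypoints;
        cv::ocl::oclMat d_descriptors;
        orb(d_img, d_mask, keypoints, d_descriptors);

        // Download the descriptors when host-side matching is needed.
        cv::Mat descriptors(d_descriptors);
        return 0;
    }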