78be4f66f7
Conflicts: CMakeLists.txt modules/calib3d/src/calibration.cpp modules/ocl/src/cl_programcache.cpp modules/ocl/src/filtering.cpp modules/ocl/src/imgproc.cpp samples/ocl/adaptive_bilateral_filter.cpp samples/ocl/bgfg_segm.cpp samples/ocl/clahe.cpp samples/ocl/facedetect.cpp samples/ocl/pyrlk_optical_flow.cpp samples/ocl/squares.cpp samples/ocl/surf_matcher.cpp samples/ocl/tvl1_optical_flow.cpp
340 lines
11 KiB
C++
340 lines
11 KiB
C++
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
//
|
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
//
|
|
// By downloading, copying, installing or using the software you agree to this license.
|
|
// If you do not agree to this license, do not download, install,
|
|
// copy or use the software.
|
|
//
|
|
//
|
|
// License Agreement
|
|
// For Open Source Computer Vision Library
|
|
//
|
|
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
|
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
|
// Third party copyrights are property of their respective owners.
|
|
//
|
|
// @Authors
|
|
// Peng Xiao, pengxiao@outlook.com
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without modification,
|
|
// are permitted provided that the following conditions are met:
|
|
//
|
|
// * Redistribution's of source code must retain the above copyright notice,
|
|
// this list of conditions and the following disclaimer.
|
|
//
|
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
// this list of conditions and the following disclaimer in the documentation
|
|
// and/or other materials provided with the distribution.
|
|
//
|
|
// * The name of the copyright holders may not be used to endorse or promote products
|
|
// derived from this software without specific prior written permission.
|
|
//
|
|
// This software is provided by the copyright holders and contributors as is and
|
|
// any express or implied warranties, including, but not limited to, the implied
|
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
// indirect, incidental, special, exemplary, or consequential damages
|
|
// (including, but not limited to, procurement of substitute goods or services;
|
|
// loss of use, data, or profits; or business interruption) however caused
|
|
// and on any theory of liability, whether in contract, strict liability,
|
|
// or tort (including negligence or otherwise) arising in any way out of
|
|
// the use of this software, even if advised of the possibility of such damage.
|
|
//
|
|
//M*/
|
|
#include "precomp.hpp"
|
|
#include "opencl_kernels.hpp"
|
|
|
|
using namespace cv;
|
|
using namespace cv::ocl;
|
|
|
|
// Selects the sorting backend used by GoodFeaturesToTrackDetector_OCL::operator():
// when true the corners are sorted through Sorter<CPU_STL>; when false one of the
// OpenCL sorters (Sorter<BITONIC> or Sorter<SELECTION>) is used instead.
static bool use_cpu_sorter = true;
|
|
|
|
namespace
|
|
{
|
|
// Tag values used to pick a Sorter<> specialization.
enum SortMethod
{
    CPU_STL,    // host-side sort based on std::sort
    BITONIC,    // OpenCL kernel "sortCorners_bitonicSort"
    SELECTION   // OpenCL kernels "sortCorners_selectionSortLocal" and "sortCorners_selectionSortFinal"
};
|
|
|
|
// Local work size along dimension 0 for the sorting kernels; the selection sorter
// also derives the byte size of its `lds_size` kernel argument from it
// (GROUP_SIZE * sizeof(cl_float2)).
const int GROUP_SIZE = 256;
|
|
|
|
// Primary template, left empty: the usable interface lives in the explicit
// specializations, each of which supplies an EigType typedef and a static
// sortCorners_caller(eig, corners, count).
template <SortMethod Method>
struct Sorter
{
    // typedef ... EigType;
};
|
|
|
|
//TODO(pengx): optimize the GPU sorters' performance so that the CPU sorter can be removed.
|
|
template<>
|
|
struct Sorter<CPU_STL>
|
|
{
|
|
typedef oclMat EigType;
|
|
static cv::Mutex cs;
|
|
static Mat mat_eig;
|
|
|
|
//prototype
|
|
static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2)
|
|
{
|
|
float v1 = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0]));
|
|
float v2 = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0]));
|
|
return v1 > v2;
|
|
}
|
|
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
|
|
{
|
|
cv::AutoLock lock(cs);
|
|
//temporarily use STL's sort function
|
|
Mat mat_corners = corners;
|
|
mat_eig = eig_tex;
|
|
std::sort(mat_corners.begin<cl_float2>(), mat_corners.begin<cl_float2>() + count, clfloat2Gt);
|
|
corners = mat_corners;
|
|
}
|
|
};
|
|
cv::Mutex Sorter<CPU_STL>::cs;
|
|
cv::Mat Sorter<CPU_STL>::mat_eig;
|
|
|
|
template<>
|
|
struct Sorter<BITONIC>
|
|
{
|
|
typedef TextureCL EigType;
|
|
|
|
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
|
|
{
|
|
Context * cxt = Context::getContext();
|
|
size_t globalThreads[3] = {count / 2, 1, 1};
|
|
size_t localThreads[3] = {GROUP_SIZE, 1, 1};
|
|
|
|
// 2^numStages should be equal to count or the output is invalid
|
|
int numStages = 0;
|
|
for(int i = count; i > 1; i >>= 1)
|
|
{
|
|
++numStages;
|
|
}
|
|
const int argc = 5;
|
|
std::vector< std::pair<size_t, const void *> > args(argc);
|
|
String kernelname = "sortCorners_bitonicSort";
|
|
args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex);
|
|
args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
|
|
args[2] = std::make_pair(sizeof(cl_int), (void *)&count);
|
|
for(int stage = 0; stage < numStages; ++stage)
|
|
{
|
|
args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
|
|
for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
|
|
{
|
|
args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
|
|
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
template<>
|
|
struct Sorter<SELECTION>
|
|
{
|
|
typedef TextureCL EigType;
|
|
|
|
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
|
|
{
|
|
Context * cxt = Context::getContext();
|
|
|
|
size_t globalThreads[3] = {count, 1, 1};
|
|
size_t localThreads[3] = {GROUP_SIZE, 1, 1};
|
|
|
|
std::vector< std::pair<size_t, const void *> > args;
|
|
//local
|
|
String kernelname = "sortCorners_selectionSortLocal";
|
|
int lds_size = GROUP_SIZE * sizeof(cl_float2);
|
|
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) );
|
|
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) );
|
|
args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
|
|
args.push_back( std::make_pair( lds_size, (void*)NULL) );
|
|
|
|
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
|
|
|
|
//final
|
|
kernelname = "sortCorners_selectionSortFinal";
|
|
args.pop_back();
|
|
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
|
|
}
|
|
};
|
|
|
|
int findCorners_caller(
|
|
const TextureCL& eig,
|
|
const float threshold,
|
|
const oclMat& mask,
|
|
oclMat& corners,
|
|
const int max_count)
|
|
{
|
|
std::vector<int> k;
|
|
Context * cxt = Context::getContext();
|
|
|
|
std::vector< std::pair<size_t, const void*> > args;
|
|
String kernelname = "findCorners";
|
|
|
|
const int mask_strip = mask.step / mask.elemSize1();
|
|
|
|
oclMat g_counter(1, 1, CV_32SC1);
|
|
g_counter.setTo(0);
|
|
|
|
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eig ));
|
|
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&mask.data ));
|
|
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&corners.data ));
|
|
args.push_back(std::make_pair( sizeof(cl_int), (void*)&mask_strip));
|
|
args.push_back(std::make_pair( sizeof(cl_float), (void*)&threshold ));
|
|
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.rows ));
|
|
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.cols ));
|
|
args.push_back(std::make_pair( sizeof(cl_int), (void*)&max_count ));
|
|
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&g_counter.data ));
|
|
|
|
size_t globalThreads[3] = {eig.cols, eig.rows, 1};
|
|
size_t localThreads[3] = {16, 16, 1};
|
|
|
|
const char * opt = mask.empty() ? "" : "-D WITH_MASK";
|
|
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1, opt);
|
|
return std::min(Mat(g_counter).at<int>(0), max_count);
|
|
}
|
|
}//unnamed namespace
|
|
|
|
void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
|
|
{
|
|
CV_Assert(qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0);
|
|
CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()));
|
|
|
|
ensureSizeIsEnough(image.size(), CV_32F, eig_);
|
|
|
|
if (useHarrisDetector)
|
|
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
|
|
else
|
|
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
|
|
|
|
double maxVal = 0;
|
|
minMax(eig_, NULL, &maxVal);
|
|
|
|
ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
|
|
|
|
Ptr<TextureCL> eig_tex = bindTexturePtr(eig_);
|
|
int total = findCorners_caller(
|
|
*eig_tex,
|
|
static_cast<float>(maxVal * qualityLevel),
|
|
mask,
|
|
tmpCorners_,
|
|
tmpCorners_.cols);
|
|
|
|
if (total == 0)
|
|
{
|
|
corners.release();
|
|
return;
|
|
}
|
|
if(use_cpu_sorter)
|
|
{
|
|
Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total);
|
|
}
|
|
else
|
|
{
|
|
//if total is power of 2
|
|
if(((total - 1) & (total)) == 0)
|
|
{
|
|
Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total);
|
|
}
|
|
else
|
|
{
|
|
Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total);
|
|
}
|
|
}
|
|
|
|
if (minDistance < 1)
|
|
{
|
|
Rect roi_range(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1);
|
|
tmpCorners_(roi_range).copyTo(corners);
|
|
}
|
|
else
|
|
{
|
|
std::vector<Point2f> tmp(total);
|
|
downloadPoints(tmpCorners_, tmp);
|
|
|
|
std::vector<Point2f> tmp2;
|
|
tmp2.reserve(total);
|
|
|
|
const int cell_size = cvRound(minDistance);
|
|
const int grid_width = (image.cols + cell_size - 1) / cell_size;
|
|
const int grid_height = (image.rows + cell_size - 1) / cell_size;
|
|
|
|
std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
|
|
|
|
for (int i = 0; i < total; ++i)
|
|
{
|
|
Point2f p = tmp[i];
|
|
|
|
bool good = true;
|
|
|
|
int x_cell = static_cast<int>(p.x / cell_size);
|
|
int y_cell = static_cast<int>(p.y / cell_size);
|
|
|
|
int x1 = x_cell - 1;
|
|
int y1 = y_cell - 1;
|
|
int x2 = x_cell + 1;
|
|
int y2 = y_cell + 1;
|
|
|
|
// boundary check
|
|
x1 = std::max(0, x1);
|
|
y1 = std::max(0, y1);
|
|
x2 = std::min(grid_width - 1, x2);
|
|
y2 = std::min(grid_height - 1, y2);
|
|
|
|
for (int yy = y1; yy <= y2; yy++)
|
|
{
|
|
for (int xx = x1; xx <= x2; xx++)
|
|
{
|
|
std::vector<Point2f>& m = grid[yy * grid_width + xx];
|
|
|
|
if (!m.empty())
|
|
{
|
|
for(size_t j = 0; j < m.size(); j++)
|
|
{
|
|
float dx = p.x - m[j].x;
|
|
float dy = p.y - m[j].y;
|
|
|
|
if (dx * dx + dy * dy < minDistance * minDistance)
|
|
{
|
|
good = false;
|
|
goto break_out;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
break_out:
|
|
|
|
if(good)
|
|
{
|
|
grid[y_cell * grid_width + x_cell].push_back(p);
|
|
|
|
tmp2.push_back(p);
|
|
|
|
if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
|
|
break;
|
|
}
|
|
}
|
|
|
|
corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
|
|
}
|
|
}
|
|
void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v)
|
|
{
|
|
CV_DbgAssert(points.type() == CV_32FC2);
|
|
points_v.resize(points.cols);
|
|
openCLSafeCall(clEnqueueReadBuffer(
|
|
*(cl_command_queue*)getClCommandQueuePtr(),
|
|
reinterpret_cast<cl_mem>(points.data),
|
|
CL_TRUE,
|
|
0,
|
|
points.cols * sizeof(Point2f),
|
|
&points_v[0],
|
|
0,
|
|
NULL,
|
|
NULL));
|
|
}
|