Merge pull request #3210 from akarsakov:ocl_gftt_opt

This commit is contained in:
Alexander Alekhin 2014-10-07 09:06:54 +00:00
commit 14d5358982
2 changed files with 27 additions and 29 deletions

View File

@ -81,10 +81,12 @@ static bool ocl_goodFeaturesToTrack( InputArray _image, OutputArray _corners,
cornerMinEigenVal( _image, eig, blockSize, 3 );
Size imgsize = _image.size();
std::vector<Corner> tmpCorners;
size_t total, i, j, ncorners = 0, possibleCornersCount =
std::max(1024, static_cast<int>(imgsize.area() * 0.1));
bool haveMask = !_mask.empty();
UMat corners_buffer(1, (int)possibleCornersCount + 1, CV_32FC2);
CV_Assert(sizeof(Corner) == corners_buffer.elemSize());
Mat tmpCorners;
// find threshold
{
@ -108,7 +110,8 @@ static bool ocl_goodFeaturesToTrack( InputArray _image, OutputArray _corners,
ocl::KernelArg eigarg = ocl::KernelArg::ReadOnlyNoSize(eig),
dbarg = ocl::KernelArg::PtrWriteOnly(maxEigenValue),
maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);
maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
cornersarg = ocl::KernelArg::PtrWriteOnly(corners_buffer);
if (haveMask)
k.args(eigarg, eig.cols, (int)eig.total(), dbarg, maskarg);
@ -125,7 +128,7 @@ static bool ocl_goodFeaturesToTrack( InputArray _image, OutputArray _corners,
if (k2.empty())
return false;
k2.args(dbarg, (float)qualityLevel);
k2.args(dbarg, (float)qualityLevel, cornersarg);
if (!k2.runTask(false))
return false;
@ -138,24 +141,18 @@ static bool ocl_goodFeaturesToTrack( InputArray _image, OutputArray _corners,
if (k.empty())
return false;
UMat counter(1, 1, CV_32SC1, Scalar::all(0)),
corners(1, (int)possibleCornersCount, CV_32FC2, Scalar::all(-1));
CV_Assert(sizeof(Corner) == corners.elemSize());
ocl::KernelArg eigarg = ocl::KernelArg::ReadOnlyNoSize(eig),
cornersarg = ocl::KernelArg::PtrWriteOnly(corners),
counterarg = ocl::KernelArg::PtrReadWrite(counter),
cornersarg = ocl::KernelArg::PtrWriteOnly(corners_buffer),
thresholdarg = ocl::KernelArg::PtrReadOnly(maxEigenValue);
if (!haveMask)
k.args(eigarg, cornersarg, counterarg,
eig.rows - 2, eig.cols - 2, thresholdarg,
(int)possibleCornersCount);
k.args(eigarg, cornersarg, eig.rows - 2, eig.cols - 2, thresholdarg,
(int)possibleCornersCount);
else
{
UMat mask = _mask.getUMat();
k.args(eigarg, ocl::KernelArg::ReadOnlyNoSize(mask),
cornersarg, counterarg, eig.rows - 2, eig.cols - 2,
cornersarg, eig.rows - 2, eig.cols - 2,
thresholdarg, (int)possibleCornersCount);
}
@ -163,19 +160,17 @@ static bool ocl_goodFeaturesToTrack( InputArray _image, OutputArray _corners,
if (!k.run(2, globalsize, NULL, false))
return false;
total = std::min<size_t>(counter.getMat(ACCESS_READ).at<int>(0, 0), possibleCornersCount);
tmpCorners = corners_buffer.getMat(ACCESS_RW);
total = std::min<size_t>(tmpCorners.at<Vec2i>(0, 0)[0], possibleCornersCount);
if (total == 0)
{
_corners.release();
return true;
}
tmpCorners.resize(total);
Mat mcorners(1, (int)total, CV_32FC2, &tmpCorners[0]);
corners.colRange(0, (int)total).copyTo(mcorners);
}
std::sort(tmpCorners.begin(), tmpCorners.end());
Corner* corner_ptr = tmpCorners.ptr<Corner>() + 1;
std::sort(corner_ptr, corner_ptr + total);
std::vector<Point2f> corners;
corners.reserve(total);
@ -194,7 +189,7 @@ static bool ocl_goodFeaturesToTrack( InputArray _image, OutputArray _corners,
for( i = 0; i < total; i++ )
{
const Corner & c = tmpCorners[i];
const Corner & c = corner_ptr[i];
bool good = true;
int x_cell = c.x / cell_size;
@ -250,7 +245,7 @@ static bool ocl_goodFeaturesToTrack( InputArray _image, OutputArray _corners,
{
for( i = 0; i < total; i++ )
{
const Corner & c = tmpCorners[i];
const Corner & c = corner_ptr[i];
corners.push_back(Point2f((float)c.x, (float)c.y));
++ncorners;

View File

@ -91,7 +91,8 @@ __kernel void maxEigenVal(__global const uchar * srcptr, int src_step, int src_o
*(__global float *)(dstptr + (int)sizeof(float) * gid) = localmem_max[0];
}
__kernel void maxEigenValTask(__global float * dst, float qualityLevel)
__kernel void maxEigenValTask(__global float * dst, float qualityLevel,
__global int * cornersptr)
{
float maxval = -FLT_MAX;
@ -100,6 +101,7 @@ __kernel void maxEigenValTask(__global float * dst, float qualityLevel)
maxval = max(maxval, dst[x]);
dst[0] = maxval * qualityLevel;
cornersptr[0] = 0;
}
#elif OP_FIND_CORNERS
@ -110,12 +112,15 @@ __kernel void findCorners(__global const uchar * eigptr, int eig_step, int eig_o
#ifdef HAVE_MASK
__global const uchar * mask, int mask_step, int mask_offset,
#endif
__global uchar * cornersptr, __global int * counter,
int rows, int cols, __constant float * threshold, int max_corners)
__global uchar * cornersptr, int rows, int cols,
__constant float * threshold, int max_corners)
{
int x = get_global_id(0);
int y = get_global_id(1);
__global int* counter = (__global int*) cornersptr;
__global float2 * corners = (__global float2 *)(cornersptr + (int)sizeof(float2));
if (y < rows && x < cols
#ifdef HAVE_MASK
&& mask[mad24(y, mask_step, x + mask_offset)]
@ -144,11 +149,9 @@ __kernel void findCorners(__global const uchar * eigptr, int eig_step, int eig_o
int ind = atomic_inc(counter);
if (ind < max_corners)
{
__global float2 * corners = (__global float2 *)(cornersptr + ind * (int)sizeof(float2));
// pack and store eigenvalue and its coordinates
corners[0].x = val;
corners[0].y = as_float(y | (x << 16));
corners[ind].x = val;
corners[ind].y = as_float(y | (x << 16));
}
}
}