Optimization for HoughLinesP
This commit is contained in:
parent
eaf5a163b1
commit
66a8acfd3d
@ -117,7 +117,7 @@ namespace cv { namespace cuda { namespace device
|
|||||||
if (dir.x < 0)
|
if (dir.x < 0)
|
||||||
dir = -dir;
|
dir = -dir;
|
||||||
}
|
}
|
||||||
else if (pb[1].x == cols - 1 && (pb[0].y >= 0 && pb[0].y < rows))
|
else if (pb[1].x == cols - 1 && (pb[1].y >= 0 && pb[1].y < rows))
|
||||||
{
|
{
|
||||||
p0 = pb[1];
|
p0 = pb[1];
|
||||||
if (dir.x > 0)
|
if (dir.x > 0)
|
||||||
|
@ -221,7 +221,7 @@ HoughLinesSDiv( const Mat& img,
|
|||||||
std::vector<hough_index> lst;
|
std::vector<hough_index> lst;
|
||||||
|
|
||||||
CV_Assert( img.type() == CV_8UC1 );
|
CV_Assert( img.type() == CV_8UC1 );
|
||||||
CV_Assert( linesMax > 0 && rho > 0 && theta > 0 );
|
CV_Assert( linesMax > 0 );
|
||||||
|
|
||||||
threshold = MIN( threshold, 255 );
|
threshold = MIN( threshold, 255 );
|
||||||
|
|
||||||
@ -655,6 +655,8 @@ HoughLinesProbabilistic( Mat& image,
|
|||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
#ifdef HAVE_OPENCL
|
||||||
|
|
||||||
|
#define OCL_MAX_LINES 4096
|
||||||
|
|
||||||
static bool ocl_makePointsList(InputArray _src, OutputArray _pointsList, InputOutputArray _counters)
|
static bool ocl_makePointsList(InputArray _src, OutputArray _pointsList, InputOutputArray _counters)
|
||||||
{
|
{
|
||||||
UMat src = _src.getUMat();
|
UMat src = _src.getUMat();
|
||||||
@ -702,7 +704,7 @@ static bool ocl_fillAccum(InputArray _pointsList, OutputArray _accum, int total_
|
|||||||
if (fillAccumKernel.empty())
|
if (fillAccumKernel.empty())
|
||||||
return false;
|
return false;
|
||||||
globalThreads[0] = workgroup_size; globalThreads[1] = numangle;
|
globalThreads[0] = workgroup_size; globalThreads[1] = numangle;
|
||||||
fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum),
|
fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnlyNoSize(accum),
|
||||||
total_points, irho, (float) theta, numrho, numangle);
|
total_points, irho, (float) theta, numrho, numangle);
|
||||||
return fillAccumKernel.run(2, globalThreads, NULL, false);
|
return fillAccumKernel.run(2, globalThreads, NULL, false);
|
||||||
}
|
}
|
||||||
@ -714,7 +716,7 @@ static bool ocl_fillAccum(InputArray _pointsList, OutputArray _accum, int total_
|
|||||||
return false;
|
return false;
|
||||||
localThreads[0] = workgroup_size; localThreads[1] = 1;
|
localThreads[0] = workgroup_size; localThreads[1] = 1;
|
||||||
globalThreads[0] = workgroup_size; globalThreads[1] = numangle+2;
|
globalThreads[0] = workgroup_size; globalThreads[1] = numangle+2;
|
||||||
fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnly(accum),
|
fillAccumKernel.args(ocl::KernelArg::ReadOnlyNoSize(pointsList), ocl::KernelArg::WriteOnlyNoSize(accum),
|
||||||
total_points, irho, (float) theta, numrho, numangle);
|
total_points, irho, (float) theta, numrho, numangle);
|
||||||
return fillAccumKernel.run(2, globalThreads, localThreads, false);
|
return fillAccumKernel.run(2, globalThreads, localThreads, false);
|
||||||
}
|
}
|
||||||
@ -731,6 +733,9 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub
|
|||||||
if (min_theta < 0 || min_theta > max_theta ) {
|
if (min_theta < 0 || min_theta > max_theta ) {
|
||||||
CV_Error( CV_StsBadArg, "min_theta must fall between 0 and max_theta" );
|
CV_Error( CV_StsBadArg, "min_theta must fall between 0 and max_theta" );
|
||||||
}
|
}
|
||||||
|
if (!(rho > 0 && theta > 0)) {
|
||||||
|
CV_Error( CV_StsBadArg, "rho and theta must be greater 0" );
|
||||||
|
}
|
||||||
|
|
||||||
UMat src = _src.getUMat();
|
UMat src = _src.getUMat();
|
||||||
int numangle = cvRound((max_theta - min_theta) / theta);
|
int numangle = cvRound((max_theta - min_theta) / theta);
|
||||||
@ -759,8 +764,7 @@ static bool ocl_HoughLines(InputArray _src, OutputArray _lines, double rho, doub
|
|||||||
if (getLinesKernel.empty())
|
if (getLinesKernel.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// TODO: investigate other strategies to choose linesMax
|
int linesMax = threshold > 0 ? min(total_points*numangle/threshold, OCL_MAX_LINES) : OCL_MAX_LINES;
|
||||||
int linesMax = min(total_points*numangle/threshold, 4096);
|
|
||||||
UMat lines(linesMax, 1, CV_32FC2);
|
UMat lines(linesMax, 1, CV_32FC2);
|
||||||
|
|
||||||
getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines),
|
getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::WriteOnlyNoSize(lines),
|
||||||
@ -783,6 +787,10 @@ static bool ocl_HoughLinesP(InputArray _src, OutputArray _lines, double rho, dou
|
|||||||
{
|
{
|
||||||
CV_Assert(_src.type() == CV_8UC1);
|
CV_Assert(_src.type() == CV_8UC1);
|
||||||
|
|
||||||
|
if (!(rho > 0 && theta > 0)) {
|
||||||
|
CV_Error( CV_StsBadArg, "rho and theta must be greater 0" );
|
||||||
|
}
|
||||||
|
|
||||||
UMat src = _src.getUMat();
|
UMat src = _src.getUMat();
|
||||||
int numangle = cvRound(CV_PI / theta);
|
int numangle = cvRound(CV_PI / theta);
|
||||||
int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
|
int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho);
|
||||||
@ -809,8 +817,7 @@ static bool ocl_HoughLinesP(InputArray _src, OutputArray _lines, double rho, dou
|
|||||||
if (getLinesKernel.empty())
|
if (getLinesKernel.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// TODO: investigate other strategies to choose linesMax
|
int linesMax = threshold > 0 ? min(total_points*numangle/threshold, OCL_MAX_LINES) : OCL_MAX_LINES;
|
||||||
int linesMax = min(total_points*numangle/threshold, 4096);
|
|
||||||
UMat lines(linesMax, 1, CV_32SC4);
|
UMat lines(linesMax, 1, CV_32SC4);
|
||||||
|
|
||||||
getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::ReadOnly(src),
|
getLinesKernel.args(ocl::KernelArg::ReadOnly(accum), ocl::KernelArg::ReadOnly(src),
|
||||||
|
@ -5,6 +5,8 @@
|
|||||||
// Copyright (C) 2014, Itseez, Inc., all rights reserved.
|
// Copyright (C) 2014, Itseez, Inc., all rights reserved.
|
||||||
// Third party copyrights are property of their respective owners.
|
// Third party copyrights are property of their respective owners.
|
||||||
|
|
||||||
|
#define ACCUM(ptr) *((__global int*)(ptr))
|
||||||
|
|
||||||
#ifdef MAKE_POINTS_LIST
|
#ifdef MAKE_POINTS_LIST
|
||||||
|
|
||||||
__kernel void make_point_list(__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
|
__kernel void make_point_list(__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||||
@ -25,11 +27,13 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int
|
|||||||
|
|
||||||
if (y < src_rows)
|
if (y < src_rows)
|
||||||
{
|
{
|
||||||
|
y <<= 16;
|
||||||
|
|
||||||
for (int i=x; i < src_cols; i+=GROUP_SIZE)
|
for (int i=x; i < src_cols; i+=GROUP_SIZE)
|
||||||
{
|
{
|
||||||
if (src[i])
|
if (src[i])
|
||||||
{
|
{
|
||||||
int val = (y << 16) | i;
|
int val = y | i;
|
||||||
int index = atomic_inc(&l_index);
|
int index = atomic_inc(&l_index);
|
||||||
l_points[index] = val;
|
l_points[index] = val;
|
||||||
}
|
}
|
||||||
@ -53,7 +57,7 @@ __kernel void make_point_list(__global const uchar * src_ptr, int src_step, int
|
|||||||
#elif defined FILL_ACCUM_GLOBAL
|
#elif defined FILL_ACCUM_GLOBAL
|
||||||
|
|
||||||
__kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, int list_offset,
|
__kernel void fill_accum_global(__global const uchar * list_ptr, int list_step, int list_offset,
|
||||||
__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
|
__global uchar * accum_ptr, int accum_step, int accum_offset,
|
||||||
int total_points, float irho, float theta, int numrho, int numangle)
|
int total_points, float irho, float theta, int numrho, int numangle)
|
||||||
{
|
{
|
||||||
int theta_idx = get_global_id(1);
|
int theta_idx = get_global_id(1);
|
||||||
@ -76,7 +80,7 @@ __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step,
|
|||||||
const int x = (val & 0xFFFF);
|
const int x = (val & 0xFFFF);
|
||||||
const int y = (val >> 16) & 0xFFFF;
|
const int y = (val >> 16) & 0xFFFF;
|
||||||
|
|
||||||
int r = convert_int_rte(x * cosVal + y * sinVal) + shift;
|
int r = convert_int_rte(mad(x, cosVal, y * sinVal)) + shift;
|
||||||
atomic_inc(accum + r + 1);
|
atomic_inc(accum + r + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -85,7 +89,7 @@ __kernel void fill_accum_global(__global const uchar * list_ptr, int list_step,
|
|||||||
#elif defined FILL_ACCUM_LOCAL
|
#elif defined FILL_ACCUM_LOCAL
|
||||||
|
|
||||||
__kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, int list_offset,
|
__kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, int list_offset,
|
||||||
__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
|
__global uchar * accum_ptr, int accum_step, int accum_offset,
|
||||||
int total_points, float irho, float theta, int numrho, int numangle)
|
int total_points, float irho, float theta, int numrho, int numangle)
|
||||||
{
|
{
|
||||||
int theta_idx = get_group_id(1);
|
int theta_idx = get_group_id(1);
|
||||||
@ -133,15 +137,13 @@ __kernel void fill_accum_local(__global const uchar * list_ptr, int list_step, i
|
|||||||
|
|
||||||
#elif defined GET_LINES
|
#elif defined GET_LINES
|
||||||
|
|
||||||
#define ACCUM(ptr) *((__global int*)(ptr))
|
|
||||||
|
|
||||||
__kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
|
__kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
|
||||||
__global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index_ptr,
|
__global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index_ptr,
|
||||||
int linesMax, int threshold, float rho, float theta)
|
int linesMax, int threshold, float rho, float theta)
|
||||||
{
|
{
|
||||||
int x0 = get_global_id(0);
|
int x0 = get_global_id(0);
|
||||||
int y = get_global_id(1);
|
int y = get_global_id(1);
|
||||||
int gl_size = get_global_size(0);
|
int glob_size = get_global_size(0);
|
||||||
|
|
||||||
if (y < accum_rows-2)
|
if (y < accum_rows-2)
|
||||||
{
|
{
|
||||||
@ -149,7 +151,7 @@ __kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_of
|
|||||||
__global float2* lines = (__global float2*)(lines_ptr + lines_offset);
|
__global float2* lines = (__global float2*)(lines_ptr + lines_offset);
|
||||||
__global int* lines_index = lines_index_ptr + 1;
|
__global int* lines_index = lines_index_ptr + 1;
|
||||||
|
|
||||||
for (int x=x0; x<accum_cols-2; x+=gl_size)
|
for (int x=x0; x<accum_cols-2; x+=glob_size)
|
||||||
{
|
{
|
||||||
int curVote = ACCUM(accum);
|
int curVote = ACCUM(accum);
|
||||||
|
|
||||||
@ -167,15 +169,13 @@ __kernel void get_lines(__global uchar * accum_ptr, int accum_step, int accum_of
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
accum += gl_size * (int) sizeof(int);
|
accum += glob_size * (int) sizeof(int);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif GET_LINES_PROBABOLISTIC
|
#elif GET_LINES_PROBABOLISTIC
|
||||||
|
|
||||||
#define ACCUM(ptr) *((__global int*)(ptr))
|
|
||||||
|
|
||||||
__kernel void get_lines(__global const uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
|
__kernel void get_lines(__global const uchar * accum_ptr, int accum_step, int accum_offset, int accum_rows, int accum_cols,
|
||||||
__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
|
__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||||
__global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index_ptr,
|
__global uchar * lines_ptr, int lines_step, int lines_offset, __global int* lines_index_ptr,
|
||||||
@ -222,6 +222,7 @@ __kernel void get_lines(__global const uchar * accum_ptr, int accum_step, int ac
|
|||||||
pb[1].x = src_cols - 1;
|
pb[1].x = src_cols - 1;
|
||||||
pb[1].y = p0.y + a * dir.y;
|
pb[1].y = p0.y + a * dir.y;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dir.y != 0)
|
if (dir.y != 0)
|
||||||
{
|
{
|
||||||
a = -p0.y / dir.y;
|
a = -p0.y / dir.y;
|
||||||
@ -239,7 +240,7 @@ __kernel void get_lines(__global const uchar * accum_ptr, int accum_step, int ac
|
|||||||
if (dir.x < 0)
|
if (dir.x < 0)
|
||||||
dir = -dir;
|
dir = -dir;
|
||||||
}
|
}
|
||||||
else if (pb[1].x == src_cols - 1 && (pb[0].y >= 0 && pb[0].y < src_rows))
|
else if (pb[1].x == src_cols - 1 && (pb[1].y >= 0 && pb[1].y < src_rows))
|
||||||
{
|
{
|
||||||
p0 = pb[1];
|
p0 = pb[1];
|
||||||
if (dir.x > 0)
|
if (dir.x > 0)
|
||||||
@ -258,41 +259,30 @@ __kernel void get_lines(__global const uchar * accum_ptr, int accum_step, int ac
|
|||||||
dir = -dir;
|
dir = -dir;
|
||||||
}
|
}
|
||||||
|
|
||||||
float2 d;
|
dir /= max(fabs(dir.x), fabs(dir.y));
|
||||||
if (fabs(dir.x) > fabs(dir.y))
|
|
||||||
{
|
|
||||||
d.x = dir.x > 0 ? 1 : -1;
|
|
||||||
d.y = dir.y / fabs(dir.x);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
d.x = dir.x / fabs(dir.y);
|
|
||||||
d.y = dir.y > 0 ? 1 : -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
float2 line_end[2];
|
float2 line_end[2];
|
||||||
int gap;
|
int gap;
|
||||||
bool inLine = false;
|
bool inLine = false;
|
||||||
|
|
||||||
float2 p1 = p0;
|
if (p0.x < 0 || p0.x >= src_cols || p0.y < 0 || p0.y >= src_rows)
|
||||||
if (p1.x < 0 || p1.x >= src_cols || p1.y < 0 || p1.y >= src_rows)
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
if (*(src_ptr + mad24(p1.y, src_step, p1.x + src_offset)))
|
if (*(src_ptr + mad24(p0.y, src_step, p0.x + src_offset)))
|
||||||
{
|
{
|
||||||
gap = 0;
|
gap = 0;
|
||||||
|
|
||||||
if (!inLine)
|
if (!inLine)
|
||||||
{
|
{
|
||||||
line_end[0] = p1;
|
line_end[0] = p0;
|
||||||
line_end[1] = p1;
|
line_end[1] = p0;
|
||||||
inLine = true;
|
inLine = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
line_end[1] = p1;
|
line_end[1] = p0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (inLine)
|
else if (inLine)
|
||||||
@ -314,8 +304,8 @@ __kernel void get_lines(__global const uchar * accum_ptr, int accum_step, int ac
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
p1 = p1 + d;
|
p0 = p0 + dir;
|
||||||
if (p1.x < 0 || p1.x >= src_cols || p1.y < 0 || p1.y >= src_rows)
|
if (p0.x < 0 || p0.x >= src_cols || p0.y < 0 || p0.y >= src_rows)
|
||||||
{
|
{
|
||||||
if (inLine)
|
if (inLine)
|
||||||
{
|
{
|
||||||
|
@ -168,7 +168,7 @@ OCL_TEST_P(HoughLinesP, RealImage)
|
|||||||
OCL_OFF(cv::HoughLinesP(src, dst, rhoStep, thetaStep, threshold, minLineLength, maxGap));
|
OCL_OFF(cv::HoughLinesP(src, dst, rhoStep, thetaStep, threshold, minLineLength, maxGap));
|
||||||
OCL_ON(cv::HoughLinesP(usrc, udst, rhoStep, thetaStep, threshold, minLineLength, maxGap));
|
OCL_ON(cv::HoughLinesP(usrc, udst, rhoStep, thetaStep, threshold, minLineLength, maxGap));
|
||||||
|
|
||||||
Near(0.2);
|
Near(0.25);
|
||||||
}
|
}
|
||||||
|
|
||||||
OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLines, Combine(Values(1, 0.5), // rhoStep
|
OCL_INSTANTIATE_TEST_CASE_P(Imgproc, HoughLines, Combine(Values(1, 0.5), // rhoStep
|
||||||
|
Loading…
x
Reference in New Issue
Block a user