A little optimization on ocl/pyrdown, ocl/canny
This commit is contained in:
parent
907a9101eb
commit
1b6639aa3d
@ -66,7 +66,7 @@ namespace cv
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(dy_)
|
cv::ocl::CannyBuf::CannyBuf(const oclMat& dx_, const oclMat& dy_) : dx(dx_), dy(dy_), counter(NULL)
|
||||||
{
|
{
|
||||||
CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());
|
CV_Assert(dx_.type() == CV_32SC1 && dy_.type() == CV_32SC1 && dx_.size() == dy_.size());
|
||||||
|
|
||||||
@ -102,6 +102,10 @@ void cv::ocl::CannyBuf::create(const Size& image_size, int apperture_size)
|
|||||||
|
|
||||||
float counter_f [1] = { 0 };
|
float counter_f [1] = { 0 };
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
if(counter)
|
||||||
|
{
|
||||||
|
openCLFree(counter);
|
||||||
|
}
|
||||||
counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(float), counter_f, &err );
|
counter = clCreateBuffer( Context::getContext()->impl->clContext, CL_MEM_COPY_HOST_PTR, sizeof(float), counter_f, &err );
|
||||||
openCLSafeCall(err);
|
openCLSafeCall(err);
|
||||||
}
|
}
|
||||||
@ -322,15 +326,11 @@ void canny::calcMap_gpu(oclMat& dx, oclMat& dy, oclMat& mag, oclMat& map, int ro
|
|||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map.step));
|
||||||
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
args.push_back( make_pair( sizeof(cl_int), (void *)&map.offset));
|
||||||
|
|
||||||
#if CALCMAP_FIXED
|
|
||||||
size_t globalThreads[3] = {cols, rows, 1};
|
size_t globalThreads[3] = {cols, rows, 1};
|
||||||
string kernelName = "calcMap";
|
string kernelName = "calcMap";
|
||||||
size_t localThreads[3] = {16, 16, 1};
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
#else
|
|
||||||
size_t globalThreads[3] = {cols, rows, 1};
|
|
||||||
string kernelName = "calcMap_2";
|
|
||||||
size_t localThreads[3] = {256, 1, 1};
|
|
||||||
#endif
|
|
||||||
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
openCLExecuteKernel(clCxt, &imgproc_canny, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -16,6 +16,7 @@
|
|||||||
//
|
//
|
||||||
// @Authors
|
// @Authors
|
||||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||||
|
// Yao Wang, bitwangyaoyao@gmail.com
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
// are permitted provided that the following conditions are met:
|
// are permitted provided that the following conditions are met:
|
||||||
@ -118,81 +119,19 @@ uchar4 round_uchar4_float4(float4 v)
|
|||||||
return round_uchar4_int4(iv);
|
return round_uchar4_int4(iv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define IDX_ROW_HIGH(y,last_row) (abs_diff((int)abs_diff(last_row,y),last_row) % ((last_row)+1))
|
||||||
|
#define IDX_ROW_LOW(y,last_row) (abs(y) % ((last_row) + 1))
|
||||||
|
#define IDX_COL_HIGH(x,last_col) abs_diff((int)abs_diff(x,last_col),last_col)
|
||||||
int idx_row_low(int y, int last_row)
|
#define IDX_COL_LOW(x,last_col) (abs(x) % ((last_col) + 1))
|
||||||
{
|
|
||||||
if(y < 0)
|
|
||||||
{
|
|
||||||
y = -y;
|
|
||||||
}
|
|
||||||
return y % (last_row + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
int idx_row_high(int y, int last_row)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
int j;
|
|
||||||
if(last_row - y < 0)
|
|
||||||
{
|
|
||||||
i = (y - last_row);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
i = (last_row - y);
|
|
||||||
}
|
|
||||||
if(last_row - i < 0)
|
|
||||||
{
|
|
||||||
j = i - last_row;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
j = last_row - i;
|
|
||||||
}
|
|
||||||
return j % (last_row + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
int idx_row(int y, int last_row)
|
int idx_row(int y, int last_row)
|
||||||
{
|
{
|
||||||
return idx_row_low(idx_row_high(y, last_row), last_row);
|
return IDX_ROW_LOW(IDX_ROW_HIGH(y,last_row),last_row);
|
||||||
}
|
|
||||||
|
|
||||||
int idx_col_low(int x, int last_col)
|
|
||||||
{
|
|
||||||
if(x < 0)
|
|
||||||
{
|
|
||||||
x = -x;
|
|
||||||
}
|
|
||||||
return x % (last_col + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
int idx_col_high(int x, int last_col)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
int j;
|
|
||||||
if(last_col - x < 0)
|
|
||||||
{
|
|
||||||
i = (x - last_col);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
i = (last_col - x);
|
|
||||||
}
|
|
||||||
if(last_col - i < 0)
|
|
||||||
{
|
|
||||||
j = i - last_col;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
j = last_col - i;
|
|
||||||
}
|
|
||||||
return j % (last_col + 1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int idx_col(int x, int last_col)
|
int idx_col(int x, int last_col)
|
||||||
{
|
{
|
||||||
return idx_col_low(idx_col_high(x, last_col), last_col);
|
return IDX_COL_LOW(IDX_COL_HIGH(x,last_col),last_col);
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global uchar *dst, int dstStep, int dstOffset, int dstCols)
|
__kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset, int srcRows, int srcCols, __global uchar *dst, int dstStep, int dstOffset, int dstCols)
|
||||||
@ -210,11 +149,11 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset
|
|||||||
|
|
||||||
sum = 0;
|
sum = 0;
|
||||||
|
|
||||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)]);
|
sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(x, last_col)];
|
||||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)]);
|
sum = sum + 0.25f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(x, last_col)];
|
||||||
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(x, last_col)]);
|
sum = sum + 0.375f * ((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(x, last_col)];
|
||||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)]);
|
sum = sum + 0.25f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(x, last_col)];
|
||||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)]);
|
sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(x, last_col)];
|
||||||
|
|
||||||
smem[2 + get_local_id(0)] = sum;
|
smem[2 + get_local_id(0)] = sum;
|
||||||
|
|
||||||
@ -224,11 +163,11 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset
|
|||||||
|
|
||||||
sum = 0;
|
sum = 0;
|
||||||
|
|
||||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)]);
|
sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)]);
|
sum = sum + 0.25f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||||
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(left_x, last_col)]);
|
sum = sum + 0.375f * ((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)]);
|
sum = sum + 0.25f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)]);
|
sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(left_x, last_col)];
|
||||||
|
|
||||||
smem[get_local_id(0)] = sum;
|
smem[get_local_id(0)] = sum;
|
||||||
}
|
}
|
||||||
@ -239,11 +178,11 @@ __kernel void pyrDown_C1_D0(__global uchar * srcData, int srcStep, int srcOffset
|
|||||||
|
|
||||||
sum = 0;
|
sum = 0;
|
||||||
|
|
||||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)]);
|
sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 2, last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)]);
|
sum = sum + 0.25f * ((__global uchar*)((__global char*)srcData + idx_row(src_y - 1, last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||||
sum = sum + 0.375f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(right_x, last_col)]);
|
sum = sum + 0.375f * ((__global uchar*)((__global char*)srcData + idx_row(src_y , last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||||
sum = sum + 0.25f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)]);
|
sum = sum + 0.25f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 1, last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||||
sum = sum + 0.0625f * round_uchar_uchar(((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)]);
|
sum = sum + 0.0625f * ((__global uchar*)((__global char*)srcData + idx_row(src_y + 2, last_row) * srcStep))[idx_col(right_x, last_col)];
|
||||||
|
|
||||||
smem[4 + get_local_id(0)] = sum;
|
smem[4 + get_local_id(0)] = sum;
|
||||||
}
|
}
|
||||||
@ -288,11 +227,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffse
|
|||||||
|
|
||||||
sum = 0;
|
sum = 0;
|
||||||
|
|
||||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(x, last_col)]);
|
||||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
sum = sum + co2 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(x, last_col)]);
|
||||||
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
sum = sum + co1 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(x, last_col)]);
|
||||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
sum = sum + co2 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(x, last_col)]);
|
||||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)]));
|
sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(x, last_col)]);
|
||||||
|
|
||||||
smem[2 + get_local_id(0)] = sum;
|
smem[2 + get_local_id(0)] = sum;
|
||||||
|
|
||||||
@ -302,11 +241,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffse
|
|||||||
|
|
||||||
sum = 0;
|
sum = 0;
|
||||||
|
|
||||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
|
||||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
sum = sum + co2 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
|
||||||
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
sum = sum + co1 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
|
||||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
sum = sum + co2 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
|
||||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]));
|
sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(left_x, last_col)]);
|
||||||
|
|
||||||
smem[get_local_id(0)] = sum;
|
smem[get_local_id(0)] = sum;
|
||||||
}
|
}
|
||||||
@ -317,11 +256,11 @@ __kernel void pyrDown_C4_D0(__global uchar4 * srcData, int srcStep, int srcOffse
|
|||||||
|
|
||||||
sum = 0;
|
sum = 0;
|
||||||
|
|
||||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
|
||||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
sum = sum + co2 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y - 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
|
||||||
sum = sum + co1 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
sum = sum + co1 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y , last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
|
||||||
sum = sum + co2 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
sum = sum + co2 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 1, last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
|
||||||
sum = sum + co3 * convert_float4(round_uchar4_uchar4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]));
|
sum = sum + co3 * convert_float4(((__global uchar4*)((__global char4*)srcData + idx_row(src_y + 2, last_row) * srcStep / 4))[idx_col(right_x, last_col)]);
|
||||||
|
|
||||||
smem[4 + get_local_id(0)] = sum;
|
smem[4 + get_local_id(0)] = sum;
|
||||||
}
|
}
|
||||||
|
@ -103,6 +103,6 @@ TEST_P(Canny, Accuracy)
|
|||||||
EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
|
EXPECT_MAT_SIMILAR(edges_gold, edges, 1e-2);
|
||||||
}
|
}
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(ocl_ImgProc, Canny, testing::Combine(
|
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny, testing::Combine(
|
||||||
testing::Values(AppertureSize(3), AppertureSize(5)),
|
testing::Values(AppertureSize(3), AppertureSize(5)),
|
||||||
testing::Values(L2gradient(false), L2gradient(true))));
|
testing::Values(L2gradient(false), L2gradient(true))));
|
||||||
|
Loading…
x
Reference in New Issue
Block a user