simplify the kernel logic when using rect kernel or without ROI
This commit is contained in:
parent
db9de43fa5
commit
3c5cb4931e
@ -19,6 +19,7 @@
|
|||||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||||
// Zero Lin, Zero.Lin@amd.com
|
// Zero Lin, Zero.Lin@amd.com
|
||||||
// Zhang Ying, zhangying913@gmail.com
|
// Zhang Ying, zhangying913@gmail.com
|
||||||
|
// Yao Wang, bitwangyaoyao@gmail.com
|
||||||
//
|
//
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
// are permitted provided that the following conditions are met:
|
// are permitted provided that the following conditions are met:
|
||||||
@ -309,21 +310,22 @@ public:
|
|||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
typedef void (*GPUMorfFilter_t)(const oclMat & , oclMat & , oclMat & , Size &, const Point);
|
typedef void (*GPUMorfFilter_t)(const oclMat & , oclMat & , oclMat & , Size &, const Point, bool rectKernel, bool usrROI);
|
||||||
|
|
||||||
class MorphFilter_GPU : public BaseFilter_GPU
|
class MorphFilter_GPU : public BaseFilter_GPU
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
MorphFilter_GPU(const Size &ksize_, const Point &anchor_, const oclMat &kernel_, GPUMorfFilter_t func_) :
|
MorphFilter_GPU(const Size &ksize_, const Point &anchor_, const oclMat &kernel_, GPUMorfFilter_t func_) :
|
||||||
BaseFilter_GPU(ksize_, anchor_, BORDER_CONSTANT), kernel(kernel_), func(func_) {}
|
BaseFilter_GPU(ksize_, anchor_, BORDER_CONSTANT), kernel(kernel_), func(func_), rectKernel(false) {}
|
||||||
|
|
||||||
virtual void operator()(const oclMat &src, oclMat &dst)
|
virtual void operator()(const oclMat &src, oclMat &dst)
|
||||||
{
|
{
|
||||||
func(src, dst, kernel, ksize, anchor) ;
|
func(src, dst, kernel, ksize, anchor, rectKernel, false) ;
|
||||||
}
|
}
|
||||||
|
|
||||||
oclMat kernel;
|
oclMat kernel;
|
||||||
GPUMorfFilter_t func;
|
GPUMorfFilter_t func;
|
||||||
|
bool rectKernel;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -332,7 +334,8 @@ public:
|
|||||||
**Extend this if necessary later.
|
**Extend this if necessary later.
|
||||||
**Note that the kernel need to be further refined.
|
**Note that the kernel need to be further refined.
|
||||||
*/
|
*/
|
||||||
static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, const Point anchor)
|
static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
|
||||||
|
Size &ksize, const Point anchor, bool rectKernel, bool useROI)
|
||||||
{
|
{
|
||||||
//Normalize the result by default
|
//Normalize the result by default
|
||||||
//float alpha = ksize.height * ksize.width;
|
//float alpha = ksize.height * ksize.width;
|
||||||
@ -388,7 +391,11 @@ static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &k
|
|||||||
}
|
}
|
||||||
|
|
||||||
char compile_option[128];
|
char compile_option[128];
|
||||||
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s", anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], s);
|
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s %s %s",
|
||||||
|
anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
|
||||||
|
rectKernel?"-D RECTKERNEL":"",
|
||||||
|
useROI?"-D USEROI":"",
|
||||||
|
s);
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
|
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||||
args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
|
args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||||
@ -407,7 +414,8 @@ static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &k
|
|||||||
|
|
||||||
|
|
||||||
//! data type supported: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4
|
//! data type supported: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4
|
||||||
static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &ksize, const Point anchor)
|
static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
|
||||||
|
Size &ksize, const Point anchor, bool rectKernel, bool useROI)
|
||||||
{
|
{
|
||||||
//Normalize the result by default
|
//Normalize the result by default
|
||||||
//float alpha = ksize.height * ksize.width;
|
//float alpha = ksize.height * ksize.width;
|
||||||
@ -426,12 +434,13 @@ static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &
|
|||||||
Context *clCxt = src.clCxt;
|
Context *clCxt = src.clCxt;
|
||||||
string kernelName;
|
string kernelName;
|
||||||
size_t localThreads[3] = {16, 16, 1};
|
size_t localThreads[3] = {16, 16, 1};
|
||||||
size_t globalThreads[3] = {(src.cols + localThreads[0]) / localThreads[0] *localThreads[0], (src.rows + localThreads[1]) / localThreads[1] *localThreads[1], 1};
|
size_t globalThreads[3] = {(src.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0],
|
||||||
|
(src.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1};
|
||||||
|
|
||||||
if (src.type() == CV_8UC1)
|
if (src.type() == CV_8UC1)
|
||||||
{
|
{
|
||||||
kernelName = "morph_C1_D0";
|
kernelName = "morph_C1_D0";
|
||||||
globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0]) / localThreads[0] * localThreads[0];
|
globalThreads[0] = ((src.cols + 3) / 4 + localThreads[0] - 1) / localThreads[0] * localThreads[0];
|
||||||
CV_Assert(localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1));
|
CV_Assert(localThreads[0]*localThreads[1] * 8 >= (localThreads[0] * 4 + ksize.width - 1) * (localThreads[1] + ksize.height - 1));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -463,7 +472,11 @@ static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, Size &
|
|||||||
}
|
}
|
||||||
|
|
||||||
char compile_option[128];
|
char compile_option[128];
|
||||||
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s", anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], s);
|
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s %s",
|
||||||
|
anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
|
||||||
|
rectKernel?"-D RECTKERNEL":"",
|
||||||
|
useROI?"-D USEROI":"",
|
||||||
|
s);
|
||||||
vector< pair<size_t, const void *> > args;
|
vector< pair<size_t, const void *> > args;
|
||||||
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
|
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||||
args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
|
args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||||
@ -495,7 +508,14 @@ Ptr<BaseFilter_GPU> cv::ocl::getMorphologyFilter_GPU(int op, int type, const Mat
|
|||||||
normalizeKernel(kernel, gpu_krnl);
|
normalizeKernel(kernel, gpu_krnl);
|
||||||
normalizeAnchor(anchor, ksize);
|
normalizeAnchor(anchor, ksize);
|
||||||
|
|
||||||
return Ptr<BaseFilter_GPU>(new MorphFilter_GPU(ksize, anchor, gpu_krnl, GPUMorfFilter_callers[op][CV_MAT_CN(type)]));
|
bool noZero = true;
|
||||||
|
for(int i = 0; i < kernel.rows * kernel.cols; ++i)
|
||||||
|
if(kernel.data[i] != 1)
|
||||||
|
noZero = false;
|
||||||
|
MorphFilter_GPU* mfgpu=new MorphFilter_GPU(ksize, anchor, gpu_krnl, GPUMorfFilter_callers[op][CV_MAT_CN(type)]);
|
||||||
|
if(noZero)
|
||||||
|
mfgpu->rectKernel = true;
|
||||||
|
return Ptr<BaseFilter_GPU>(mfgpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
// @Authors
|
// @Authors
|
||||||
// Niko Li, newlife20080214@gmail.com
|
// Niko Li, newlife20080214@gmail.com
|
||||||
// Zero Lin, zero.lin@amd.com
|
// Zero Lin, zero.lin@amd.com
|
||||||
|
// Yao Wang, bitwangyaoyao@gmail.com
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
// are permitted provided that the following conditions are met:
|
// are permitted provided that the following conditions are met:
|
||||||
//
|
//
|
||||||
@ -100,14 +101,26 @@ __kernel void morph_C1_D0(__global const uchar * restrict src,
|
|||||||
LDS_DAT[point2] = temp1;
|
LDS_DAT[point2] = temp1;
|
||||||
barrier(CLK_LOCAL_MEM_FENCE);
|
barrier(CLK_LOCAL_MEM_FENCE);
|
||||||
uchar4 res = (uchar4)VAL;
|
uchar4 res = (uchar4)VAL;
|
||||||
|
|
||||||
for(int i=0; i<2*RADIUSY+1; i++)
|
for(int i=0; i<2*RADIUSY+1; i++)
|
||||||
for(int j=0; j<2*RADIUSX+1; j++)
|
for(int j=0; j<2*RADIUSX+1; j++)
|
||||||
{
|
{
|
||||||
res =mat_kernel[i*(2*RADIUSX+1)+j]? MORPH_OP(res,vload4(0,(__local uchar*)&LDS_DAT[mad24((l_y+i),width,l_x)]+offset+j)):res;
|
res =
|
||||||
|
#ifndef RECTKERNEL
|
||||||
|
mat_kernel[i*(2*RADIUSX+1)+j] ?
|
||||||
|
#endif
|
||||||
|
MORPH_OP(res,vload4(0,(__local uchar*)&LDS_DAT[mad24((l_y+i),width,l_x)]+offset+j))
|
||||||
|
#ifndef RECTKERNEL
|
||||||
|
:res
|
||||||
|
#endif
|
||||||
|
;
|
||||||
}
|
}
|
||||||
|
|
||||||
int gidx = get_global_id(0)<<2;
|
int gidx = get_global_id(0)<<2;
|
||||||
int gidy = get_global_id(1);
|
int gidy = get_global_id(1);
|
||||||
int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel);
|
int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel);
|
||||||
|
|
||||||
|
#ifdef USEROI
|
||||||
if(gidx+3<cols && gidy<rows && (dst_offset_in_pixel&3==0))
|
if(gidx+3<cols && gidy<rows && (dst_offset_in_pixel&3==0))
|
||||||
{
|
{
|
||||||
*(__global uchar4*)&dst[out_addr] = res;
|
*(__global uchar4*)&dst[out_addr] = res;
|
||||||
@ -137,6 +150,9 @@ __kernel void morph_C1_D0(__global const uchar * restrict src,
|
|||||||
dst[out_addr] = res.x;
|
dst[out_addr] = res.x;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
*(__global uchar4*)&dst[out_addr] = res;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
__kernel void morph(__global const GENTYPE * restrict src,
|
__kernel void morph(__global const GENTYPE * restrict src,
|
||||||
@ -154,7 +170,7 @@ __kernel void morph(__global const GENTYPE * restrict src,
|
|||||||
int y = get_group_id(1)*LSIZE1;
|
int y = get_group_id(1)*LSIZE1;
|
||||||
int start_x = x+src_offset_x-RADIUSX;
|
int start_x = x+src_offset_x-RADIUSX;
|
||||||
int end_x = x + src_offset_x+LSIZE0+RADIUSX;
|
int end_x = x + src_offset_x+LSIZE0+RADIUSX;
|
||||||
int width = end_x -start_x+1;
|
int width = end_x -(x+src_offset_x-RADIUSX)+1;
|
||||||
int start_y = y+src_offset_y-RADIUSY;
|
int start_y = y+src_offset_y-RADIUSY;
|
||||||
int point1 = mad24(l_y,LSIZE0,l_x);
|
int point1 = mad24(l_y,LSIZE0,l_x);
|
||||||
int point2 = point1 + LSIZE0*LSIZE1;
|
int point2 = point1 + LSIZE0*LSIZE1;
|
||||||
@ -191,7 +207,15 @@ __kernel void morph(__global const GENTYPE * restrict src,
|
|||||||
for(int i=0; i<2*RADIUSY+1; i++)
|
for(int i=0; i<2*RADIUSY+1; i++)
|
||||||
for(int j=0; j<2*RADIUSX+1; j++)
|
for(int j=0; j<2*RADIUSX+1; j++)
|
||||||
{
|
{
|
||||||
res =mat_kernel[i*(2*RADIUSX+1)+j]? MORPH_OP(res,LDS_DAT[mad24(l_y+i,width,l_x+j)]):res;
|
res =
|
||||||
|
#ifndef RECTKERNEL
|
||||||
|
mat_kernel[i*(2*RADIUSX+1)+j] ?
|
||||||
|
#endif
|
||||||
|
MORPH_OP(res,LDS_DAT[mad24(l_y+i,width,l_x+j)])
|
||||||
|
#ifndef RECTKERNEL
|
||||||
|
:res
|
||||||
|
#endif
|
||||||
|
;
|
||||||
}
|
}
|
||||||
int gidx = get_global_id(0);
|
int gidx = get_global_id(0);
|
||||||
int gidy = get_global_id(1);
|
int gidy = get_global_id(1);
|
||||||
|
@ -831,13 +831,13 @@ INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine(
|
|||||||
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
Values(1, 3)));
|
Values(1, 3)));
|
||||||
|
|
||||||
//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
|
INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC1), Values(false)));
|
//INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC1), Values(false)));
|
||||||
|
|
||||||
//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
|
INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3)));
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC1), Values(false)));
|
//INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC1), Values(false)));
|
||||||
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4),
|
||||||
|
Loading…
x
Reference in New Issue
Block a user