Fix a bug in ocl::Erode/Dilate, simplify the host logic

This commit is contained in:
yao 2013-03-12 16:45:43 +08:00
parent eaaba1336a
commit 084385cf38
2 changed files with 9 additions and 15 deletions

View File

@ -195,7 +195,7 @@ public:
namespace namespace
{ {
typedef void (*GPUMorfFilter_t)(const oclMat & , oclMat & , oclMat & , Size &, const Point, bool rectKernel, bool usrROI); typedef void (*GPUMorfFilter_t)(const oclMat & , oclMat & , oclMat & , Size &, const Point, bool rectKernel);
class MorphFilter_GPU : public BaseFilter_GPU class MorphFilter_GPU : public BaseFilter_GPU
{ {
@ -205,7 +205,7 @@ public:
virtual void operator()(const oclMat &src, oclMat &dst) virtual void operator()(const oclMat &src, oclMat &dst)
{ {
func(src, dst, kernel, ksize, anchor, rectKernel, false) ; func(src, dst, kernel, ksize, anchor, rectKernel) ;
} }
oclMat kernel; oclMat kernel;
@ -220,7 +220,7 @@ public:
**Note that the kernel needs to be further refined. **Note that the kernel needs to be further refined.
*/ */
static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel, static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
Size &ksize, const Point anchor, bool rectKernel, bool useROI) Size &ksize, const Point anchor, bool rectKernel)
{ {
//Normalize the result by default //Normalize the result by default
//float alpha = ksize.height * ksize.width; //float alpha = ksize.height * ksize.width;
@ -276,11 +276,10 @@ static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
} }
char compile_option[128]; char compile_option[128];
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s %s %s", sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D ERODE %s %s",
anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
rectKernel?"-D RECTKERNEL":"", rectKernel?"-D RECTKERNEL":"",
useROI?"-D USEROI":"", s);
s);
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));
args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data)); args.push_back(make_pair(sizeof(cl_mem), (void *)&dst.data));
@ -300,7 +299,7 @@ static void GPUErode(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
//! data types supported: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4 //! data types supported: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4
static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel, static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
Size &ksize, const Point anchor, bool rectKernel, bool useROI) Size &ksize, const Point anchor, bool rectKernel)
{ {
//Normalize the result by default //Normalize the result by default
//float alpha = ksize.height * ksize.width; //float alpha = ksize.height * ksize.width;
@ -357,10 +356,9 @@ static void GPUDilate(const oclMat &src, oclMat &dst, oclMat &mat_kernel,
} }
char compile_option[128]; char compile_option[128];
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s %s", sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D DILATE %s %s",
anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1],
rectKernel?"-D RECTKERNEL":"", rectKernel?"-D RECTKERNEL":"",
useROI?"-D USEROI":"",
s); s);
vector< pair<size_t, const void *> > args; vector< pair<size_t, const void *> > args;
args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data)); args.push_back(make_pair(sizeof(cl_mem), (void *)&src.data));

View File

@ -120,8 +120,7 @@ __kernel void morph_C1_D0(__global const uchar * restrict src,
int gidy = get_global_id(1); int gidy = get_global_id(1);
int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel); int out_addr = mad24(gidy,dst_step_in_pixel,gidx+dst_offset_in_pixel);
#ifdef USEROI if(gidx+3<cols && gidy<rows && (dst_offset_in_pixel&3)==0)
if(gidx+3<cols && gidy<rows && (dst_offset_in_pixel&3==0))
{ {
*(__global uchar4*)&dst[out_addr] = res; *(__global uchar4*)&dst[out_addr] = res;
} }
@ -150,9 +149,6 @@ __kernel void morph_C1_D0(__global const uchar * restrict src,
dst[out_addr] = res.x; dst[out_addr] = res.x;
} }
} }
#else
*(__global uchar4*)&dst[out_addr] = res;
#endif
} }
#else #else
__kernel void morph(__global const GENTYPE * restrict src, __kernel void morph(__global const GENTYPE * restrict src,