added 3-channel support to the OpenCL path of cv::flip
This commit is contained in:
parent
b70332d806
commit
8d97d0d631
@ -482,9 +482,9 @@ enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS
|
|||||||
static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
|
static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
|
||||||
{
|
{
|
||||||
CV_Assert(flipCode >= - 1 && flipCode <= 1);
|
CV_Assert(flipCode >= - 1 && flipCode <= 1);
|
||||||
int type = _src.type(), cn = CV_MAT_CN(type), flipType;
|
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), flipType;
|
||||||
|
|
||||||
if (cn > 4 || cn == 3)
|
if (cn > 4)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
const char * kernelName;
|
const char * kernelName;
|
||||||
@ -506,7 +506,8 @@ static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
|
|||||||
}
|
}
|
||||||
|
|
||||||
ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
|
ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
|
||||||
format( "-D type=%s", ocl::memopTypeToStr(type)));
|
format( "-D T=%s -D T1=%s -D cn=%d", ocl::memopTypeToStr(type),
|
||||||
|
ocl::memopTypeToStr(depth), cn));
|
||||||
if (k.empty())
|
if (k.empty())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
@ -39,10 +39,18 @@
|
|||||||
//
|
//
|
||||||
//M*/
|
//M*/
|
||||||
|
|
||||||
#define sizeoftype ((int)sizeof(type))
|
#if cn != 3
|
||||||
|
#define loadpix(addr) *(__global const T *)(addr)
|
||||||
|
#define storepix(val, addr) *(__global T *)(addr) = val
|
||||||
|
#define TSIZE (int)sizeof(T)
|
||||||
|
#else
|
||||||
|
#define loadpix(addr) vload3(0, (__global const T1 *)(addr))
|
||||||
|
#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
|
||||||
|
#define TSIZE ((int)sizeof(T1)*3)
|
||||||
|
#endif
|
||||||
|
|
||||||
__kernel void arithm_flip_rows(__global const uchar* srcptr, int srcstep, int srcoffset,
|
__kernel void arithm_flip_rows(__global const uchar * srcptr, int src_step, int src_offset,
|
||||||
__global uchar* dstptr, int dststep, int dstoffset,
|
__global uchar * dstptr, int dst_step, int dst_offset,
|
||||||
int rows, int cols, int thread_rows, int thread_cols)
|
int rows, int cols, int thread_rows, int thread_cols)
|
||||||
{
|
{
|
||||||
int x = get_global_id(0);
|
int x = get_global_id(0);
|
||||||
@ -50,19 +58,16 @@ __kernel void arithm_flip_rows(__global const uchar* srcptr, int srcstep, int sr
|
|||||||
|
|
||||||
if (x < cols && y < thread_rows)
|
if (x < cols && y < thread_rows)
|
||||||
{
|
{
|
||||||
__global const type* src0 = (__global const type*)(srcptr + mad24(y, srcstep, mad24(x, sizeoftype, srcoffset)));
|
T src0 = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset)));
|
||||||
__global const type* src1 = (__global const type*)(srcptr + mad24(rows - y - 1, srcstep, mad24(x, sizeoftype, srcoffset)));
|
T src1 = loadpix(srcptr + mad24(rows - y - 1, src_step, mad24(x, TSIZE, src_offset)));
|
||||||
|
|
||||||
__global type* dst0 = (__global type*)(dstptr + mad24(y, dststep, mad24(x, sizeoftype, dstoffset)));
|
storepix(src1, dstptr + mad24(y, dst_step, mad24(x, TSIZE, dst_offset)));
|
||||||
__global type* dst1 = (__global type*)(dstptr + mad24(rows - y - 1, dststep, mad24(x, sizeoftype, dstoffset)));
|
storepix(src0, dstptr + mad24(rows - y - 1, dst_step, mad24(x, TSIZE, dst_offset)));
|
||||||
|
|
||||||
dst0[0] = src1[0];
|
|
||||||
dst1[0] = src0[0];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void arithm_flip_rows_cols(__global const uchar* srcptr, int srcstep, int srcoffset,
|
__kernel void arithm_flip_rows_cols(__global const uchar * srcptr, int src_step, int src_offset,
|
||||||
__global uchar* dstptr, int dststep, int dstoffset,
|
__global uchar * dstptr, int dst_step, int dst_offset,
|
||||||
int rows, int cols, int thread_rows, int thread_cols)
|
int rows, int cols, int thread_rows, int thread_cols)
|
||||||
{
|
{
|
||||||
int x = get_global_id(0);
|
int x = get_global_id(0);
|
||||||
@ -71,19 +76,16 @@ __kernel void arithm_flip_rows_cols(__global const uchar* srcptr, int srcstep, i
|
|||||||
if (x < cols && y < thread_rows)
|
if (x < cols && y < thread_rows)
|
||||||
{
|
{
|
||||||
int x1 = cols - x - 1;
|
int x1 = cols - x - 1;
|
||||||
__global const type* src0 = (__global const type*)(srcptr + mad24(y, srcstep, mad24(x, sizeoftype, srcoffset)));
|
T src0 = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset)));
|
||||||
__global const type* src1 = (__global const type*)(srcptr + mad24(rows - y - 1, srcstep, mad24(x1, sizeoftype, srcoffset)));
|
T src1 = loadpix(srcptr + mad24(rows - y - 1, src_step, mad24(x1, TSIZE, src_offset)));
|
||||||
|
|
||||||
__global type* dst0 = (__global type*)(dstptr + mad24(rows - y - 1, dststep, mad24(x1, sizeoftype, dstoffset)));
|
storepix(src0, dstptr + mad24(rows - y - 1, dst_step, mad24(x1, TSIZE, dst_offset)));
|
||||||
__global type* dst1 = (__global type*)(dstptr + mad24(y, dststep, mad24(x, sizeoftype, dstoffset)));
|
storepix(src1, dstptr + mad24(y, dst_step, mad24(x, TSIZE, dst_offset)));
|
||||||
|
|
||||||
dst0[0] = src0[0];
|
|
||||||
dst1[0] = src1[0];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void arithm_flip_cols(__global const uchar* srcptr, int srcstep, int srcoffset,
|
__kernel void arithm_flip_cols(__global const uchar * srcptr, int src_step, int src_offset,
|
||||||
__global uchar* dstptr, int dststep, int dstoffset,
|
__global uchar * dstptr, int dst_step, int dst_offset,
|
||||||
int rows, int cols, int thread_rows, int thread_cols)
|
int rows, int cols, int thread_rows, int thread_cols)
|
||||||
{
|
{
|
||||||
int x = get_global_id(0);
|
int x = get_global_id(0);
|
||||||
@ -92,13 +94,10 @@ __kernel void arithm_flip_cols(__global const uchar* srcptr, int srcstep, int sr
|
|||||||
if (x < thread_cols && y < rows)
|
if (x < thread_cols && y < rows)
|
||||||
{
|
{
|
||||||
int x1 = cols - x - 1;
|
int x1 = cols - x - 1;
|
||||||
__global const type* src0 = (__global const type*)(srcptr + mad24(y, srcstep, mad24(x, sizeoftype, srcoffset)));
|
T src0 = loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset)));
|
||||||
__global const type* src1 = (__global const type*)(srcptr + mad24(y, srcstep, mad24(x1, sizeoftype, srcoffset)));
|
T src1 = loadpix(srcptr + mad24(y, src_step, mad24(x1, TSIZE, src_offset)));
|
||||||
|
|
||||||
__global type* dst0 = (__global type*)(dstptr + mad24(y, dststep, mad24(x1, sizeoftype, dstoffset)));
|
storepix(src0, dstptr + mad24(y, dst_step, mad24(x1, TSIZE, dst_offset)));
|
||||||
__global type* dst1 = (__global type*)(dstptr + mad24(y, dststep, mad24(x, sizeoftype, dstoffset)));
|
storepix(src1, dstptr + mad24(y, dst_step, mad24(x, TSIZE, dst_offset)));
|
||||||
|
|
||||||
dst1[0] = src1[0];
|
|
||||||
dst0[0] = src0[0];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user