cv::transpose
This commit is contained in:
@@ -3002,8 +3002,8 @@ static bool ocl_transpose( InputArray _src, OutputArray _dst )
|
|||||||
k.args(ocl::KernelArg::ReadOnly(src),
|
k.args(ocl::KernelArg::ReadOnly(src),
|
||||||
ocl::KernelArg::WriteOnlyNoSize(dst));
|
ocl::KernelArg::WriteOnlyNoSize(dst));
|
||||||
|
|
||||||
size_t localsize[3] = { TILE_DIM, BLOCK_ROWS, 1 };
|
size_t localsize[2] = { TILE_DIM, BLOCK_ROWS };
|
||||||
size_t globalsize[3] = { src.cols, inplace ? src.rows : divUp(src.rows, TILE_DIM) * BLOCK_ROWS, 1 };
|
size_t globalsize[2] = { src.cols, inplace ? src.rows : divUp(src.rows, TILE_DIM) * BLOCK_ROWS };
|
||||||
|
|
||||||
return k.run(2, globalsize, localsize, false);
|
return k.run(2, globalsize, localsize, false);
|
||||||
}
|
}
|
||||||
|
@@ -53,7 +53,7 @@
|
|||||||
#define TSIZE ((int)sizeof(T1)*3)
|
#define TSIZE ((int)sizeof(T1)*3)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define LDS_STEP TILE_DIM
|
#define LDS_STEP (TILE_DIM + 1)
|
||||||
|
|
||||||
__kernel void transpose(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
|
__kernel void transpose(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||||
__global uchar * dstptr, int dst_step, int dst_offset)
|
__global uchar * dstptr, int dst_step, int dst_offset)
|
||||||
@@ -90,6 +90,7 @@ __kernel void transpose(__global const uchar * srcptr, int src_step, int src_off
|
|||||||
{
|
{
|
||||||
int index_src = mad24(y, src_step, mad24(x, TSIZE, src_offset));
|
int index_src = mad24(y, src_step, mad24(x, TSIZE, src_offset));
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS)
|
for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS)
|
||||||
if (y + i < src_rows)
|
if (y + i < src_rows)
|
||||||
{
|
{
|
||||||
@@ -103,6 +104,7 @@ __kernel void transpose(__global const uchar * srcptr, int src_step, int src_off
|
|||||||
{
|
{
|
||||||
int index_dst = mad24(y_index, dst_step, mad24(x_index, TSIZE, dst_offset));
|
int index_dst = mad24(y_index, dst_step, mad24(x_index, TSIZE, dst_offset));
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS)
|
for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS)
|
||||||
if ((y_index + i) < src_cols)
|
if ((y_index + i) < src_cols)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user