Merge pull request #1809 from ilya-lavrenov:ocl_resize_nn
This commit is contained in:
commit
3f927abb4d
@ -282,96 +282,63 @@ namespace cv
|
||||
|
||||
static void resize_gpu( const oclMat &src, oclMat &dst, double fx, double fy, int interpolation)
|
||||
{
|
||||
CV_Assert( (src.channels() == dst.channels()) );
|
||||
Context *clCxt = src.clCxt;
|
||||
float ifx = 1. / fx;
|
||||
float ify = 1. / fy;
|
||||
double ifx_d = 1. / fx;
|
||||
double ify_d = 1. / fy;
|
||||
int srcStep_in_pixel = src.step1() / src.oclchannels();
|
||||
int srcoffset_in_pixel = src.offset / src.elemSize();
|
||||
int dstStep_in_pixel = dst.step1() / dst.oclchannels();
|
||||
int dstoffset_in_pixel = dst.offset / dst.elemSize();
|
||||
float ifx = 1.f / fx, ify = 1.f / fy;
|
||||
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
|
||||
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
|
||||
int ocn = interpolation == INTER_LINEAR ? dst.oclchannels() : -1;
|
||||
int depth = interpolation == INTER_LINEAR ? dst.depth() : -1;
|
||||
|
||||
string kernelName;
|
||||
if (interpolation == INTER_LINEAR)
|
||||
kernelName = "resizeLN";
|
||||
else if (interpolation == INTER_NEAREST)
|
||||
kernelName = "resizeNN";
|
||||
const char * const interMap[] = { "NN", "LN", "CUBIC", "AREA", "LAN4" };
|
||||
std::string kernelName = std::string("resize") + interMap[interpolation];
|
||||
|
||||
const char * const typeMap[] = { "uchar", "uchar", "ushort", "ushort", "int", "int", "double" };
|
||||
const char * const channelMap[] = { "" , "", "2", "4", "4" };
|
||||
std::string buildOption = format("-D %s -D T=%s%s", interMap[interpolation], typeMap[dst.depth()], channelMap[dst.oclchannels()]);
|
||||
|
||||
//TODO: improve this kernel
|
||||
size_t blkSizeX = 16, blkSizeY = 16;
|
||||
size_t glbSizeX;
|
||||
if (src.type() == CV_8UC1)
|
||||
if (src.type() == CV_8UC1 && interpolation == INTER_LINEAR)
|
||||
{
|
||||
size_t cols = (dst.cols + dst.offset % 4 + 3) / 4;
|
||||
glbSizeX = cols % blkSizeX == 0 && cols != 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
|
||||
}
|
||||
else
|
||||
glbSizeX = dst.cols % blkSizeX == 0 && dst.cols != 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX;
|
||||
glbSizeX = dst.cols;
|
||||
|
||||
size_t glbSizeY = dst.rows % blkSizeY == 0 && dst.rows != 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY;
|
||||
size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
|
||||
size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
|
||||
size_t globalThreads[3] = { glbSizeX, dst.rows, 1 };
|
||||
size_t localThreads[3] = { blkSizeX, blkSizeY, 1 };
|
||||
|
||||
vector< pair<size_t, const void *> > args;
|
||||
if (interpolation == INTER_NEAREST)
|
||||
{
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dstoffset_in_pixel));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&srcoffset_in_pixel));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dstStep_in_pixel));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&srcStep_in_pixel));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
|
||||
if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
|
||||
{
|
||||
args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d));
|
||||
args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d));
|
||||
}
|
||||
else
|
||||
{
|
||||
args.push_back( make_pair(sizeof(cl_float), (void *)&ifx));
|
||||
args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
|
||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dstoffset_in_pixel));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&srcoffset_in_pixel));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dstStep_in_pixel));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&srcStep_in_pixel));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src_offset));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src_step));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
|
||||
args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
|
||||
args.push_back( make_pair(sizeof(cl_float), (void *)&ifx));
|
||||
args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
|
||||
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args,
|
||||
ocn, depth, buildOption.c_str());
|
||||
}
|
||||
|
||||
openCLExecuteKernel(clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
|
||||
}
|
||||
|
||||
void resize(const oclMat &src, oclMat &dst, Size dsize,
|
||||
double fx, double fy, int interpolation)
|
||||
void resize(const oclMat &src, oclMat &dst, Size dsize, double fx, double fy, int interpolation)
|
||||
{
|
||||
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4
|
||||
|| src.type() == CV_32FC1 || src.type() == CV_32FC3 || src.type() == CV_32FC4);
|
||||
CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST);
|
||||
CV_Assert( src.size().area() > 0 );
|
||||
CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
|
||||
CV_Assert(dsize.area() > 0 || (fx > 0 && fy > 0));
|
||||
|
||||
if (!(dsize == Size()) && (fx > 0 && fy > 0))
|
||||
if (dsize.width != (int)(src.cols * fx) || dsize.height != (int)(src.rows * fy))
|
||||
CV_Error(CV_StsUnmatchedSizes, "invalid dsize and fx, fy!");
|
||||
|
||||
if ( dsize == Size() )
|
||||
if (dsize.area() == 0)
|
||||
{
|
||||
dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
|
||||
CV_Assert(dsize.area() > 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
fx = (double)dsize.width / src.cols;
|
||||
@ -380,13 +347,7 @@ namespace cv
|
||||
|
||||
dst.create(dsize, src.type());
|
||||
|
||||
if ( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR )
|
||||
{
|
||||
resize_gpu( src, dst, fx, fy, interpolation);
|
||||
return;
|
||||
}
|
||||
|
||||
CV_Error(CV_StsUnsupportedFormat, "Non-supported interpolation method");
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
@ -45,7 +45,7 @@
|
||||
|
||||
|
||||
// resize kernel
|
||||
// Currently, CV_8UC1 CV_8UC4 CV_32FC1 and CV_32FC4are supported.
|
||||
// Currently, CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 are supported.
|
||||
// We shall support other types later if necessary.
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
@ -54,20 +54,18 @@
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#define F double
|
||||
#else
|
||||
#define F float
|
||||
#endif
|
||||
|
||||
|
||||
#define INTER_RESIZE_COEF_BITS 11
|
||||
#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
|
||||
#define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)
|
||||
#define CAST_SCALE (1.0f/(1<<CAST_BITS))
|
||||
#define INC(x,l) ((x+1) >= (l) ? (x):((x)+1))
|
||||
|
||||
#ifdef LN
|
||||
|
||||
__kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restrict src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int dst_offset, int src_offset,int dst_step, int src_step,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
|
||||
{
|
||||
int gx = get_global_id(0);
|
||||
@ -75,7 +73,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
|
||||
|
||||
float4 sx, u, xf;
|
||||
int4 x, DX;
|
||||
gx = (gx<<2) - (dstoffset_in_pixel&3);
|
||||
gx = (gx<<2) - (dst_offset&3);
|
||||
DX = (int4)(gx, gx+1, gx+2, gx+3);
|
||||
sx = (convert_float4(DX) + 0.5f) * ifx - 0.5f;
|
||||
xf = floor(sx);
|
||||
@ -113,10 +111,10 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
|
||||
int4 val1, val2, val;
|
||||
int4 sdata1, sdata2, sdata3, sdata4;
|
||||
|
||||
int4 pos1 = mad24((int4)y, (int4)srcstep_in_pixel, x+(int4)srcoffset_in_pixel);
|
||||
int4 pos2 = mad24((int4)y, (int4)srcstep_in_pixel, x_+(int4)srcoffset_in_pixel);
|
||||
int4 pos3 = mad24((int4)y_, (int4)srcstep_in_pixel, x+(int4)srcoffset_in_pixel);
|
||||
int4 pos4 = mad24((int4)y_, (int4)srcstep_in_pixel, x_+(int4)srcoffset_in_pixel);
|
||||
int4 pos1 = mad24((int4)y, (int4)src_step, x+(int4)src_offset);
|
||||
int4 pos2 = mad24((int4)y, (int4)src_step, x_+(int4)src_offset);
|
||||
int4 pos3 = mad24((int4)y_, (int4)src_step, x+(int4)src_offset);
|
||||
int4 pos4 = mad24((int4)y_, (int4)src_step, x_+(int4)src_offset);
|
||||
|
||||
sdata1.s0 = src[pos1.s0];
|
||||
sdata1.s1 = src[pos1.s1];
|
||||
@ -144,12 +142,12 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
|
||||
|
||||
val = ((val + (1<<(CAST_BITS-1))) >> CAST_BITS);
|
||||
|
||||
pos4 = mad24(dy, dststep_in_pixel, gx+dstoffset_in_pixel);
|
||||
pos4 = mad24(dy, dst_step, gx+dst_offset);
|
||||
pos4.y++;
|
||||
pos4.z+=2;
|
||||
pos4.w+=3;
|
||||
uchar4 uval = convert_uchar4_sat(val);
|
||||
int con = (gx >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dstoffset_in_pixel&3)==0);
|
||||
int con = (gx >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dst_offset&3)==0);
|
||||
if(con)
|
||||
{
|
||||
*(__global uchar4*)(dst + pos4.x)=uval;
|
||||
@ -176,7 +174,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
|
||||
}
|
||||
|
||||
__kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int dst_offset, int src_offset,int dst_step, int src_step,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
@ -202,24 +200,24 @@ __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
|
||||
int y_ = INC(y,src_rows);
|
||||
int x_ = INC(x,src_cols);
|
||||
int4 srcpos;
|
||||
srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel);
|
||||
srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel);
|
||||
srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel);
|
||||
srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel);
|
||||
srcpos.x = mad24(y, src_step, x+src_offset);
|
||||
srcpos.y = mad24(y, src_step, x_+src_offset);
|
||||
srcpos.z = mad24(y_, src_step, x+src_offset);
|
||||
srcpos.w = mad24(y_, src_step, x_+src_offset);
|
||||
int4 data0 = convert_int4(src[srcpos.x]);
|
||||
int4 data1 = convert_int4(src[srcpos.y]);
|
||||
int4 data2 = convert_int4(src[srcpos.z]);
|
||||
int4 data3 = convert_int4(src[srcpos.w]);
|
||||
int4 val = mul24((int4)mul24(U1, V1) , data0) + mul24((int4)mul24(U, V1) , data1)
|
||||
+mul24((int4)mul24(U1, V) , data2)+mul24((int4)mul24(U, V) , data3);
|
||||
int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel);
|
||||
int dstpos = mad24(dy, dst_step, dx+dst_offset);
|
||||
uchar4 uval = convert_uchar4((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
|
||||
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
|
||||
dst[dstpos] = uval;
|
||||
}
|
||||
|
||||
__kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int dst_offset, int src_offset,int dst_step, int src_step,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
@ -239,10 +237,10 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
|
||||
float u1 = 1.f-u;
|
||||
float v1 = 1.f-v;
|
||||
int4 srcpos;
|
||||
srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel);
|
||||
srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel);
|
||||
srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel);
|
||||
srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel);
|
||||
srcpos.x = mad24(y, src_step, x+src_offset);
|
||||
srcpos.y = mad24(y, src_step, x_+src_offset);
|
||||
srcpos.z = mad24(y_, src_step, x+src_offset);
|
||||
srcpos.w = mad24(y_, src_step, x_+src_offset);
|
||||
float data0 = src[srcpos.x];
|
||||
float data1 = src[srcpos.y];
|
||||
float data2 = src[srcpos.z];
|
||||
@ -252,13 +250,13 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
|
||||
float val2 = u1 * data2 +
|
||||
u * data3;
|
||||
float val = v1 * val1 + v * val2;
|
||||
int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel);
|
||||
int dstpos = mad24(dy, dst_step, dx+dst_offset);
|
||||
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
|
||||
dst[dstpos] = val;
|
||||
}
|
||||
|
||||
__kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int dst_offset, int src_offset,int dst_step, int src_step,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
@ -278,10 +276,10 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
|
||||
float u1 = 1.f-u;
|
||||
float v1 = 1.f-v;
|
||||
int4 srcpos;
|
||||
srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel);
|
||||
srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel);
|
||||
srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel);
|
||||
srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel);
|
||||
srcpos.x = mad24(y, src_step, x+src_offset);
|
||||
srcpos.y = mad24(y, src_step, x_+src_offset);
|
||||
srcpos.z = mad24(y_, src_step, x+src_offset);
|
||||
srcpos.w = mad24(y_, src_step, x_+src_offset);
|
||||
float4 s_data1, s_data2, s_data3, s_data4;
|
||||
s_data1 = src[srcpos.x];
|
||||
s_data2 = src[srcpos.y];
|
||||
@ -289,129 +287,32 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
|
||||
s_data4 = src[srcpos.w];
|
||||
float4 val = u1 * v1 * s_data1 + u * v1 * s_data2
|
||||
+u1 * v *s_data3 + u * v *s_data4;
|
||||
int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel);
|
||||
int dstpos = mad24(dy, dst_step, dx+dst_offset);
|
||||
|
||||
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
|
||||
dst[dstpos] = val;
|
||||
}
|
||||
|
||||
__kernel void resizeNN_C1_D0(__global uchar * dst, __global uchar * src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify )
|
||||
{
|
||||
int gx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
#elif defined NN
|
||||
|
||||
gx = (gx<<2) - (dstoffset_in_pixel&3);
|
||||
//int4 GX = (int4)(gx, gx+1, gx+2, gx+3);
|
||||
|
||||
int4 sx;
|
||||
int sy;
|
||||
F ss1 = gx*ifx;
|
||||
F ss2 = (gx+1)*ifx;
|
||||
F ss3 = (gx+2)*ifx;
|
||||
F ss4 = (gx+3)*ifx;
|
||||
F s5 = dy * ify;
|
||||
sx.s0 = min((int)floor(ss1), src_cols-1);
|
||||
sx.s1 = min((int)floor(ss2), src_cols-1);
|
||||
sx.s2 = min((int)floor(ss3), src_cols-1);
|
||||
sx.s3 = min((int)floor(ss4), src_cols-1);
|
||||
sy = min((int)floor(s5), src_rows-1);
|
||||
|
||||
uchar4 val;
|
||||
int4 pos = mad24((int4)sy, (int4)srcstep_in_pixel, sx+(int4)srcoffset_in_pixel);
|
||||
val.s0 = src[pos.s0];
|
||||
val.s1 = src[pos.s1];
|
||||
val.s2 = src[pos.s2];
|
||||
val.s3 = src[pos.s3];
|
||||
|
||||
//__global uchar4* d = (__global uchar4*)(dst + dstoffset_in_pixel + dy * dststep_in_pixel + gx);
|
||||
//uchar4 dVal = *d;
|
||||
pos = mad24(dy, dststep_in_pixel, gx+dstoffset_in_pixel);
|
||||
pos.y++;
|
||||
pos.z+=2;
|
||||
pos.w+=3;
|
||||
|
||||
int con = (gx >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dstoffset_in_pixel&3)==0);
|
||||
if(con)
|
||||
{
|
||||
*(__global uchar4*)(dst + pos.x)=val;
|
||||
}
|
||||
else
|
||||
{
|
||||
if(gx >= 0 && gx < dst_cols && dy >= 0 && dy < dst_rows)
|
||||
{
|
||||
dst[pos.x]=val.x;
|
||||
}
|
||||
if(gx+1 >= 0 && gx+1 < dst_cols && dy >= 0 && dy < dst_rows)
|
||||
{
|
||||
dst[pos.y]=val.y;
|
||||
}
|
||||
if(gx+2 >= 0 && gx+2 < dst_cols && dy >= 0 && dy < dst_rows)
|
||||
{
|
||||
dst[pos.z]=val.z;
|
||||
}
|
||||
if(gx+3 >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows)
|
||||
{
|
||||
dst[pos.w]=val.w;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void resizeNN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify )
|
||||
__kernel void resizeNN(__global T * dst, __global T * src,
|
||||
int dst_offset, int src_offset,int dst_step, int src_step,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify)
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
|
||||
F s1 = dx*ifx;
|
||||
F s2 = dy*ify;
|
||||
int sx = fmin((float)floor(s1), (float)src_cols-1);
|
||||
int sy = fmin((float)floor(s2), (float)src_rows-1);
|
||||
int dpos = mad24(dy, dststep_in_pixel, dx + dstoffset_in_pixel);
|
||||
int spos = mad24(sy, srcstep_in_pixel, sx + srcoffset_in_pixel);
|
||||
if (dx < dst_cols && dy < dst_rows)
|
||||
{
|
||||
float s1 = dx * ifx, s2 = dy * ify;
|
||||
int sx = min(convert_int_sat_rtn(s1), src_cols - 1);
|
||||
int sy = min(convert_int_sat_rtn(s2), src_rows - 1);
|
||||
|
||||
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
|
||||
dst[dpos] = src[spos];
|
||||
int dst_index = mad24(dy, dst_step, dx + dst_offset);
|
||||
int src_index = mad24(sy, src_step, sx + src_offset);
|
||||
|
||||
dst[dst_index] = src[src_index];
|
||||
}
|
||||
}
|
||||
|
||||
__kernel void resizeNN_C1_D5(__global float * dst, __global float * src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify )
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
|
||||
F s1 = dx*ifx;
|
||||
F s2 = dy*ify;
|
||||
int sx = fmin((float)floor(s1), (float)src_cols-1);
|
||||
int sy = fmin((float)floor(s2), (float)src_rows-1);
|
||||
|
||||
int dpos = mad24(dy, dststep_in_pixel, dx + dstoffset_in_pixel);
|
||||
int spos = mad24(sy, srcstep_in_pixel, sx + srcoffset_in_pixel);
|
||||
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
|
||||
dst[dpos] = src[spos];
|
||||
|
||||
}
|
||||
|
||||
__kernel void resizeNN_C4_D5(__global float4 * dst, __global float4 * src,
|
||||
int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
|
||||
int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify )
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
F s1 = dx*ifx;
|
||||
F s2 = dy*ify;
|
||||
int s_col = floor(s1);
|
||||
int s_row = floor(s2);
|
||||
int sx = min(s_col, src_cols-1);
|
||||
int sy = min(s_row, src_rows-1);
|
||||
int dpos = mad24(dy, dststep_in_pixel, dx + dstoffset_in_pixel);
|
||||
int spos = mad24(sy, srcstep_in_pixel, sx + srcoffset_in_pixel);
|
||||
|
||||
if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
|
||||
dst[dpos] = src[spos];
|
||||
|
||||
}
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user