Merge pull request #2621 from arkunze:pullreq/140319-resize-b
This commit is contained in:
@@ -2037,15 +2037,6 @@ static void ocl_computeResizeAreaTabs(int ssize, int dsize, double scale, int *
|
||||
ofs_tab[dx] = k;
|
||||
}
|
||||
|
||||
// Fills the two per-axis index tables used by the table-driven form of the
// resizeAREA_FAST kernel (the variant that takes dmap_tab / smap_tab arguments).
//
// dmap_tab : output, must hold at least `dcols` ints. Entry i is set to scale * i.
// smap_tab : output, must hold at least `dcols * scale` ints. Entry i is set to
//            min(scol - 1, i), i.e. i clamped to the last valid source index.
// scale    : integer scale factor along this axis.
// dcols    : destination extent along this axis.
// scol     : source extent along this axis.
//
// Despite the "cols" naming, ocl_resize calls this once with (dst.cols, src.cols)
// and once with (dst.rows, src.rows).
//
// NOTE(review): the hunk header preceding this function (-2037,15 +2037,6) suggests
// this helper is deleted by this commit, with the clamping done inline in the kernel
// via min(sx + px, src_cols - 1) / min(sy + py, src_rows - 1) instead - confirm
// against the actual patch.
static void ocl_computeResizeAreaFastTabs(int * dmap_tab, int * smap_tab, int scale, int dcols, int scol)
|
||||
{
|
||||
// Start index of the scale-wide source window for each destination index.
for (int i = 0; i < dcols; ++i)
|
||||
dmap_tab[i] = scale * i;
|
||||
|
||||
// Identity mapping over [0, dcols * scale), clamped so that indices past the end
// of the source map to the last source element (scol - 1).
for (int i = 0, size = dcols * scale; i < size; ++i)
|
||||
smap_tab[i] = std::min(scol - 1, i);
|
||||
}
|
||||
|
||||
static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
|
||||
double fx, double fy, int interpolation)
|
||||
{
|
||||
@@ -2075,7 +2066,39 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
|
||||
ocl::Kernel k;
|
||||
size_t globalsize[] = { dst.cols, dst.rows };
|
||||
|
||||
if (interpolation == INTER_LINEAR)
|
||||
ocl::Image2D srcImage;
|
||||
|
||||
// See if this could be done with a sampler. We stick with integer
|
||||
// datatypes because the observed error is low.
|
||||
bool useSampler = (interpolation == INTER_LINEAR && ocl::Device::getDefault().imageSupport() &&
|
||||
ocl::Image2D::canCreateAlias(src) && depth <= 4 &&
|
||||
ocl::Image2D::isFormatSupported(depth, cn, true));
|
||||
if (useSampler)
|
||||
{
|
||||
int wdepth = std::max(depth, CV_32S);
|
||||
char buf[2][32];
|
||||
cv::String compileOpts = format("-D USE_SAMPLER -D depth=%d -D T=%s -D T1=%s "
|
||||
"-D convertToDT=%s -D cn=%d",
|
||||
depth, ocl::typeToStr(type), ocl::typeToStr(depth),
|
||||
ocl::convertTypeStr(wdepth, depth, cn, buf[1]),
|
||||
cn);
|
||||
k.create("resizeSampler", ocl::imgproc::resize_oclsrc, compileOpts);
|
||||
|
||||
if(k.empty())
|
||||
{
|
||||
useSampler = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Convert the input into an OpenCL image type, using normalized channel data types
|
||||
// and aliasing the UMat.
|
||||
srcImage = ocl::Image2D(src, true, true);
|
||||
k.args(srcImage, ocl::KernelArg::WriteOnly(dst),
|
||||
(float)inv_fx, (float)inv_fy);
|
||||
}
|
||||
}
|
||||
|
||||
if (interpolation == INTER_LINEAR && !useSampler)
|
||||
{
|
||||
char buf[2][32];
|
||||
|
||||
@@ -2180,25 +2203,14 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
|
||||
{
|
||||
int wdepth2 = std::max(CV_32F, depth), wtype2 = CV_MAKE_TYPE(wdepth2, cn);
|
||||
buildOption = buildOption + format(" -D convertToT=%s -D WT2V=%s -D convertToWT2V=%s -D INTER_AREA_FAST"
|
||||
" -D XSCALE=%d -D YSCALE=%d -D SCALE=%ff",
|
||||
ocl::convertTypeStr(wdepth2, depth, cn, cvt[0]),
|
||||
ocl::typeToStr(wtype2), ocl::convertTypeStr(wdepth, wdepth2, cn, cvt[1]),
|
||||
iscale_x, iscale_y, 1.0f / (iscale_x * iscale_y));
|
||||
" -D XSCALE=%d -D YSCALE=%d -D SCALE=%ff",
|
||||
ocl::convertTypeStr(wdepth2, depth, cn, cvt[0]),
|
||||
ocl::typeToStr(wtype2), ocl::convertTypeStr(wdepth, wdepth2, cn, cvt[1]),
|
||||
iscale_x, iscale_y, 1.0f / (iscale_x * iscale_y));
|
||||
|
||||
k.create("resizeAREA_FAST", ocl::imgproc::resize_oclsrc, buildOption);
|
||||
if (k.empty())
|
||||
return false;
|
||||
|
||||
int smap_tab_size = dst.cols * iscale_x + dst.rows * iscale_y;
|
||||
AutoBuffer<int> dmap_tab(dst.cols + dst.rows), smap_tab(smap_tab_size);
|
||||
int * dxmap_tab = dmap_tab, * dymap_tab = dxmap_tab + dst.cols;
|
||||
int * sxmap_tab = smap_tab, * symap_tab = smap_tab + dst.cols * iscale_y;
|
||||
|
||||
ocl_computeResizeAreaFastTabs(dxmap_tab, sxmap_tab, iscale_x, dst.cols, src.cols);
|
||||
ocl_computeResizeAreaFastTabs(dymap_tab, symap_tab, iscale_y, dst.rows, src.rows);
|
||||
|
||||
Mat(1, dst.cols + dst.rows, CV_32SC1, (void *)dmap_tab).copyTo(dmap);
|
||||
Mat(1, smap_tab_size, CV_32SC1, (void *)smap_tab).copyTo(smap);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -2228,7 +2240,7 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize,
|
||||
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnly(src), dstarg = ocl::KernelArg::WriteOnly(dst);
|
||||
|
||||
if (is_area_fast)
|
||||
k.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(dmap), ocl::KernelArg::PtrReadOnly(smap));
|
||||
k.args(srcarg, dstarg);
|
||||
else
|
||||
k.args(srcarg, dstarg, inv_fxf, inv_fyf, ocl::KernelArg::PtrReadOnly(tabofsOcl),
|
||||
ocl::KernelArg::PtrReadOnly(mapOcl), ocl::KernelArg::PtrReadOnly(alphaOcl));
|
||||
|
||||
@@ -67,7 +67,64 @@
|
||||
#define TSIZE (int)sizeof(T1)*cn
|
||||
#endif
|
||||
|
||||
#ifdef INTER_LINEAR_INTEGER
|
||||
#if defined USE_SAMPLER
|
||||
|
||||
// READ_IMAGE(image, sampler, coord): read_imagef() result narrowed to the first
// `cn` components. `cn` comes from the host build options (-D cn=%d).
#if cn == 1
|
||||
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).x
|
||||
#elif cn == 2
|
||||
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xy
|
||||
#elif cn == 3
|
||||
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xyz
|
||||
#elif cn == 4
|
||||
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z)
|
||||
#endif
|
||||
|
||||
// Two-level paste so that `cn` is macro-expanded before ## is applied.
#define __CAT(x, y) x##y
|
||||
#define CAT(x, y) __CAT(x, y)
|
||||
// Float type with `cn` components (float1, float2, float3 or float4) that holds
// the value produced by READ_IMAGE.
#define INTERMEDIATE_TYPE CAT(float, cn)
|
||||
// CAT(float, 1) produces the token `float1`; alias it to the scalar float type.
#define float1 float
|
||||
|
||||
// RESULT_SCALE: factor applied to the sampled float value before it is converted
// to the destination type. `depth` comes from the host build options (-D depth=%d).
// NOTE(review): the host creates the image with "normalized channel data types",
// so these constants presumably undo that normalization, with depth 0..3 being the
// OpenCV codes for 8U / 8S / 16U / 16S - confirm against the OpenCV depth enum.
#if depth == 0
|
||||
#define RESULT_SCALE 255.0f
|
||||
#elif depth == 1
|
||||
#define RESULT_SCALE 127.0f
|
||||
#elif depth == 2
|
||||
#define RESULT_SCALE 65535.0f
|
||||
#elif depth == 3
|
||||
#define RESULT_SCALE 32767.0f
|
||||
#else
|
||||
#define RESULT_SCALE 1.0f
|
||||
#endif
|
||||
|
||||
// Resize kernel that reads the source through an OpenCL image sampler with
// CLK_FILTER_LINEAR, instead of interpolating by hand in the kernel.
//
// srcImage               : source image object (read-only).
// dstptr / dststep /
// dstoffset              : destination buffer, row step and start offset; the store
//                          address is dstptr + dy * dststep + dstoffset + dx * TSIZE.
// dstrows / dstcols      : destination extent, used only for the bounds check.
// ifx / ify              : factors that map a destination pixel centre to a source
//                          coordinate (the host passes (float)inv_fx, (float)inv_fy).
//
// Each work-item handles the destination pixel (get_global_id(0), get_global_id(1)).
__kernel void resizeSampler(__read_only image2d_t srcImage,
|
||||
__global uchar* dstptr, int dststep, int dstoffset,
|
||||
int dstrows, int dstcols,
|
||||
float ifx, float ify)
|
||||
{
|
||||
// Unnormalized (pixel) coordinates, clamp-to-edge addressing, linear filtering.
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
|
||||
CLK_ADDRESS_CLAMP_TO_EDGE |
|
||||
CLK_FILTER_LINEAR;
|
||||
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
|
||||
// Source coordinate of the destination pixel centre (dx + 0.5, dy + 0.5).
float sx = ((dx+0.5f) * ifx), sy = ((dy+0.5f) * ify);
|
||||
|
||||
// The sample is taken before the bounds check below, so work-items outside the
// destination still read the image but do not store anything.
INTERMEDIATE_TYPE intermediate = READ_IMAGE(srcImage, sampler, (float2)(sx, sy));
|
||||
|
||||
// depth <= 4: scale the sampled value by RESULT_SCALE and round before converting
// to T. The host only selects this kernel when depth <= 4, so the #else branch is
// presumably not reached through that path - TODO confirm.
#if depth <= 4
|
||||
T uval = convertToDT(round(intermediate * RESULT_SCALE));
|
||||
#else
|
||||
T uval = convertToDT(intermediate * RESULT_SCALE);
|
||||
#endif
|
||||
|
||||
// Only work-items inside the destination write a pixel.
if(dx < dstcols && dy < dstrows)
|
||||
{
|
||||
storepix(uval, dstptr + mad24(dy, dststep, dstoffset + dx*TSIZE));
|
||||
}
|
||||
}
|
||||
|
||||
#elif defined INTER_LINEAR_INTEGER
|
||||
|
||||
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
|
||||
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||
@@ -185,8 +242,7 @@ __kernel void resizeNN(__global const uchar * srcptr, int src_step, int src_offs
|
||||
#ifdef INTER_AREA_FAST
|
||||
|
||||
__kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
|
||||
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
|
||||
__global const int * dmap_tab, __global const int * smap_tab)
|
||||
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
|
||||
{
|
||||
int dx = get_global_id(0);
|
||||
int dy = get_global_id(1);
|
||||
@@ -195,21 +251,21 @@ __kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_
|
||||
{
|
||||
int dst_index = mad24(dy, dst_step, dst_offset);
|
||||
|
||||
__global const int * xmap_tab = dmap_tab;
|
||||
__global const int * ymap_tab = dmap_tab + dst_cols;
|
||||
__global const int * sxmap_tab = smap_tab;
|
||||
__global const int * symap_tab = smap_tab + XSCALE * dst_cols;
|
||||
|
||||
int sx = xmap_tab[dx], sy = ymap_tab[dy];
|
||||
int sx = XSCALE * dx;
|
||||
int sy = YSCALE * dy;
|
||||
WTV sum = (WTV)(0);
|
||||
|
||||
#pragma unroll
|
||||
for (int y = 0; y < YSCALE; ++y)
|
||||
for (int py = 0; py < YSCALE; ++py)
|
||||
{
|
||||
int src_index = mad24(symap_tab[y + sy], src_step, src_offset);
|
||||
int y = min(sy + py, src_rows - 1);
|
||||
int src_index = mad24(y, src_step, src_offset);
|
||||
#pragma unroll
|
||||
for (int x = 0; x < XSCALE; ++x)
|
||||
sum += convertToWTV(loadpix(src + mad24(sxmap_tab[sx + x], TSIZE, src_index)));
|
||||
for (int px = 0; px < XSCALE; ++px)
|
||||
{
|
||||
int x = min(sx + px, src_cols - 1);
|
||||
sum += convertToWTV(loadpix(src + src_index + x*TSIZE));
|
||||
}
|
||||
}
|
||||
|
||||
storepix(convertToT(convertToWT2V(sum) * (WT2V)(SCALE)), dst + mad24(dx, TSIZE, dst_index));
|
||||
|
||||
Reference in New Issue
Block a user