Merge pull request #2529 from ilya-lavrenov:tapi_filter2D
This commit is contained in:
commit
a2f8a93bf5
@ -414,24 +414,23 @@ const String& getBuildInformation()
|
|||||||
|
|
||||||
String format( const char* fmt, ... )
|
String format( const char* fmt, ... )
|
||||||
{
|
{
|
||||||
char buf[1024];
|
AutoBuffer<char, 1024> buf;
|
||||||
|
|
||||||
va_list va;
|
for ( ; ; )
|
||||||
va_start(va, fmt);
|
|
||||||
int len = vsnprintf(buf, sizeof(buf), fmt, va);
|
|
||||||
va_end(va);
|
|
||||||
|
|
||||||
if (len >= (int)sizeof(buf))
|
|
||||||
{
|
{
|
||||||
String s(len, '\0');
|
va_list va;
|
||||||
va_start(va, fmt);
|
va_start(va, fmt);
|
||||||
len = vsnprintf((char*)s.c_str(), len + 1, fmt, va);
|
int bsize = static_cast<int>(buf.size()),
|
||||||
(void)len;
|
len = vsnprintf((char *)buf, bsize, fmt, va);
|
||||||
va_end(va);
|
va_end(va);
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
return String(buf, len);
|
if (len < 0 || len >= bsize)
|
||||||
|
{
|
||||||
|
buf.resize(std::max(bsize << 1, len + 1));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
return String((char *)buf, len);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
String tempfile( const char* suffix )
|
String tempfile( const char* suffix )
|
||||||
|
@ -42,7 +42,6 @@
|
|||||||
|
|
||||||
#include "precomp.hpp"
|
#include "precomp.hpp"
|
||||||
#include "opencl_kernels.hpp"
|
#include "opencl_kernels.hpp"
|
||||||
#include <sstream>
|
|
||||||
|
|
||||||
/****************************************************************************************\
|
/****************************************************************************************\
|
||||||
Base Image Filter
|
Base Image Filter
|
||||||
@ -3197,6 +3196,8 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
|
|||||||
size_t tryWorkItems = maxWorkItemSizes[0];
|
size_t tryWorkItems = maxWorkItemSizes[0];
|
||||||
char cvt[2][40];
|
char cvt[2][40];
|
||||||
|
|
||||||
|
String kerStr = ocl::kernelToStr(kernelMatDataFloat, CV_32F);
|
||||||
|
|
||||||
for ( ; ; )
|
for ( ; ; )
|
||||||
{
|
{
|
||||||
size_t BLOCK_SIZE = tryWorkItems;
|
size_t BLOCK_SIZE = tryWorkItems;
|
||||||
@ -3226,14 +3227,14 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
|
|||||||
|
|
||||||
String opts = format("-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D cn=%d "
|
String opts = format("-D LOCAL_SIZE=%d -D BLOCK_SIZE_Y=%d -D cn=%d "
|
||||||
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
|
"-D ANCHOR_X=%d -D ANCHOR_Y=%d -D KERNEL_SIZE_X=%d -D KERNEL_SIZE_Y=%d "
|
||||||
"-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s "
|
"-D KERNEL_SIZE_Y2_ALIGNED=%d -D %s -D %s -D %s%s%s "
|
||||||
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s "
|
"-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D WT=%s -D WT1=%s "
|
||||||
"-D convertToWT=%s -D convertToDstT=%s",
|
"-D convertToWT=%s -D convertToDstT=%s",
|
||||||
(int)BLOCK_SIZE, (int)BLOCK_SIZE_Y, cn, anchor.x, anchor.y,
|
(int)BLOCK_SIZE, (int)BLOCK_SIZE_Y, cn, anchor.x, anchor.y,
|
||||||
ksize.width, ksize.height, kernel_size_y2_aligned, borderMap[borderType],
|
ksize.width, ksize.height, kernel_size_y2_aligned, borderMap[borderType],
|
||||||
extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
|
extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
|
||||||
isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED",
|
isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED",
|
||||||
doubleSupport ? " -D DOUBLE_SUPPORT" : "",
|
doubleSupport ? " -D DOUBLE_SUPPORT" : "", kerStr.c_str(),
|
||||||
ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype),
|
ocl::typeToStr(type), ocl::typeToStr(sdepth), ocl::typeToStr(dtype),
|
||||||
ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth),
|
ocl::typeToStr(ddepth), ocl::typeToStr(wtype), ocl::typeToStr(wdepth),
|
||||||
ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]),
|
ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]),
|
||||||
@ -3255,7 +3256,7 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
|
|||||||
}
|
}
|
||||||
|
|
||||||
_dst.create(sz, dtype);
|
_dst.create(sz, dtype);
|
||||||
UMat dst = _dst.getUMat(), kernalDataUMat(kernelMatDataFloat, true);
|
UMat dst = _dst.getUMat();
|
||||||
|
|
||||||
int srcOffsetX = (int)((src.offset % src.step) / src.elemSize());
|
int srcOffsetX = (int)((src.offset % src.step) / src.elemSize());
|
||||||
int srcOffsetY = (int)(src.offset / src.step);
|
int srcOffsetY = (int)(src.offset / src.step);
|
||||||
@ -3263,8 +3264,7 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
|
|||||||
int srcEndY = (isolated ? (srcOffsetY + sz.height) : wholeSize.height);
|
int srcEndY = (isolated ? (srcOffsetY + sz.height) : wholeSize.height);
|
||||||
|
|
||||||
k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, srcOffsetX, srcOffsetY,
|
k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, srcOffsetX, srcOffsetY,
|
||||||
srcEndX, srcEndY, ocl::KernelArg::WriteOnly(dst),
|
srcEndX, srcEndY, ocl::KernelArg::WriteOnly(dst), (float)delta);
|
||||||
ocl::KernelArg::PtrReadOnly(kernalDataUMat), (float)delta);
|
|
||||||
|
|
||||||
return k.run(2, globalsize, localsize, false);
|
return k.run(2, globalsize, localsize, false);
|
||||||
}
|
}
|
||||||
|
@ -200,8 +200,11 @@ inline WT readSrcPixel(int2 pos, __global const uchar * srcptr, int src_step, co
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define DIG(a) a,
|
||||||
|
__constant WT1 kernelData[] = { COEFF };
|
||||||
|
|
||||||
__kernel void filter2D(__global const uchar * srcptr, int src_step, int srcOffsetX, int srcOffsetY, int srcEndX, int srcEndY,
|
__kernel void filter2D(__global const uchar * srcptr, int src_step, int srcOffsetX, int srcOffsetY, int srcEndX, int srcEndY,
|
||||||
__global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols, __constant WT1 * kernelData, float delta)
|
__global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols, float delta)
|
||||||
{
|
{
|
||||||
const struct RectCoords srcCoords = { srcOffsetX, srcOffsetY, srcEndX, srcEndY }; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
|
const struct RectCoords srcCoords = { srcOffsetX, srcOffsetY, srcEndX, srcEndY }; // for non-isolated border: offsetX, offsetY, wholeX, wholeY
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user