added area fast mode to ocl::resize
This commit is contained in:
parent
f70d63e4c9
commit
198cd1a40d
@ -191,7 +191,7 @@ typedef TestBaseWithParam<resizeAreaParams> resizeAreaFixture;
|
|||||||
PERF_TEST_P(resizeAreaFixture, resize,
|
PERF_TEST_P(resizeAreaFixture, resize,
|
||||||
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
|
::testing::Combine(OCL_TYPICAL_MAT_SIZES,
|
||||||
OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
|
OCL_PERF_ENUM(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
|
||||||
::testing::Values(0.6, 0.3)))
|
::testing::Values(0.3, 0.5, 0.6)))
|
||||||
{
|
{
|
||||||
const resizeAreaParams params = GetParam();
|
const resizeAreaParams params = GetParam();
|
||||||
const Size srcSize = get<0>(params);
|
const Size srcSize = get<0>(params);
|
||||||
|
@ -318,28 +318,46 @@ namespace cv
|
|||||||
ofs_tab[dx] = k;
|
ofs_tab[dx] = k;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void computeResizeAreaFastTabs(int * dmap_tab, int * smap_tab, int scale, int dcols, int scol)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < dcols; ++i)
|
||||||
|
dmap_tab[i] = scale * i;
|
||||||
|
|
||||||
|
for (int i = 0, size = dcols * scale; i < size; ++i)
|
||||||
|
smap_tab[i] = std::min(scol - 1, i);
|
||||||
|
}
|
||||||
|
|
||||||
static void resize_gpu( const oclMat &src, oclMat &dst, double ifx, double ify, int interpolation)
|
static void resize_gpu( const oclMat &src, oclMat &dst, double ifx, double ify, int interpolation)
|
||||||
{
|
{
|
||||||
float ifxf = (float)ifx, ifyf = (float)ify;
|
float ifxf = (float)ifx, ifyf = (float)ify;
|
||||||
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
|
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
|
||||||
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
|
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
|
||||||
int ocn = interpolation == INTER_LINEAR ? dst.oclchannels() : -1;
|
int ocn = dst.oclchannels(), depth = dst.depth();
|
||||||
int depth = interpolation == INTER_LINEAR ? dst.depth() : -1;
|
|
||||||
|
|
||||||
const char * const interMap[] = { "NN", "LN", "CUBIC", "AREA", "LAN4" };
|
const char * const interMap[] = { "NN", "LN", "CUBIC", "AREA", "LAN4" };
|
||||||
std::string kernelName = std::string("resize") + interMap[interpolation];
|
std::string kernelName = std::string("resize") + interMap[interpolation];
|
||||||
|
|
||||||
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
const char * const typeMap[] = { "uchar", "char", "ushort", "short", "int", "float", "double" };
|
||||||
const char * const channelMap[] = { "" , "", "2", "4", "4" };
|
const char * const channelMap[] = { "" , "", "2", "4", "4" };
|
||||||
std::string buildOption = format("-D %s -D T=%s%s", interMap[interpolation], typeMap[dst.depth()], channelMap[dst.oclchannels()]);
|
std::string buildOption = format("-D %s -D T=%s%s", interMap[interpolation], typeMap[depth], channelMap[ocn]);
|
||||||
|
|
||||||
int wdepth = std::max(src.depth(), CV_32F);
|
int wdepth = std::max(src.depth(), CV_32F);
|
||||||
|
|
||||||
|
// check if fx, fy is integer and then we have inter area fast mode
|
||||||
|
int iscale_x = saturate_cast<int>(ifx);
|
||||||
|
int iscale_y = saturate_cast<int>(ify);
|
||||||
|
|
||||||
|
bool is_area_fast = std::abs(ifx - iscale_x) < DBL_EPSILON &&
|
||||||
|
std::abs(ify - iscale_y) < DBL_EPSILON;
|
||||||
|
if (is_area_fast)
|
||||||
|
wdepth = std::max(src.depth(), CV_32S);
|
||||||
|
|
||||||
if (interpolation != INTER_NEAREST)
|
if (interpolation != INTER_NEAREST)
|
||||||
{
|
{
|
||||||
buildOption += format(" -D WT=%s -D WTV=%s%s -D convertToWTV=convert_%s%s -D convertToT=convert_%s%s%s",
|
buildOption += format(" -D WT=%s -D WTV=%s%s -D convertToWTV=convert_%s%s -D convertToT=convert_%s%s%s",
|
||||||
typeMap[wdepth], typeMap[wdepth], channelMap[dst.oclchannels()],
|
typeMap[wdepth], typeMap[wdepth], channelMap[ocn],
|
||||||
typeMap[wdepth], channelMap[dst.oclchannels()],
|
typeMap[wdepth], channelMap[ocn],
|
||||||
typeMap[src.depth()], channelMap[dst.oclchannels()], src.depth() <= CV_32S ? "_sat_rte" : "");
|
typeMap[src.depth()], channelMap[ocn], src.depth() <= CV_32S ? "_sat_rte" : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t blkSizeX = 16, blkSizeY = 16;
|
size_t blkSizeX = 16, blkSizeY = 16;
|
||||||
@ -352,8 +370,29 @@ namespace cv
|
|||||||
else
|
else
|
||||||
glbSizeX = dst.cols;
|
glbSizeX = dst.cols;
|
||||||
|
|
||||||
static oclMat alphaOcl, mapOcl, tabofsOcl;
|
oclMat alphaOcl, mapOcl, tabofsOcl;
|
||||||
if (interpolation == INTER_AREA)
|
if (interpolation == INTER_AREA)
|
||||||
|
{
|
||||||
|
if (is_area_fast)
|
||||||
|
{
|
||||||
|
kernelName += "_FAST";
|
||||||
|
int wdepth2 = std::max(CV_32F, src.depth());
|
||||||
|
buildOption += format(" -D WT2V=%s%s -D convertToWT2V=convert_%s%s -D AREA_FAST -D XSCALE=%d -D YSCALE=%d -D SCALE=%f",
|
||||||
|
typeMap[wdepth2], channelMap[ocn], typeMap[wdepth2], channelMap[ocn],
|
||||||
|
iscale_x, iscale_y, 1.0f / (iscale_x * iscale_y));
|
||||||
|
|
||||||
|
int smap_tab_size = dst.cols * iscale_x + dst.rows * iscale_y;
|
||||||
|
AutoBuffer<int> dmap_tab(dst.cols + dst.rows), smap_tab(smap_tab_size);
|
||||||
|
int * dxmap_tab = dmap_tab, * dymap_tab = dxmap_tab + dst.cols;
|
||||||
|
int * sxmap_tab = smap_tab, * symap_tab = smap_tab + dst.cols * iscale_y;
|
||||||
|
|
||||||
|
computeResizeAreaFastTabs(dxmap_tab, sxmap_tab, iscale_x, dst.cols, src.cols);
|
||||||
|
computeResizeAreaFastTabs(dymap_tab, symap_tab, iscale_y, dst.rows, src.rows);
|
||||||
|
|
||||||
|
tabofsOcl = oclMat(1, dst.cols + dst.rows, CV_32SC1, (void *)dmap_tab);
|
||||||
|
mapOcl = oclMat(1, smap_tab_size, CV_32SC1, (void *)smap_tab);
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
Size ssize = src.size(), dsize = dst.size();
|
Size ssize = src.size(), dsize = dst.size();
|
||||||
int xytab_size = (ssize.width + ssize.height) << 1;
|
int xytab_size = (ssize.width + ssize.height) << 1;
|
||||||
@ -373,6 +412,7 @@ namespace cv
|
|||||||
mapOcl = oclMat(1, xytab_size, CV_32SC1, (void *)_xymap_tab);
|
mapOcl = oclMat(1, xytab_size, CV_32SC1, (void *)_xymap_tab);
|
||||||
tabofsOcl = oclMat(1, tabofs_size, CV_32SC1, (void *)_xyofs_tab);
|
tabofsOcl = oclMat(1, tabofs_size, CV_32SC1, (void *)_xyofs_tab);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
size_t globalThreads[3] = { glbSizeX, dst.rows, 1 };
|
size_t globalThreads[3] = { glbSizeX, dst.rows, 1 };
|
||||||
size_t localThreads[3] = { blkSizeX, blkSizeY, 1 };
|
size_t localThreads[3] = { blkSizeX, blkSizeY, 1 };
|
||||||
@ -400,12 +440,18 @@ namespace cv
|
|||||||
args.push_back( make_pair(sizeof(cl_float), (void *)&ifyf));
|
args.push_back( make_pair(sizeof(cl_float), (void *)&ifyf));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (interpolation == INTER_AREA)
|
// precomputed tabs
|
||||||
{
|
if (!tabofsOcl.empty())
|
||||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&tabofsOcl.data));
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&tabofsOcl.data));
|
||||||
|
|
||||||
|
if (!mapOcl.empty())
|
||||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&mapOcl.data));
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&mapOcl.data));
|
||||||
|
|
||||||
|
if (!alphaOcl.empty())
|
||||||
args.push_back( make_pair(sizeof(cl_mem), (void *)&alphaOcl.data));
|
args.push_back( make_pair(sizeof(cl_mem), (void *)&alphaOcl.data));
|
||||||
}
|
|
||||||
|
ocn = interpolation == INTER_LINEAR ? ocn : -1;
|
||||||
|
depth = interpolation == INTER_LINEAR ? depth : -1;
|
||||||
|
|
||||||
openCLExecuteKernel(src.clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args,
|
openCLExecuteKernel(src.clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args,
|
||||||
ocn, depth, buildOption.c_str());
|
ocn, depth, buildOption.c_str());
|
||||||
|
@ -315,7 +315,44 @@ __kernel void resizeNN(__global T * dst, __global T * src,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#elif AREA
|
#elif defined AREA
|
||||||
|
|
||||||
|
#ifdef AREA_FAST
|
||||||
|
|
||||||
|
__kernel void resizeAREA_FAST(__global T * dst, __global T * src,
|
||||||
|
int dst_offset, int src_offset, int dst_step, int src_step,
|
||||||
|
int src_cols, int src_rows, int dst_cols, int dst_rows, WT ifx, WT ify,
|
||||||
|
__global const int * dmap_tab, __global const int * smap_tab)
|
||||||
|
{
|
||||||
|
int dx = get_global_id(0);
|
||||||
|
int dy = get_global_id(1);
|
||||||
|
|
||||||
|
if (dx < dst_cols && dy < dst_rows)
|
||||||
|
{
|
||||||
|
int dst_index = mad24(dy, dst_step, dst_offset + dx);
|
||||||
|
|
||||||
|
__global const int * xmap_tab = dmap_tab;
|
||||||
|
__global const int * ymap_tab = dmap_tab + dst_cols;
|
||||||
|
__global const int * sxmap_tab = smap_tab;
|
||||||
|
__global const int * symap_tab = smap_tab + XSCALE * dst_cols;
|
||||||
|
|
||||||
|
int sx = xmap_tab[dx], sy = ymap_tab[dy];
|
||||||
|
WTV sum = (WTV)(0);
|
||||||
|
|
||||||
|
#pragma unroll
|
||||||
|
for (int y = 0; y < YSCALE; ++y)
|
||||||
|
{
|
||||||
|
int src_index = mad24(symap_tab[y + sy], src_step, src_offset);
|
||||||
|
#pragma unroll
|
||||||
|
for (int x = 0; x < XSCALE; ++x)
|
||||||
|
sum += convertToWTV(src[src_index + sxmap_tab[sx + x]]);
|
||||||
|
}
|
||||||
|
|
||||||
|
dst[dst_index] = convertToT(convertToWT2V(sum) * (WT2V)(SCALE));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
__kernel void resizeAREA(__global T * dst, __global T * src,
|
__kernel void resizeAREA(__global T * dst, __global T * src,
|
||||||
int dst_offset, int src_offset, int dst_step, int src_step,
|
int dst_offset, int src_offset, int dst_step, int src_step,
|
||||||
@ -364,3 +401,5 @@ __kernel void resizeAREA(__global T * dst, __global T * src,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
@ -231,7 +231,7 @@ double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& o
|
|||||||
return final_test_result;
|
return final_test_result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void showDiff(const Mat& gold, const Mat& actual, double eps, bool alwaysShow)
|
void showDiff(const Mat& src, const Mat& gold, const Mat& actual, double eps, bool alwaysShow)
|
||||||
{
|
{
|
||||||
Mat diff, diff_thresh;
|
Mat diff, diff_thresh;
|
||||||
absdiff(gold, actual, diff);
|
absdiff(gold, actual, diff);
|
||||||
@ -240,10 +240,18 @@ void showDiff(const Mat& gold, const Mat& actual, double eps, bool alwaysShow)
|
|||||||
|
|
||||||
if (alwaysShow || cv::countNonZero(diff_thresh.reshape(1)) > 0)
|
if (alwaysShow || cv::countNonZero(diff_thresh.reshape(1)) > 0)
|
||||||
{
|
{
|
||||||
|
#if 0
|
||||||
|
std::cout << "Src: " << std::endl << src << std::endl;
|
||||||
|
std::cout << "Reference: " << std::endl << gold << std::endl;
|
||||||
|
std::cout << "OpenCL: " << std::endl << actual << std::endl;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namedWindow("src", WINDOW_NORMAL);
|
||||||
namedWindow("gold", WINDOW_NORMAL);
|
namedWindow("gold", WINDOW_NORMAL);
|
||||||
namedWindow("actual", WINDOW_NORMAL);
|
namedWindow("actual", WINDOW_NORMAL);
|
||||||
namedWindow("diff", WINDOW_NORMAL);
|
namedWindow("diff", WINDOW_NORMAL);
|
||||||
|
|
||||||
|
imshow("src", src);
|
||||||
imshow("gold", gold);
|
imshow("gold", gold);
|
||||||
imshow("actual", actual);
|
imshow("actual", actual);
|
||||||
imshow("diff", diff);
|
imshow("diff", diff);
|
||||||
|
@ -52,7 +52,7 @@ extern int LOOP_TIMES;
|
|||||||
|
|
||||||
namespace cvtest {
|
namespace cvtest {
|
||||||
|
|
||||||
void showDiff(const Mat& gold, const Mat& actual, double eps, bool alwaysShow = false);
|
void showDiff(const Mat& src, const Mat& gold, const Mat& actual, double eps, bool alwaysShow = false);
|
||||||
|
|
||||||
cv::ocl::oclMat createMat_ocl(cv::RNG& rng, Size size, int type, bool useRoi);
|
cv::ocl::oclMat createMat_ocl(cv::RNG& rng, Size size, int type, bool useRoi);
|
||||||
cv::ocl::oclMat loadMat_ocl(cv::RNG& rng, const Mat& m, bool useRoi);
|
cv::ocl::oclMat loadMat_ocl(cv::RNG& rng, const Mat& m, bool useRoi);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user