Added ocl::pyrUp to T-API
This commit is contained in:
parent
ac230cd2ae
commit
febe528363
File diff suppressed because it is too large
Load Diff
159
modules/imgproc/src/opencl/pyr_up.cl
Normal file
159
modules/imgproc/src/opencl/pyr_up.cl
Normal file
@ -0,0 +1,159 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Zhang Chunpeng chunpeng@multicorewareinc.com
|
||||
// Dachuan Zhao, dachuan@multicorewareinc.com
|
||||
// Yao Wang, yao@multicorewareinc.com
|
||||
// Peng Xiao, pengxiao@outlook.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
//////////////////////// Generic PyrUp //////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define noconvert
|
||||
|
||||
|
||||
__kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
|
||||
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
|
||||
{
|
||||
const int x = get_global_id(0);
|
||||
const int y = get_global_id(1);
|
||||
|
||||
const int lsizex = get_local_size(0);
|
||||
const int lsizey = get_local_size(1);
|
||||
|
||||
const int tidx = get_local_id(0);
|
||||
const int tidy = get_local_id(1);
|
||||
|
||||
__local FT s_srcPatch[10][10];
|
||||
__local FT s_dstPatch[20][16];
|
||||
|
||||
__global T * dstData = (__global T *)(dst + dst_offset);
|
||||
__global const T * srcData = (__global const T *)(src + src_offset);
|
||||
|
||||
if( tidx < 10 && tidy < 10 )
|
||||
{
|
||||
int srcx = mad24((int)get_group_id(0), lsizex>>1, tidx) - 1;
|
||||
int srcy = mad24((int)get_group_id(1), lsizey>>1, tidy) - 1;
|
||||
|
||||
srcx = abs(srcx);
|
||||
srcx = min(src_cols - 1, srcx);
|
||||
|
||||
srcy = abs(srcy);
|
||||
srcy = min(src_rows - 1, srcy);
|
||||
|
||||
s_srcPatch[tidy][tidx] = convertToFT(srcData[srcx + srcy * src_step / (int) sizeof(T)]);
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
FT sum = 0.f;
|
||||
const FT evenFlag = (FT)((tidx & 1) == 0);
|
||||
const FT oddFlag = (FT)((tidx & 1) != 0);
|
||||
const bool eveny = ((tidy & 1) == 0);
|
||||
|
||||
const FT co1 = 0.375f;
|
||||
const FT co2 = 0.25f;
|
||||
const FT co3 = 0.0625f;
|
||||
|
||||
if(eveny)
|
||||
{
|
||||
sum = ( evenFlag* co3 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * co2 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + ( evenFlag* co1 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * co2 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + ( evenFlag* co3 ) * s_srcPatch[1 + (tidy >> 1)][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[2 + tidy][tidx] = sum;
|
||||
|
||||
if (tidy < 2)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * co1 ) * s_srcPatch[lsizey-16][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * co2 ) * s_srcPatch[lsizey-16][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * co3 ) * s_srcPatch[lsizey-16][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
|
||||
s_dstPatch[tidy][tidx] = sum;
|
||||
}
|
||||
|
||||
if (tidy > 13)
|
||||
{
|
||||
sum = 0;
|
||||
|
||||
if (eveny)
|
||||
{
|
||||
sum = (evenFlag * co3) * s_srcPatch[lsizey-7][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + ( oddFlag * co2) * s_srcPatch[lsizey-7][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + (evenFlag * co1) * s_srcPatch[lsizey-7][1 + ((tidx ) >> 1)];
|
||||
sum = sum + ( oddFlag * co2) * s_srcPatch[lsizey-7][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + (evenFlag * co3) * s_srcPatch[lsizey-7][1 + ((tidx + 2) >> 1)];
|
||||
}
|
||||
s_dstPatch[4 + tidy][tidx] = sum;
|
||||
}
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
sum = co3 * s_dstPatch[2 + tidy - 2][tidx];
|
||||
sum = sum + co2 * s_dstPatch[2 + tidy - 1][tidx];
|
||||
sum = sum + co1 * s_dstPatch[2 + tidy ][tidx];
|
||||
sum = sum + co2 * s_dstPatch[2 + tidy + 1][tidx];
|
||||
sum = sum + co3 * s_dstPatch[2 + tidy + 2][tidx];
|
||||
|
||||
if ((x < dst_cols) && (y < dst_rows))
|
||||
dstData[x + y * dst_step / (int)sizeof(T)] = convertToT(4.0f * sum);
|
||||
}
|
@ -401,23 +401,25 @@ pyrUp_( const Mat& _src, Mat& _dst, int)
|
||||
|
||||
typedef void (*PyrFunc)(const Mat&, Mat&, int);
|
||||
|
||||
}
|
||||
|
||||
namespace cv
|
||||
{
|
||||
|
||||
static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
|
||||
{
|
||||
int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
|
||||
|
||||
if (depth != CV_8U && depth != CV_16U && depth != CV_16S && depth != CV_32F)
|
||||
if (((channels != 1) && (channels != 2) && (channels != 4))
|
||||
|| (borderType != BORDER_DEFAULT))
|
||||
return false;
|
||||
if (channels != 1 && channels != 3 && channels != 4)
|
||||
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
if ((depth == CV_64F) && !(doubleSupport))
|
||||
return false;
|
||||
double doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
|
||||
Size ssize = _src.size();
|
||||
Size dsize = _dsz.area() == 0 ? Size((ssize.width + 1) / 2, (ssize.height + 1) / 2) : _dsz;
|
||||
CV_Assert( ssize.width > 0 && ssize.height > 0 &&
|
||||
std::abs(dsize.width*2 - ssize.width) <= 2 &&
|
||||
std::abs(dsize.height*2 - ssize.height) <= 2 );
|
||||
|
||||
UMat src = _src.getUMat();
|
||||
Size dsize = cv::Size((src.rows + 1) / 2, (src.cols + 1) / 2);
|
||||
_dst.create( dsize, src.type() );
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
@ -425,31 +427,73 @@ static bool ocl_pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, in
|
||||
ocl::ProgramSource2 program = ocl::imgproc::pyr_down_oclsrc;
|
||||
ocl::Kernel k;
|
||||
|
||||
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
|
||||
char cvt[2][50];
|
||||
k.create(kernelName, program,
|
||||
format("-D T=%s -D cn=%d -D convertToT=%s%s -D convertToFT=%s%s",
|
||||
ocl::typeToStr(type), channels,
|
||||
ocl::convertTypeStr(CV_32F, depth, channels, cvt[0]),
|
||||
ocl::convertTypeStr(depth, CV_32F, channels, cvt[1]),
|
||||
format("-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s",
|
||||
ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
|
||||
ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
|
||||
ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
||||
|
||||
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
|
||||
|
||||
|
||||
size_t localThreads[2] = { 256, 1 };
|
||||
size_t globalThreads[2] = { src.cols, dst.rows };
|
||||
return k.run(2, globalThreads, localThreads, false);
|
||||
}
|
||||
|
||||
static bool ocl_pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType)
|
||||
{
|
||||
int type = _src.type(), depth = CV_MAT_DEPTH(type), channels = CV_MAT_CN(type);
|
||||
|
||||
if (((channels != 1) && (channels != 2) && (channels != 4))
|
||||
|| (borderType != BORDER_DEFAULT))
|
||||
return false;
|
||||
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
if ((depth == CV_64F) && !(doubleSupport))
|
||||
return false;
|
||||
|
||||
Size ssize = _src.size();
|
||||
if ((_dsz.area() != 0) && (_dsz != Size(ssize.width * 2, ssize.height * 2)))
|
||||
return false;
|
||||
|
||||
UMat src = _src.getUMat();
|
||||
Size dsize = Size(ssize.width * 2, ssize.height * 2);
|
||||
_dst.create( dsize, src.type() );
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
const char * const kernelName = "pyrUp";
|
||||
ocl::ProgramSource2 program = ocl::imgproc::pyr_up_oclsrc;
|
||||
ocl::Kernel k;
|
||||
|
||||
int float_depth = depth == CV_64F ? CV_64F : CV_32F;
|
||||
char cvt[2][50];
|
||||
k.create(kernelName, program,
|
||||
format("-D T=%s -D FT=%s -D convertToT=%s -D convertToFT=%s%s",
|
||||
ocl::typeToStr(type), ocl::typeToStr(CV_MAKETYPE(float_depth, channels)),
|
||||
ocl::convertTypeStr(float_depth, depth, channels, cvt[0]),
|
||||
ocl::convertTypeStr(depth, float_depth, channels, cvt[1]),
|
||||
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
|
||||
|
||||
k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst));
|
||||
size_t globalThreads[2] = {dst.cols, dst.rows};
|
||||
size_t localThreads[2] = {16, 16};
|
||||
|
||||
return k.run(2, globalThreads, localThreads, false);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
|
||||
{
|
||||
if (ocl::useOpenCL() && _dst.isUMat() &&
|
||||
ocl_pyrDown(_src, _dst, _dsz, borderType))
|
||||
return;
|
||||
return;
|
||||
|
||||
Mat src = _src.getMat();
|
||||
Size dsz = _dsz == Size() ? Size((src.cols + 1)/2, (src.rows + 1)/2) : _dsz;
|
||||
Size dsz = _dsz.area() == 0 ? Size((src.cols + 1)/2, (src.rows + 1)/2) : _dsz;
|
||||
_dst.create( dsz, src.type() );
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
@ -478,8 +522,12 @@ void cv::pyrDown( InputArray _src, OutputArray _dst, const Size& _dsz, int borde
|
||||
|
||||
void cv::pyrUp( InputArray _src, OutputArray _dst, const Size& _dsz, int borderType )
|
||||
{
|
||||
if (ocl::useOpenCL() && _dst.isUMat() &&
|
||||
ocl_pyrUp(_src, _dst, _dsz, borderType))
|
||||
return;
|
||||
|
||||
Mat src = _src.getMat();
|
||||
Size dsz = _dsz == Size() ? Size(src.cols*2, src.rows*2) : _dsz;
|
||||
Size dsz = _dsz.area() == 0 ? Size(src.cols*2, src.rows*2) : _dsz;
|
||||
_dst.create( dsz, src.type() );
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
|
@ -45,73 +45,98 @@
|
||||
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include <iomanip>
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
using namespace cv;
|
||||
using namespace testing;
|
||||
using namespace std;
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
|
||||
PARAM_TEST_CASE(PyrBase, MatDepth, Channels)
|
||||
PARAM_TEST_CASE(PyrTestBase, MatDepth, Channels, bool)
|
||||
{
|
||||
int depth;
|
||||
int channels;
|
||||
int depth, channels;
|
||||
bool use_roi;
|
||||
|
||||
Mat dst_cpu;
|
||||
ocl::oclMat gdst;
|
||||
TEST_DECLARE_INPUT_PARAMETER(src)
|
||||
TEST_DECLARE_OUTPUT_PARAMETER(dst)
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
depth = GET_PARAM(0);
|
||||
channels = GET_PARAM(1);
|
||||
use_roi = GET_PARAM(2);
|
||||
}
|
||||
|
||||
void generateTestData(Size src_roiSize, Size dst_roiSize)
|
||||
{
|
||||
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
randomSubMat(src, src_roi, src_roiSize, srcBorder, CV_MAKETYPE(depth, channels), -MAX_VALUE, MAX_VALUE);
|
||||
|
||||
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
randomSubMat(dst, dst_roi, dst_roiSize, dstBorder, CV_MAKETYPE(depth, channels), -MAX_VALUE, MAX_VALUE);
|
||||
|
||||
UMAT_UPLOAD_INPUT_PARAMETER(src)
|
||||
UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
|
||||
}
|
||||
|
||||
void Near(double threshold = 0.0)
|
||||
{
|
||||
OCL_EXPECT_MATS_NEAR(dst, threshold);
|
||||
}
|
||||
};
|
||||
|
||||
/////////////////////// PyrDown //////////////////////////
|
||||
|
||||
typedef PyrBase PyrDown;
|
||||
typedef PyrTestBase PyrDown;
|
||||
|
||||
OCL_TEST_P(PyrDown, Mat)
|
||||
{
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
Size size(MWIDTH, MHEIGHT);
|
||||
Mat src = randomMat(size, CV_MAKETYPE(depth, channels), 0, 255);
|
||||
ocl::oclMat gsrc(src);
|
||||
Size src_roiSize = randomSize(1, MAX_VALUE);
|
||||
Size dst_roiSize = Size(randomInt((src_roiSize.width - 1) / 2, (src_roiSize.width + 3) / 2),
|
||||
randomInt((src_roiSize.height - 1) / 2, (src_roiSize.height + 3) / 2));
|
||||
dst_roiSize = dst_roiSize.area() == 0 ? Size((src_roiSize.width + 1) / 2, (src_roiSize.height + 1) / 2) : dst_roiSize;
|
||||
generateTestData(src_roiSize, dst_roiSize);
|
||||
|
||||
pyrDown(src, dst_cpu);
|
||||
ocl::pyrDown(gsrc, gdst);
|
||||
OCL_OFF(pyrDown(src_roi, dst_roi, dst_roiSize));
|
||||
OCL_ON(pyrDown(usrc_roi, udst_roi, dst_roiSize));
|
||||
|
||||
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), depth == CV_32F ? 1e-4f : 1.0f);
|
||||
Near(depth == CV_32F ? 1e-4f : 1.0f);
|
||||
}
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine(
|
||||
Values(CV_8U, CV_16U, CV_16S, CV_32F),
|
||||
Values(1, 3, 4)));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrDown, Combine(
|
||||
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
|
||||
Values(1, 2, 4),
|
||||
Bool()
|
||||
));
|
||||
|
||||
/////////////////////// PyrUp //////////////////////////
|
||||
|
||||
typedef PyrBase PyrUp;
|
||||
typedef PyrTestBase PyrUp;
|
||||
|
||||
OCL_TEST_P(PyrUp, Accuracy)
|
||||
OCL_TEST_P(PyrUp, Mat)
|
||||
{
|
||||
for (int j = 0; j < LOOP_TIMES; j++)
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
Size size(MWIDTH, MHEIGHT);
|
||||
Mat src = randomMat(size, CV_MAKETYPE(depth, channels), 0, 255);
|
||||
ocl::oclMat gsrc(src);
|
||||
Size src_roiSize = randomSize(1, MAX_VALUE);
|
||||
Size dst_roiSize = Size(2 * src_roiSize.width, 2 * src_roiSize.height);
|
||||
generateTestData(src_roiSize, dst_roiSize);
|
||||
|
||||
pyrUp(src, dst_cpu);
|
||||
ocl::pyrUp(gsrc, gdst);
|
||||
OCL_OFF(pyrUp(src_roi, dst_roi, dst_roiSize));
|
||||
OCL_ON(pyrUp(usrc_roi, udst_roi, dst_roiSize));
|
||||
|
||||
EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), (depth == CV_32F ? 1e-4f : 1.0));
|
||||
Near(depth == CV_32F ? 1e-4f : 1.0f);
|
||||
}
|
||||
}
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(ImgprocPyr, PyrUp, Combine(
|
||||
Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
|
||||
Values(1, 2, 4),
|
||||
Bool()
|
||||
));
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, Combine(
|
||||
Values(CV_8U, CV_16U, CV_16S, CV_32F),
|
||||
Values(1, 3, 4)));
|
||||
#endif // HAVE_OPENCL
|
||||
|
Loading…
x
Reference in New Issue
Block a user