removed NPP implementation

This commit is contained in:
Vladislav Vinogradov 2013-08-22 11:46:09 +04:00
parent e1397f6c1f
commit f826bd8bce
2 changed files with 33 additions and 122 deletions

View File

@ -44,18 +44,7 @@
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
{
(void)src;
(void)dst;
(void)dsize;
(void)fx;
(void)fy;
(void)interpolation;
(void)s;
throw_nogpu();
}
void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); }
#else // HAVE_CUDA
@ -69,94 +58,57 @@ namespace cv { namespace gpu { namespace device
}
}}}
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& stream)
{
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR
|| interpolation == INTER_CUBIC || interpolation == INTER_AREA);
CV_Assert(!(dsize == Size()) || (fx > 0 && fy > 0));
using namespace ::cv::gpu::device::imgproc;
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
static const func_t funcs[6][4] =
{
{resize_gpu<uchar> , 0 /*resize_gpu<uchar2>*/ , resize_gpu<uchar3> , resize_gpu<uchar4> },
{0 /*resize_gpu<schar>*/, 0 /*resize_gpu<char2>*/ , 0 /*resize_gpu<char3>*/, 0 /*resize_gpu<char4>*/},
{resize_gpu<ushort> , 0 /*resize_gpu<ushort2>*/, resize_gpu<ushort3> , resize_gpu<ushort4> },
{resize_gpu<short> , 0 /*resize_gpu<short2>*/ , resize_gpu<short3> , resize_gpu<short4> },
{0 /*resize_gpu<int>*/ , 0 /*resize_gpu<int2>*/ , 0 /*resize_gpu<int3>*/ , 0 /*resize_gpu<int4>*/ },
{resize_gpu<float> , 0 /*resize_gpu<float2>*/ , resize_gpu<float3> , resize_gpu<float4> }
};
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC || interpolation == INTER_AREA );
CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
if (dsize == Size())
{
dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
}
else
{
fx = static_cast<double>(dsize.width) / src.cols;
fy = static_cast<double>(dsize.height) / src.rows;
}
if (dsize != dst.size())
dst.create(dsize, src.type());
dst.create(dsize, src.type());
if (dsize == src.size())
{
if (s)
s.enqueueCopy(src, dst);
if (stream)
stream.enqueueCopy(src, dst);
else
src.copyTo(dst);
return;
}
cudaStream_t stream = StreamAccessor::getStream(s);
const func_t func = funcs[src.depth()][src.channels() - 1];
if (!func)
CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");
Size wholeSize;
Point ofs;
src.locateROI(wholeSize, ofs);
PtrStepSzb wholeSrc(wholeSize.height, wholeSize.width, src.datastart, src.step);
bool useNpp = (src.type() == CV_8UC1 || src.type() == CV_8UC4);
useNpp = useNpp && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR);
if (useNpp)
{
typedef NppStatus (*func_t)(const Npp8u * pSrc, NppiSize oSrcSize, int nSrcStep, NppiRect oSrcROI, Npp8u * pDst, int nDstStep, NppiSize dstROISize,
double xFactor, double yFactor, int eInterpolation);
const func_t funcs[4] = { nppiResize_8u_C1R, 0, 0, nppiResize_8u_C4R };
static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};
NppiSize srcsz;
srcsz.width = wholeSize.width;
srcsz.height = wholeSize.height;
NppiRect srcrect;
srcrect.x = ofs.x;
srcrect.y = ofs.y;
srcrect.width = src.cols;
srcrect.height = src.rows;
NppiSize dstsz;
dstsz.width = dst.cols;
dstsz.height = dst.rows;
NppStreamHandler h(stream);
nppSafeCall( funcs[src.channels() - 1](src.datastart, srcsz, static_cast<int>(src.step), srcrect,
dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
else
{
using namespace ::cv::gpu::device::imgproc;
typedef void (*func_t)(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
static const func_t funcs[6][4] =
{
{resize_gpu<uchar> , 0 /*resize_gpu<uchar2>*/ , resize_gpu<uchar3> , resize_gpu<uchar4> },
{0 /*resize_gpu<schar>*/, 0 /*resize_gpu<char2>*/ , 0 /*resize_gpu<char3>*/, 0 /*resize_gpu<char4>*/},
{resize_gpu<ushort> , 0 /*resize_gpu<ushort2>*/, resize_gpu<ushort3> , resize_gpu<ushort4> },
{resize_gpu<short> , 0 /*resize_gpu<short2>*/ , resize_gpu<short3> , resize_gpu<short4> },
{0 /*resize_gpu<int>*/ , 0 /*resize_gpu<int2>*/ , 0 /*resize_gpu<int3>*/ , 0 /*resize_gpu<int4>*/ },
{resize_gpu<float> , 0 /*resize_gpu<float2>*/ , resize_gpu<float3> , resize_gpu<float4> }
};
const func_t func = funcs[src.depth()][src.channels() - 1];
CV_Assert(func != 0);
func(src, PtrStepSzb(wholeSize.height, wholeSize.width, src.datastart, src.step), ofs.x, ofs.y,
static_cast<float>(1.0 / fx), static_cast<float>(1.0 / fy), dst, interpolation, stream);
}
func(src, wholeSrc, ofs.x, ofs.y, static_cast<float>(1.0 / fx), static_cast<float>(1.0 / fy), dst, interpolation, StreamAccessor::getStream(stream));
}
#endif // HAVE_CUDA

View File

@ -155,7 +155,7 @@ GPU_TEST_P(Resize, Accuracy)
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Resize, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC3), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
testing::Values(0.3, 0.5, 1.5, 2.0),
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)),
WHOLE_SUBMAT));
@ -201,50 +201,9 @@ GPU_TEST_P(ResizeSameAsHost, Accuracy)
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeSameAsHost, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC3), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC3), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC3), MatType(CV_32FC4)),
testing::Values(0.3, 0.5),
testing::Values(Interpolation(cv::INTER_AREA), Interpolation(cv::INTER_NEAREST)), //, Interpolation(cv::INTER_LINEAR), Interpolation(cv::INTER_CUBIC)
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_AREA)),
WHOLE_SUBMAT));
///////////////////////////////////////////////////////////////////
// Test NPP
PARAM_TEST_CASE(ResizeNPP, cv::gpu::DeviceInfo, MatType, double, Interpolation)
{
cv::gpu::DeviceInfo devInfo;
double coeff;
int interpolation;
int type;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
type = GET_PARAM(1);
coeff = GET_PARAM(2);
interpolation = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID());
}
};
GPU_TEST_P(ResizeNPP, Accuracy)
{
cv::Mat src = readImageType("stereobp/aloe-L.png", type);
ASSERT_FALSE(src.empty());
cv::gpu::GpuMat dst;
cv::gpu::resize(loadMat(src), dst, cv::Size(), coeff, coeff, interpolation);
cv::Mat dst_gold;
resizeGold(src, dst_gold, coeff, coeff, interpolation);
EXPECT_MAT_SIMILAR(dst_gold, dst, 1e-1);
}
INSTANTIATE_TEST_CASE_P(GPU_ImgProc, ResizeNPP, testing::Combine(
ALL_DEVICES,
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)),
testing::Values(0.3, 0.5, 1.5, 2.0),
testing::Values(Interpolation(cv::INTER_NEAREST), Interpolation(cv::INTER_LINEAR))));
#endif // HAVE_CUDA