fixed several problems with CUDA 5.0

* gpu::LUT, uses device memory instead of host memory
* gpu::multiply, round mod for CV_8U depth
This commit is contained in:
Vladislav Vinogradov 2012-05-28 12:09:40 +00:00
parent 71625ad458
commit 2582464e51
4 changed files with 88 additions and 47 deletions

View File

@ -320,12 +320,23 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
const Npp32s* pLevels3[3];
int nValues3[3];
#if (CUDA_VERSION > 4020)
GpuMat d_pLevels;
#endif
LevelsInit()
{
nValues3[0] = nValues3[1] = nValues3[2] = 256;
for (int i = 0; i < 256; ++i)
pLevels[i] = i;
#if (CUDA_VERSION <= 4020)
pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels;
#else
d_pLevels.upload(Mat(1, 256, CV_32S, pLevels));
pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr<Npp32s>();
#endif
}
};
static LevelsInit lvls;
@ -350,22 +361,48 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
if (src.type() == CV_8UC1)
{
#if (CUDA_VERSION <= 4020)
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
#else
GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, d_nppLut.ptr<Npp32s>(), lvls.d_pLevels.ptr<Npp32s>(), 256) );
#endif
}
else
{
Mat nppLut3[3];
const Npp32s* pValues3[3];
Mat nppLut3[3];
if (nppLut.channels() == 1)
{
#if (CUDA_VERSION <= 4020)
pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr<Npp32s>();
#else
GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
pValues3[0] = pValues3[1] = pValues3[2] = d_nppLut.ptr<Npp32s>();
#endif
}
else
{
cv::split(nppLut, nppLut3);
#if (CUDA_VERSION <= 4020)
pValues3[0] = nppLut3[0].ptr<Npp32s>();
pValues3[1] = nppLut3[1].ptr<Npp32s>();
pValues3[2] = nppLut3[2].ptr<Npp32s>();
#else
GpuMat d_nppLut0(Mat(1, 256, CV_32S, nppLut3[0].data));
GpuMat d_nppLut1(Mat(1, 256, CV_32S, nppLut3[1].data));
GpuMat d_nppLut2(Mat(1, 256, CV_32S, nppLut3[2].data));
pValues3[0] = d_nppLut0.ptr<Npp32s>();
pValues3[1] = d_nppLut1.ptr<Npp32s>();
pValues3[2] = d_nppLut2.ptr<Npp32s>();
#endif
}
nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step),
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, pValues3, lvls.pLevels3, lvls.nValues3) );
}

View File

@ -658,7 +658,11 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub
dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
#if (CUDA_VERSION <= 4020)
if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F)
#else
if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F && src1.depth() > CV_8U)
#endif
{
npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
return;

View File

@ -1189,18 +1189,18 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, AbsDiff, testing::Combine(
////////////////////////////////////////////////////////////////////////////////
// Abs
PARAM_TEST_CASE(Abs, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
PARAM_TEST_CASE(Abs, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int type;
int depth;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
type = GET_PARAM(2);
depth = GET_PARAM(2);
useRoi = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID());
@ -1209,9 +1209,9 @@ PARAM_TEST_CASE(Abs, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
TEST_P(Abs, Accuracy)
{
cv::Mat src = randomMat(size, type);
cv::Mat src = randomMat(size, depth);
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::abs(loadMat(src, useRoi), dst);
cv::Mat dst_gold = cv::abs(src);
@ -1222,24 +1222,24 @@ TEST_P(Abs, Accuracy)
INSTANTIATE_TEST_CASE_P(GPU_Core, Abs, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_16SC1), MatType(CV_32FC1)),
testing::Values(MatDepth(CV_16S), MatDepth(CV_32F)),
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
// Sqr
PARAM_TEST_CASE(Sqr, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
PARAM_TEST_CASE(Sqr, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int type;
int depth;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
type = GET_PARAM(2);
depth = GET_PARAM(2);
useRoi = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID());
@ -1248,9 +1248,9 @@ PARAM_TEST_CASE(Sqr, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
TEST_P(Sqr, Accuracy)
{
cv::Mat src = randomMat(size, type);
cv::Mat src = randomMat(size, depth, 0, depth == CV_8U ? 16 : 255);
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::sqr(loadMat(src, useRoi), dst);
cv::Mat dst_gold;
@ -1262,10 +1262,10 @@ TEST_P(Sqr, Accuracy)
INSTANTIATE_TEST_CASE_P(GPU_Core, Sqr, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1),
MatType(CV_16UC1),
MatType(CV_16SC1),
MatType(CV_32FC1)),
testing::Values(MatDepth(CV_8U),
MatDepth(CV_16U),
MatDepth(CV_16S),
MatDepth(CV_32F)),
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
@ -1295,18 +1295,18 @@ void sqrtGold(const cv::Mat& src, cv::Mat& dst)
funcs[src.depth()](src, dst);
}
PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int type;
int depth;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
type = GET_PARAM(2);
depth = GET_PARAM(2);
useRoi = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID());
@ -1315,24 +1315,24 @@ PARAM_TEST_CASE(Sqrt, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
TEST_P(Sqrt, Accuracy)
{
cv::Mat src = randomMat(size, type);
cv::Mat src = randomMat(size, depth);
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::sqrt(loadMat(src, useRoi), dst);
cv::Mat dst_gold;
sqrtGold(src, dst_gold);
EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-5);
}
INSTANTIATE_TEST_CASE_P(GPU_Core, Sqrt, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1),
MatType(CV_16UC1),
MatType(CV_16SC1),
MatType(CV_32FC1)),
testing::Values(MatDepth(CV_8U),
MatDepth(CV_16U),
MatDepth(CV_16S),
MatDepth(CV_32F)),
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
@ -1362,18 +1362,18 @@ void logGold(const cv::Mat& src, cv::Mat& dst)
funcs[src.depth()](src, dst);
}
PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int type;
int depth;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
type = GET_PARAM(2);
depth = GET_PARAM(2);
useRoi = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID());
@ -1382,24 +1382,24 @@ PARAM_TEST_CASE(Log, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
TEST_P(Log, Accuracy)
{
cv::Mat src = randomMat(size, type, 1.0, 255.0);
cv::Mat src = randomMat(size, depth, 1.0, 255.0);
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::log(loadMat(src, useRoi), dst);
cv::Mat dst_gold;
logGold(src, dst_gold);
EXPECT_MAT_NEAR(dst_gold, dst, 1e-6);
EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-6);
}
INSTANTIATE_TEST_CASE_P(GPU_Core, Log, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1),
MatType(CV_16UC1),
MatType(CV_16SC1),
MatType(CV_32FC1)),
testing::Values(MatDepth(CV_8U),
MatDepth(CV_16U),
MatDepth(CV_16S),
MatDepth(CV_32F)),
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
@ -1439,18 +1439,18 @@ void expGold(const cv::Mat& src, cv::Mat& dst)
funcs[src.depth()](src, dst);
}
PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
cv::Size size;
int type;
int depth;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
type = GET_PARAM(2);
depth = GET_PARAM(2);
useRoi = GET_PARAM(3);
cv::gpu::setDevice(devInfo.deviceID());
@ -1459,24 +1459,24 @@ PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
TEST_P(Exp, Accuracy)
{
cv::Mat src = randomMat(size, type, 0.0, 10.0);
cv::Mat src = randomMat(size, depth, 0.0, 10.0);
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::exp(loadMat(src, useRoi), dst);
cv::Mat dst_gold;
expGold(src, dst_gold);
EXPECT_MAT_NEAR(dst_gold, dst, 1e-2);
EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 1.0 : 1e-2);
}
INSTANTIATE_TEST_CASE_P(GPU_Core, Exp, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_8UC1),
MatType(CV_16UC1),
MatType(CV_16SC1),
MatType(CV_32FC1)),
testing::Values(MatDepth(CV_8U),
MatDepth(CV_16U),
MatDepth(CV_16S),
MatDepth(CV_32F)),
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////

View File

@ -311,7 +311,7 @@ TEST_P(ConvertTo, WithScaling)
cv::Mat dst_gold;
src.convertTo(dst_gold, depth2, a, b);
EXPECT_MAT_NEAR(dst_gold, dst, depth2 < CV_32F ? 0.0 : 1e-4);
EXPECT_MAT_NEAR(dst_gold, dst, depth2 < CV_32F ? 1.0 : 1e-4);
}
}