Simplified the gpu::columnSum test. It no longer fails on Quadro (when the seed is 000001af5a11badd) after the BFM test, but something is definitely wrong with NPP_Staging's transpose.
This commit is contained in:
parent
dc9e5eda19
commit
1ecb6cf775
@ -1156,25 +1156,25 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags)
|
|||||||
if (src_data.data != src.data)
|
if (src_data.data != src.data)
|
||||||
src.copyTo(src_data);
|
src.copyTo(src_data);
|
||||||
|
|
||||||
Size dft_size_ = dft_size;
|
Size dft_size_opt = dft_size;
|
||||||
if (is_1d_input && !is_row_dft)
|
if (is_1d_input && !is_row_dft)
|
||||||
{
|
{
|
||||||
// If the source matrix is single column handle it as single row
|
// If the source matrix is single column handle it as single row
|
||||||
dft_size_.width = std::max(dft_size.width, dft_size.height);
|
dft_size_opt.width = std::max(dft_size.width, dft_size.height);
|
||||||
dft_size_.height = std::min(dft_size.width, dft_size.height);
|
dft_size_opt.height = std::min(dft_size.width, dft_size.height);
|
||||||
}
|
}
|
||||||
|
|
||||||
cufftType dft_type = CUFFT_R2C;
|
cufftType dft_type = CUFFT_R2C;
|
||||||
if (is_complex_input)
|
if (is_complex_input)
|
||||||
dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;
|
dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;
|
||||||
|
|
||||||
CV_Assert(dft_size_.width > 1);
|
CV_Assert(dft_size_opt.width > 1);
|
||||||
|
|
||||||
cufftHandle plan;
|
cufftHandle plan;
|
||||||
if (is_1d_input || is_row_dft)
|
if (is_1d_input || is_row_dft)
|
||||||
cufftPlan1d(&plan, dft_size_.width, dft_type, dft_size_.height);
|
cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height);
|
||||||
else
|
else
|
||||||
cufftPlan2d(&plan, dft_size_.height, dft_size_.width, dft_type);
|
cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type);
|
||||||
|
|
||||||
if (is_complex_input)
|
if (is_complex_input)
|
||||||
{
|
{
|
||||||
@ -1194,7 +1194,8 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (dft_size == dft_size_)
|
// We could swap dft_size for efficiency. Here we must reflect it
|
||||||
|
if (dft_size == dft_size_opt)
|
||||||
createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, dst);
|
createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, dst);
|
||||||
else
|
else
|
||||||
createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, dst);
|
createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, dst);
|
||||||
@ -1206,7 +1207,7 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags)
|
|||||||
cufftSafeCall(cufftDestroy(plan));
|
cufftSafeCall(cufftDestroy(plan));
|
||||||
|
|
||||||
if (is_scaled_dft)
|
if (is_scaled_dft)
|
||||||
multiply(dst, Scalar::all(1. / (dft_size.area())), dst);
|
multiply(dst, Scalar::all(1. / dft_size.area()), dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////
|
||||||
@ -1260,18 +1261,19 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
|
|||||||
block_size.width = std::min(dft_size.width - templ.cols + 1, result.cols);
|
block_size.width = std::min(dft_size.width - templ.cols + 1, result.cols);
|
||||||
block_size.height = std::min(dft_size.height - templ.rows + 1, result.rows);
|
block_size.height = std::min(dft_size.height - templ.rows + 1, result.rows);
|
||||||
|
|
||||||
GpuMat result_data = createContinuous(dft_size, CV_32F);
|
|
||||||
|
|
||||||
int spect_len = dft_size.height * (dft_size.width / 2 + 1);
|
int spect_len = dft_size.height * (dft_size.width / 2 + 1);
|
||||||
GpuMat image_spect(1, spect_len, CV_32FC2);
|
GpuMat image_spect = createContinuous(1, spect_len, CV_32FC2);
|
||||||
GpuMat templ_spect(1, spect_len, CV_32FC2);
|
GpuMat templ_spect = createContinuous(1, spect_len, CV_32FC2);
|
||||||
GpuMat result_spect(1, spect_len, CV_32FC2);
|
GpuMat result_spect = createContinuous(1, spect_len, CV_32FC2);
|
||||||
|
|
||||||
cufftHandle planR2C, planC2R;
|
cufftHandle planR2C, planC2R;
|
||||||
cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));
|
cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R));
|
||||||
cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));
|
cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C));
|
||||||
|
|
||||||
|
GpuMat image_block = createContinuous(dft_size, CV_32F);
|
||||||
GpuMat templ_block = createContinuous(dft_size, CV_32F);
|
GpuMat templ_block = createContinuous(dft_size, CV_32F);
|
||||||
|
GpuMat result_data = createContinuous(dft_size, CV_32F);
|
||||||
|
|
||||||
GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);
|
GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step);
|
||||||
copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
|
copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
|
||||||
templ_block.cols - templ_roi.cols, 0);
|
templ_block.cols - templ_roi.cols, 0);
|
||||||
@ -1279,8 +1281,6 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
|
|||||||
cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(),
|
cufftSafeCall(cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(),
|
||||||
templ_spect.ptr<cufftComplex>()));
|
templ_spect.ptr<cufftComplex>()));
|
||||||
|
|
||||||
GpuMat image_block = createContinuous(dft_size, CV_32F);
|
|
||||||
|
|
||||||
// Process all blocks of the result matrix
|
// Process all blocks of the result matrix
|
||||||
for (int y = 0; y < result.rows; y += block_size.height)
|
for (int y = 0; y < result.rows; y += block_size.height)
|
||||||
{
|
{
|
||||||
|
@ -47,7 +47,6 @@ const char* blacklist[] =
|
|||||||
{
|
{
|
||||||
"GPU-AsyncGpuMatOperator", // crash
|
"GPU-AsyncGpuMatOperator", // crash
|
||||||
"GPU-NppImageCanny", // NPP_TEXTURE_BIND_ERROR
|
"GPU-NppImageCanny", // NPP_TEXTURE_BIND_ERROR
|
||||||
"GPU-BruteForceMatcher", // often crashes when seed=000001af5a11badd
|
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -768,34 +768,37 @@ struct CV_GpuColumnSumTest: CvTest
|
|||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
int n = 375;
|
int cols = 375;
|
||||||
int m = 1072;
|
int rows = 1072;
|
||||||
Mat src(n, m, CV_32F);
|
|
||||||
|
Mat src(rows, cols, CV_32F);
|
||||||
RNG rng(1);
|
RNG rng(1);
|
||||||
rng.fill(src, RNG::UNIFORM, Scalar(0), Scalar(1));
|
rng.fill(src, RNG::UNIFORM, Scalar(0), Scalar(1));
|
||||||
Mat dst_gold, dst2_gold;
|
|
||||||
|
|
||||||
integral(src, dst_gold, dst2_gold);
|
GpuMat d_dst;
|
||||||
|
columnSum(GpuMat(src), d_dst);
|
||||||
|
|
||||||
GpuMat dsrc(src);
|
Mat dst = d_dst;
|
||||||
GpuMat buf;
|
for (int j = 0; j < src.cols; ++j)
|
||||||
GpuMat dst;
|
|
||||||
columnSum(dsrc, buf);
|
|
||||||
transpose(buf, dst);
|
|
||||||
columnSum(dst, buf);
|
|
||||||
transpose(buf, dst);
|
|
||||||
|
|
||||||
Mat dst_ = dst;
|
|
||||||
for (int i = 0; i < dst_.rows; ++i)
|
|
||||||
{
|
{
|
||||||
const double* dst_gold_data = (const double*)dst_gold.ptr(i + 1);
|
float a = src.at<float>(0, j);
|
||||||
for (int j = 0; j < dst_.cols; ++j)
|
float b = dst.at<float>(0, j);
|
||||||
{
|
|
||||||
float a = (float)dst_gold_data[j + 1];
|
|
||||||
float b = dst_.at<float>(i, j);
|
|
||||||
if (fabs(a - b) > 0.5f)
|
if (fabs(a - b) > 0.5f)
|
||||||
{
|
{
|
||||||
ts->printf(CvTS::CONSOLE, "%d %d %f %f\n", i, j, a, b);
|
ts->printf(CvTS::CONSOLE, "big diff at %d %d: %f %f\n", 0, j, a, b);
|
||||||
|
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int i = 1; i < src.rows; ++i)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < src.cols; ++j)
|
||||||
|
{
|
||||||
|
float a = src.at<float>(i, j) += src.at<float>(i - 1, j);
|
||||||
|
float b = dst.at<float>(i, j);
|
||||||
|
if (fabs(a - b) > 0.5f)
|
||||||
|
{
|
||||||
|
ts->printf(CvTS::CONSOLE, "big diff at %d %d: %f %f\n", i, j, a, b);
|
||||||
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -804,6 +807,7 @@ struct CV_GpuColumnSumTest: CvTest
|
|||||||
}
|
}
|
||||||
catch (const Exception& e)
|
catch (const Exception& e)
|
||||||
{
|
{
|
||||||
|
ts->printf(CvTS::CONSOLE, e.what());
|
||||||
if (!check_and_treat_gpu_exception(e, ts)) throw;
|
if (!check_and_treat_gpu_exception(e, ts)) throw;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user