From 1fe403f461e138eac078e10c6cc2652754509222 Mon Sep 17 00:00:00 2001 From: vbystricky Date: Wed, 13 Aug 2014 10:21:16 +0400 Subject: [PATCH 1/4] Enable OpenCL version of norm and convertScaleAbs or 32F data Fix error in minmaxloc.cl Change test for convertScaleAbs Fix minMaxIdx for _src2 align Change epsilon on the tests --- modules/core/src/convert.cpp | 8 ++-- modules/core/src/opencl/minmaxloc.cl | 4 +- modules/core/src/stat.cpp | 8 +--- modules/core/test/ocl/test_arithm.cpp | 58 +++++++++++++++++++++++---- 4 files changed, 57 insertions(+), 21 deletions(-) diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index 61499b39a..754616e13 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -1760,13 +1760,12 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha kercn = ocl::predictOptimalVectorWidth(_src, _dst), rowsPerWI = d.isIntel() ? 4 : 1; bool doubleSupport = d.doubleFPConfig() > 0; - if (depth == CV_32F || depth == CV_64F) + if (!doubleSupport && depth == CV_64F) return false; char cvt[2][50]; int wdepth = std::max(depth, CV_32F); - ocl::Kernel k("KF", ocl::core::arithm_oclsrc, - format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s" + String build_opt = format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=%s -D srcT1=%s" " -D workT=%s -D wdepth=%d -D convertToWT1=%s -D convertToDT=%s" " -D workT1=%s -D rowsPerWI=%d%s", ocl::typeToStr(CV_8UC(kercn)), @@ -1775,7 +1774,8 @@ static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]), ocl::convertTypeStr(wdepth, CV_8U, kercn, cvt[1]), ocl::typeToStr(wdepth), rowsPerWI, - doubleSupport ? " -D DOUBLE_SUPPORT" : "")); + doubleSupport ? " -D DOUBLE_SUPPORT" : ""); + ocl::Kernel k("KF", ocl::core::arithm_oclsrc, build_opt); if (k.empty()) return false; diff --git a/modules/core/src/opencl/minmaxloc.cl b/modules/core/src/opencl/minmaxloc.cl index cb72a4ec1..bd026c5c8 100644 --- a/modules/core/src/opencl/minmaxloc.cl +++ b/modules/core/src/opencl/minmaxloc.cl @@ -98,7 +98,7 @@ #ifdef OP_CALC2 #define CALC_MAX2(p) \ - maxval2 = MAX(maxval2, temp.p); + maxval2 = MAX(maxval2, temp2.p); #else #define CALC_MAX2(p) #endif @@ -196,7 +196,7 @@ __kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_off #ifdef HAVE_SRC2 #ifdef HAVE_SRC2_CONT - src2_index = mul24(id, srcTSIZE); + src2_index = id * srcTSIZE; //mul24(id, srcTSIZE); #else src2_index = mad24(id / cols, src2_step, mul24(id % cols, srcTSIZE)); #endif diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index e42f8224f..87e99e585 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -1444,7 +1444,7 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(), haveSrc2 = _src2.kind() != _InputArray::NONE; int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), - kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src)); + kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src, _src2)); CV_Assert( (cn == 1 && (!haveMask || _mask.type() == CV_8U)) || (cn >= 1 && !minLoc && !maxLoc) ); @@ -2190,9 +2190,6 @@ static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & (!doubleSupport && depth == CV_64F)) return false; - if( depth == CV_32F && (!_mask.empty() || normType == NORM_INF) ) - return false; - UMat src = _src.getUMat(); if (normType == NORM_INF) @@ -2548,9 +2545,6 @@ static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArr normType &= ~NORM_RELATIVE; bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR; - if ( !normsum || !_mask.empty() ) - return false; - if (normsum) { if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ? diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index 6e0884cb1..79e4281a3 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -341,7 +341,7 @@ OCL_TEST_P(Mul, Mat_Scalar_Scale) OCL_OFF(cv::multiply(src1_roi, val, dst1_roi, val[0])); OCL_ON(cv::multiply(usrc1_roi, val, udst1_roi, val[0])); - Near(udst1_roi.depth() >= CV_32F ? 2e-2 : 1); + Near(udst1_roi.depth() >= CV_32F ? 1e-2 : 1); } } @@ -397,7 +397,7 @@ OCL_TEST_P(Div, Mat_Scale) OCL_OFF(cv::divide(src1_roi, src2_roi, dst1_roi, val[0])); OCL_ON(cv::divide(usrc1_roi, usrc2_roi, udst1_roi, val[0])); - Near(udst1_roi.depth() >= CV_32F ? 2e-2 : 1); + Near(udst1_roi.depth() >= CV_32F ? 4e-3 : 1); } } @@ -1303,7 +1303,7 @@ OCL_TEST_P(Norm, NORM_INF_2args) OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type)); OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type)); - EXPECT_NEAR(cpuRes, gpuRes, 0.2); + EXPECT_PRED3(relativeError, cpuRes, gpuRes, 2e-2); } } @@ -1323,7 +1323,7 @@ OCL_TEST_P(Norm, NORM_INF_2args_mask) OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type, mask_roi)); OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type, umask_roi)); - EXPECT_NEAR(cpuRes, gpuRes, 0.1); + EXPECT_PRED3(relativeError, cpuRes, gpuRes, 2e-2); } } @@ -1547,7 +1547,49 @@ OCL_TEST_P(InRange, Scalar) //////////////////////////////// ConvertScaleAbs //////////////////////////////////////////////// -typedef ArithmTestBase ConvertScaleAbs; +PARAM_TEST_CASE(ConvertScaleAbs, MatDepth, Channels, bool) +{ + int depth; + int cn; + bool use_roi; + cv::Scalar val; + + TEST_DECLARE_INPUT_PARAMETER(src); + TEST_DECLARE_OUTPUT_PARAMETER(dst); + + virtual void SetUp() + { + depth = GET_PARAM(0); + cn = GET_PARAM(1); + use_roi = GET_PARAM(2); + } + + virtual void generateTestData() + { + const int stype = CV_MAKE_TYPE(depth, cn); + const int dtype = CV_MAKE_TYPE(CV_8U, cn); + + Size roiSize = randomSize(1, MAX_VALUE); + Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, stype, 2, 11); // FIXIT: Test with minV, maxV + + Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0); + randomSubMat(dst, dst_roi, roiSize, dstBorder, dtype, 5, 16); + + val = cv::Scalar(rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0), + rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0)); + + UMAT_UPLOAD_INPUT_PARAMETER(src); + UMAT_UPLOAD_OUTPUT_PARAMETER(dst); + } + + void Near(double threshold = 0.) + { + OCL_EXPECT_MATS_NEAR(dst, threshold); + } + +}; + OCL_TEST_P(ConvertScaleAbs, Mat) { @@ -1555,10 +1597,10 @@ OCL_TEST_P(ConvertScaleAbs, Mat) { generateTestData(); - OCL_OFF(cv::convertScaleAbs(src1_roi, dst1_roi, val[0], val[1])); - OCL_ON(cv::convertScaleAbs(usrc1_roi, udst1_roi, val[0], val[1])); + OCL_OFF(cv::convertScaleAbs(src_roi, dst_roi, val[0], val[1])); + OCL_ON(cv::convertScaleAbs(usrc_roi, udst_roi, val[0], val[1])); - Near(depth <= CV_32S ? 1 : 1e-6); + Near(1); } } From c5632d412e0a8d084c3edd3b0094229c03776029 Mon Sep 17 00:00:00 2001 From: VBystricky Date: Wed, 13 Aug 2014 23:50:32 +0400 Subject: [PATCH 2/4] Change epsilon for NORM_INF_2args accuracy tests --- modules/core/test/ocl/test_arithm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index 79e4281a3..828911905 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -1303,7 +1303,7 @@ OCL_TEST_P(Norm, NORM_INF_2args) OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type)); OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type)); - EXPECT_PRED3(relativeError, cpuRes, gpuRes, 2e-2); + EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6); } } @@ -1323,7 +1323,7 @@ OCL_TEST_P(Norm, NORM_INF_2args_mask) OCL_OFF(const double cpuRes = cv::norm(src1_roi, src2_roi, type, mask_roi)); OCL_ON(const double gpuRes = cv::norm(usrc1_roi, usrc2_roi, type, umask_roi)); - EXPECT_PRED3(relativeError, cpuRes, gpuRes, 2e-2); + EXPECT_PRED3(relativeError, cpuRes, gpuRes, 1e-6); } } From aab6f6c56f07bf4d03e75d416a9d77384bb087f4 Mon Sep 17 00:00:00 2001 From: vbystricky Date: Thu, 14 Aug 2014 12:14:56 +0400 Subject: [PATCH 3/4] Set sync=true in ocl_minMaxIdx kernel call. For tests --- modules/core/src/stat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 87e99e585..f3b6b9a2f 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -1536,7 +1536,7 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* } size_t globalsize = groupnum * wgs; - if (!k.run(1, &globalsize, &wgs, false)) + if (!k.run(1, &globalsize, &wgs, true)) return false; static const getMinMaxResFunc functab[7] = From 942ff5be579869a70c1f3187ad5601a116bbb6c7 Mon Sep 17 00:00:00 2001 From: vbystricky Date: Thu, 14 Aug 2014 17:39:41 +0400 Subject: [PATCH 4/4] Disable OpenCL version of minMaxIdx with mask on AMD devices --- modules/core/src/stat.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index f3b6b9a2f..51154a4da 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -1446,6 +1446,9 @@ static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src, _src2)); + if (haveMask && dev.isAMD()) + return false; + CV_Assert( (cn == 1 && (!haveMask || _mask.type() == CV_8U)) || (cn >= 1 && !minLoc && !maxLoc) );