From abcf8d9e610e08227de9cada14868e46a651b8d7 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Sun, 29 Dec 2013 18:01:01 +0400 Subject: [PATCH 1/3] implemented OpenCL version of cv::convertScaleAbs --- modules/core/src/convert.cpp | 39 +++++++++++++++++++++++++++ modules/core/src/opencl/arithm.cl | 9 +++---- modules/core/test/ocl/test_arithm.cpp | 18 +++++++++++++ 3 files changed, 61 insertions(+), 5 deletions(-) diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp index dba8c7b0c..c2014f1be 100644 --- a/modules/core/src/convert.cpp +++ b/modules/core/src/convert.cpp @@ -1266,10 +1266,49 @@ static BinaryFunc getConvertScaleFunc(int sdepth, int ddepth) return cvtScaleTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)]; } +static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta ) +{ + int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + + if (!doubleSupport && depth == CV_64F) + return false; + + char cvt[2][50]; + int wdepth = std::max(depth, CV_32F); + ocl::Kernel k("KF", ocl::core::arithm_oclsrc, + format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=uchar -D srcT1=%s" + " -D workT=%s -D convertToWT1=%s -D convertToDT=%s%s", + ocl::typeToStr(depth), ocl::typeToStr(wdepth), + ocl::convertTypeStr(depth, wdepth, 1, cvt[0]), + ocl::convertTypeStr(wdepth, CV_8U, 1, cvt[1]), + doubleSupport ? 
" -D DOUBLE_SUPPORT" : "")); + if (k.empty()) + return false; + + _dst.createSameSize(_src, CV_8UC(cn)); + UMat src = _src.getUMat(), dst = _dst.getUMat(); + + ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), + dstarg = ocl::KernelArg::WriteOnly(dst, cn); + + if (wdepth == CV_32F) + k.args(srcarg, dstarg, (float)alpha, (float)beta); + else if (wdepth == CV_64F) + k.args(srcarg, dstarg, alpha, beta); + + size_t globalsize[2] = { (size_t)src.cols * cn, (size_t)src.rows }; /* explicit casts: int -> size_t inside braces is a narrowing conversion in C++11 */ + return k.run(2, globalsize, NULL, false); +} + } void cv::convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta ) { + if (ocl::useOpenCL() && _src.dims() <= 2 && _dst.isUMat() && + ocl_convertScaleAbs(_src, _dst, alpha, beta)) + return; + Mat src = _src.getMat(); int cn = src.channels(); double scale[] = {alpha, beta}; diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl index 1647e8d19..add4b0695 100644 --- a/modules/core/src/opencl/arithm.cl +++ b/modules/core/src/opencl/arithm.cl @@ -223,13 +223,12 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v) #define convertToWT2 #define PROCESS_ELEM dstelem = convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0) -#elif defined OP_CONVERT -#define PROCESS_ELEM dstelem = convertToDT(srcelem1) - -#elif defined OP_CONVERT_SCALE +#elif defined OP_CONVERT_SCALE_ABS #undef EXTRA_PARAMS #define EXTRA_PARAMS , workT alpha, workT beta -#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + beta) +#define PROCESS_ELEM \ + workT value = srcelem1 * alpha + beta; \ + dstelem = convertToDT(value >= 0 ? 
value : -value) #elif defined OP_CTP_AD || defined OP_CTP_AR #ifdef OP_CTP_AD diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index 3aa47b7d2..df692b818 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -1324,6 +1324,23 @@ OCL_TEST_P(InRange, Scalar) } +//////////////////////////////// ConvertScaleAbs //////////////////////////////////////////////// + +typedef ArithmTestBase ConvertScaleAbs; + +OCL_TEST_P(ConvertScaleAbs, Mat) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(cv::convertScaleAbs(src1_roi, dst1_roi, val[0], val[1])); + OCL_ON(cv::convertScaleAbs(usrc1_roi, udst1_roi, val[0], val[1])); + + Near(depth <= CV_32S ? 1 : 1e-6); + } +} + //////////////////////////////////////// Instantiation ///////////////////////////////////////// OCL_INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(::testing::Values(CV_8U, CV_8S), OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool())); @@ -1360,6 +1377,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNE OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool())); +OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); } } // namespace cvtest::ocl From 6b64257c811ff63effa95026950d2dca14efd95e Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Sun, 29 Dec 2013 18:46:25 +0400 Subject: [PATCH 2/3] added OpenCL version of cv::scaleAdd --- modules/core/src/matmul.cpp | 48 +++++++++++++++++++++++++-- modules/core/src/opencl/arithm.cl | 8 +++++ modules/core/test/ocl/test_arithm.cpp | 19 ++++++++++- 3 files changed, 71 insertions(+), 4 deletions(-) diff --git 
a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index dc90ac447..3081676f5 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -41,6 +41,7 @@ //M*/ #include "precomp.hpp" +#include "opencl_kernels.hpp" #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp" #ifdef HAVE_IPP @@ -2154,20 +2155,61 @@ static void scaleAdd_64f(const double* src1, const double* src2, double* dst, typedef void (*ScaleAddFunc)(const uchar* src1, const uchar* src2, uchar* dst, int len, const void* alpha); +static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst, int type ) +{ + int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), wdepth = std::max(depth, CV_32F); + bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; + Size size = _src1.size(); + + if ( (!doubleSupport && depth == CV_64F) || size != _src2.size() ) + return false; + + char cvt[2][50]; + ocl::Kernel k("KF", ocl::core::arithm_oclsrc, + format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D convertToWT1=%s" + " -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s%s", ocl::typeToStr(depth), + ocl::typeToStr(wdepth), ocl::convertTypeStr(depth, wdepth, 1, cvt[0]), + ocl::convertTypeStr(wdepth, depth, 1, cvt[1]), + doubleSupport ? 
" -D DOUBLE_SUPPORT" : "")); + if (k.empty()) + return false; + + _dst.create(size, type); + UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat(); + + ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1), + src2arg = ocl::KernelArg::ReadOnlyNoSize(src2), + dstarg = ocl::KernelArg::WriteOnly(dst, cn); + + if (wdepth == CV_32F) + k.args(src1arg, src2arg, dstarg, (float)alpha); + else + k.args(src1arg, src2arg, dstarg, alpha); + + size_t globalsize[2] = { (size_t)dst.cols * cn, (size_t)dst.rows }; /* explicit casts: int -> size_t inside braces is a narrowing conversion in C++11 */ + return k.run(2, globalsize, NULL, false); +} + } void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst ) { - Mat src1 = _src1.getMat(), src2 = _src2.getMat(); - int depth = src1.depth(), cn = src1.channels(); + int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + CV_Assert( type == _src2.type() ); + + if (ocl::useOpenCL() && _src1.dims() <= 2 && _src2.dims() <= 2 && _dst.isUMat() && + ocl_scaleAdd(_src1, alpha, _src2, _dst, type)) + return; - CV_Assert( src1.type() == src2.type() ); if( depth < CV_32F ) { addWeighted(_src1, alpha, _src2, 1, 0, _dst, depth); return; } + Mat src1 = _src1.getMat(), src2 = _src2.getMat(); + CV_Assert(src1.size == src2.size); + _dst.create(src1.dims, src1.size, src1.type()); Mat dst = _dst.getMat(); diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl index add4b0695..605fe4785 100644 --- a/modules/core/src/opencl/arithm.cl +++ b/modules/core/src/opencl/arithm.cl @@ -91,6 +91,9 @@ #else + #ifndef convertToWT2 + #define convertToWT2 convertToWT1 + #endif #define srcelem1 convertToWT1(*(__global srcT1*)(srcptr1 + src1_index)) #define srcelem2 convertToWT2(*(__global srcT2*)(srcptr2 + src2_index)) @@ -230,6 +233,11 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v) workT value = srcelem1 * alpha + beta; \ dstelem = convertToDT(value >= 0 ? 
value : -value) +#elif defined OP_SCALE_ADD +#undef EXTRA_PARAMS +#define EXTRA_PARAMS , workT alpha +#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * alpha + srcelem2) + #elif defined OP_CTP_AD || defined OP_CTP_AR #ifdef OP_CTP_AD #define TO_DEGREE cartToPolar *= (180 / CV_PI); diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index df692b818..f2b987514 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -1323,7 +1323,6 @@ OCL_TEST_P(InRange, Scalar) } } - //////////////////////////////// ConvertScaleAbs //////////////////////////////////////////////// typedef ArithmTestBase ConvertScaleAbs; @@ -1341,6 +1340,23 @@ OCL_TEST_P(ConvertScaleAbs, Mat) } } +//////////////////////////////// ScaleAdd //////////////////////////////////////////////// + +typedef ArithmTestBase ScaleAdd; + +OCL_TEST_P(ScaleAdd, Mat) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(cv::scaleAdd(src1_roi, val[0], src2_roi, dst1_roi)); + OCL_ON(cv::scaleAdd(usrc1_roi, val[0], usrc2_roi, udst1_roi)); + + Near(depth <= CV_32S ? 
1 : 1e-6); + } +} + //////////////////////////////////////// Instantiation ///////////////////////////////////////// OCL_INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(::testing::Values(CV_8U, CV_8S), OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool())); @@ -1378,6 +1394,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_6 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); +OCL_INSTANTIATE_TEST_CASE_P(Arithm, ScaleAdd, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); } } // namespace cvtest::ocl From 3e1bec52486bab3002e39fd912727b1a85d0a30a Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Mon, 30 Dec 2013 01:21:04 +0400 Subject: [PATCH 3/3] added OpenCL version of cv::patchNaNs --- modules/core/src/mathfuncs.cpp | 23 +++++++++- modules/core/src/opencl/arithm.cl | 7 ++++ modules/core/test/ocl/test_arithm.cpp | 60 +++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 2 deletions(-) diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp index 0b596071a..90e0d74a4 100644 --- a/modules/core/src/mathfuncs.cpp +++ b/modules/core/src/mathfuncs.cpp @@ -2364,12 +2364,31 @@ bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double ma return badPt.x < 0; } +static bool ocl_patchNaNs( InputOutputArray _a, float value ) +{ + ocl::Kernel k("KF", ocl::core::arithm_oclsrc, + format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=int")); + if (k.empty()) + return false; + + UMat a = _a.getUMat(); + int cn = a.channels(); + + k.args(ocl::KernelArg::ReadOnlyNoSize(a), + ocl::KernelArg::WriteOnly(a, cn), (float)value); /* pass cn, as ocl_convertScaleAbs/ocl_scaleAdd do, so the width handed to the kernel matches globalsize (cols * cn) */ + + size_t globalsize[2] = { (size_t)a.cols * cn, (size_t)a.rows }; /* explicit casts: int -> size_t inside braces is a narrowing conversion in C++11 */ + return k.run(2, globalsize, NULL, false); +} void patchNaNs( InputOutputArray _a, double 
_val ) { - Mat a = _a.getMat(); - CV_Assert( a.depth() == CV_32F ); + CV_Assert( _a.depth() == CV_32F ); + if (ocl::useOpenCL() && _a.isUMat() && _a.dims() <= 2 && ocl_patchNaNs(_a, (float)_val)) + return; + + Mat a = _a.getMat(); const Mat* arrays[] = {&a, 0}; int* ptrs[1]; NAryMatIterator it(arrays, (uchar**)ptrs); diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl index 605fe4785..c8fd99eef 100644 --- a/modules/core/src/opencl/arithm.cl +++ b/modules/core/src/opencl/arithm.cl @@ -271,6 +271,13 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v) dstelem = cos(alpha) * x; \ dstelem2 = sin(alpha) * x +#elif defined OP_PATCH_NANS +#undef EXTRA_PARAMS +#define EXTRA_PARAMS , int val +#define PROCESS_ELEM \ + if (( srcelem1 & 0x7fffffff) > 0x7f800000 ) \ + dstelem = val + #else #error "unknown op type" #endif diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp index f2b987514..03d842218 100644 --- a/modules/core/test/ocl/test_arithm.cpp +++ b/modules/core/test/ocl/test_arithm.cpp @@ -42,6 +42,8 @@ #include "test_precomp.hpp" #include "opencv2/ts/ocl_test.hpp" +#include <limits> + #ifdef HAVE_OPENCL namespace cvtest { @@ -1357,6 +1359,63 @@ OCL_TEST_P(ScaleAdd, Mat) } } +//////////////////////////////// PatchNans //////////////////////////////////////////////// + +PARAM_TEST_CASE(PatchNaNs, Channels, bool) +{ + int cn; + bool use_roi; + double value; + + TEST_DECLARE_INPUT_PARAMETER(src) + + virtual void SetUp() + { + cn = GET_PARAM(0); + use_roi = GET_PARAM(1); + } + + virtual void generateTestData() + { + const int type = CV_MAKE_TYPE(CV_32F, cn); + + Size roiSize = randomSize(1, 10); + Border srcBorder = randomBorder(0, use_roi ? 
MAX_VALUE : 0); + randomSubMat(src, src_roi, roiSize, srcBorder, type, -40, 40); + + // generating NaNs + roiSize.width *= cn; + for (int y = 0; y < roiSize.height; ++y) + { + float * const ptr = src_roi.ptr<float>(y); + for (int x = 0; x < roiSize.width; ++x) + ptr[x] = randomInt(-1, 1) == 0 ? std::numeric_limits<float>::quiet_NaN() : ptr[x]; + } + + value = randomDouble(-100, 100); + + UMAT_UPLOAD_INPUT_PARAMETER(src) + } + + void Near() + { + OCL_EXPECT_MATS_NEAR(src, 0) + } +}; + +OCL_TEST_P(PatchNaNs, Mat) +{ + for (int j = 0; j < test_loop_times; j++) + { + generateTestData(); + + OCL_OFF(cv::patchNaNs(src_roi, value)); + OCL_ON(cv::patchNaNs(usrc_roi, value)); + + Near(); + } +} + //////////////////////////////////////// Instantiation ///////////////////////////////////////// OCL_INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(::testing::Values(CV_8U, CV_8S), OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool())); @@ -1395,6 +1454,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Ch OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); OCL_INSTANTIATE_TEST_CASE_P(Arithm, ScaleAdd, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool())); +OCL_INSTANTIATE_TEST_CASE_P(Arithm, PatchNaNs, Combine(OCL_ALL_CHANNELS, Bool())); } } // namespace cvtest::ocl