From abcf8d9e610e08227de9cada14868e46a651b8d7 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Sun, 29 Dec 2013 18:01:01 +0400
Subject: [PATCH 1/3] implemented OpenCL version of cv::convertScaleAbs

---
 modules/core/src/convert.cpp          | 39 +++++++++++++++++++++++++++
 modules/core/src/opencl/arithm.cl     |  9 +++----
 modules/core/test/ocl/test_arithm.cpp | 18 +++++++++++++
 3 files changed, 61 insertions(+), 5 deletions(-)

diff --git a/modules/core/src/convert.cpp b/modules/core/src/convert.cpp
index dba8c7b0c..c2014f1be 100644
--- a/modules/core/src/convert.cpp
+++ b/modules/core/src/convert.cpp
@@ -1266,10 +1266,49 @@ static BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)
     return cvtScaleTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)];
 }
 
+// OpenCL branch of cv::convertScaleAbs: stores |src*alpha + beta| in an 8-bit _dst.
+// Returns false when CV_64F input meets a device without double support, when the kernel
+// is empty after construction, or when k.run() fails; the caller then takes the CPU path.
+static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta )
+{
+    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
+    if (!doubleSupport && depth == CV_64F)
+        return false;
+
+    char cvt[2][50];
+    int wdepth = std::max(depth, CV_32F); // depth used for workT: never below CV_32F
+    ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
+                  format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=uchar -D srcT1=%s"
+                         " -D workT=%s -D convertToWT1=%s -D convertToDT=%s%s",
+                         ocl::typeToStr(depth), ocl::typeToStr(wdepth),
+                         ocl::convertTypeStr(depth, wdepth, 1, cvt[0]),
+                         ocl::convertTypeStr(wdepth, CV_8U, 1, cvt[1]),
+                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
+    if (k.empty())
+        return false;
+
+    _dst.createSameSize(_src, CV_8UC(cn));
+    UMat src = _src.getUMat(), dst = _dst.getUMat();
+    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
+            dstarg = ocl::KernelArg::WriteOnly(dst, cn);
+    if (wdepth == CV_32F)
+        k.args(srcarg, dstarg, (float)alpha, (float)beta); // kernel declares them as workT
+    else if (wdepth == CV_64F)
+        k.args(srcarg, dstarg, alpha, beta);
+    // Widen to size_t before multiplying: avoids int narrowing inside the braced initializer.
+    size_t globalsize[2] = { (size_t)src.cols * cn, (size_t)src.rows };
+    return k.run(2, globalsize, NULL, false);
+}
+
 }
 
 void cv::convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta )
 {
+    if (ocl::useOpenCL() && _src.dims() <= 2 && _dst.isUMat() &&
+            ocl_convertScaleAbs(_src, _dst, alpha, beta))
+        return;
+
     Mat src = _src.getMat();
     int cn = src.channels();
     double scale[] = {alpha, beta};
diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl
index 1647e8d19..add4b0695 100644
--- a/modules/core/src/opencl/arithm.cl
+++ b/modules/core/src/opencl/arithm.cl
@@ -223,13 +223,12 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
 #define convertToWT2
 #define PROCESS_ELEM dstelem = convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0)
 
-#elif defined OP_CONVERT
-#define PROCESS_ELEM dstelem = convertToDT(srcelem1)
-
-#elif defined OP_CONVERT_SCALE
+#elif defined OP_CONVERT_SCALE_ABS
 #undef EXTRA_PARAMS
 #define EXTRA_PARAMS , workT alpha, workT beta
-#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + beta)
+#define PROCESS_ELEM \
+    workT value = srcelem1 * alpha + beta; \
+    dstelem = convertToDT(value >= 0 ? value : -value)
 
 #elif defined OP_CTP_AD || defined OP_CTP_AR
 #ifdef OP_CTP_AD
diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp
index 3aa47b7d2..df692b818 100644
--- a/modules/core/test/ocl/test_arithm.cpp
+++ b/modules/core/test/ocl/test_arithm.cpp
@@ -1324,6 +1324,23 @@ OCL_TEST_P(InRange, Scalar)
 }
 
 
+//////////////////////////////// ConvertScaleAbs ////////////////////////////////////////////////
+
+typedef ArithmTestBase ConvertScaleAbs;
+// Runs convertScaleAbs on the Mat ROIs (OCL_OFF) and the UMat ROIs (OCL_ON), then calls Near().
+OCL_TEST_P(ConvertScaleAbs, Mat)
+{
+    for (int iter = 0; iter < test_loop_times; ++iter)
+    {
+        generateTestData();
+        OCL_OFF(cv::convertScaleAbs(src1_roi, dst1_roi, val[0], val[1]));
+        OCL_ON(cv::convertScaleAbs(usrc1_roi, udst1_roi, val[0], val[1]));
+
+        const double eps = depth <= CV_32S ? 1 : 1e-6;
+        Near(eps);
+    }
+}
+
 //////////////////////////////////////// Instantiation /////////////////////////////////////////
 
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(::testing::Values(CV_8U, CV_8S), OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
@@ -1360,6 +1377,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNE
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 
 } } // namespace cvtest::ocl
 

From 6b64257c811ff63effa95026950d2dca14efd95e Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Sun, 29 Dec 2013 18:46:25 +0400
Subject: [PATCH 2/3] added OpenCL version of cv::scaleAdd

---
 modules/core/src/matmul.cpp           | 48 +++++++++++++++++++++++++--
 modules/core/src/opencl/arithm.cl     |  8 +++++
 modules/core/test/ocl/test_arithm.cpp | 19 ++++++++++-
 3 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp
index dc90ac447..3081676f5 100644
--- a/modules/core/src/matmul.cpp
+++ b/modules/core/src/matmul.cpp
@@ -41,6 +41,7 @@
 //M*/
 
 #include "precomp.hpp"
+#include "opencl_kernels.hpp"
 #include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
 
 #ifdef HAVE_IPP
@@ -2154,20 +2155,61 @@ static void scaleAdd_64f(const double* src1, const double* src2, double* dst,
 
 typedef void (*ScaleAddFunc)(const uchar* src1, const uchar* src2, uchar* dst, int len, const void* alpha);
 
+// OpenCL branch of cv::scaleAdd: computes dst = src1*alpha + src2 with the OP_SCALE_ADD kernel.
+// Returns false when CV_64F data meets a device without double support, when the inputs differ
+// in size, when the kernel is empty after construction, or when k.run() fails.
+static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst, int type )
+{
+    int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), wdepth = std::max(depth, CV_32F);
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
+    Size size = _src1.size();
+    if ( (!doubleSupport && depth == CV_64F) || size != _src2.size() )
+        return false;
+
+    char cvt[2][50];
+    ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
+                  format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D convertToWT1=%s"
+                         " -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s%s", ocl::typeToStr(depth),
+                         ocl::typeToStr(wdepth), ocl::convertTypeStr(depth, wdepth, 1, cvt[0]),
+                         ocl::convertTypeStr(wdepth, depth, 1, cvt[1]),
+                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
+    if (k.empty())
+        return false;
+
+    _dst.create(size, type);
+    UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat();
+    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
+            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
+            dstarg = ocl::KernelArg::WriteOnly(dst, cn);
+    if (wdepth == CV_32F)
+        k.args(src1arg, src2arg, dstarg, (float)alpha); // kernel declares alpha as workT
+    else
+        k.args(src1arg, src2arg, dstarg, alpha);
+    // Widen to size_t before multiplying: avoids int narrowing inside the braced initializer.
+    size_t globalsize[2] = { (size_t)dst.cols * cn, (size_t)dst.rows };
+    return k.run(2, globalsize, NULL, false);
+}
+
 }
 
 void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst )
 {
-    Mat src1 = _src1.getMat(), src2 = _src2.getMat();
-    int depth = src1.depth(), cn = src1.channels();
+    int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+    CV_Assert( type == _src2.type() );
+
+    if (ocl::useOpenCL() && _src1.dims() <= 2 && _src2.dims() <= 2 && _dst.isUMat() &&
+            ocl_scaleAdd(_src1, alpha, _src2, _dst, type))
+        return;
 
-    CV_Assert( src1.type() == src2.type() );
     if( depth < CV_32F )
     {
         addWeighted(_src1, alpha, _src2, 1, 0, _dst, depth);
         return;
     }
 
+    Mat src1 = _src1.getMat(), src2 = _src2.getMat();
+    CV_Assert(src1.size == src2.size);
+
     _dst.create(src1.dims, src1.size, src1.type());
     Mat dst = _dst.getMat();
 
diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl
index add4b0695..605fe4785 100644
--- a/modules/core/src/opencl/arithm.cl
+++ b/modules/core/src/opencl/arithm.cl
@@ -91,6 +91,9 @@
 
 #else
 
+    #ifndef convertToWT2
+    #define convertToWT2 convertToWT1
+    #endif
     #define srcelem1 convertToWT1(*(__global srcT1*)(srcptr1 + src1_index))
     #define srcelem2 convertToWT2(*(__global srcT2*)(srcptr2 + src2_index))
 
@@ -230,6 +233,11 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
     workT value = srcelem1 * alpha + beta; \
     dstelem = convertToDT(value >= 0 ? value : -value)
 
+#elif defined OP_SCALE_ADD
+#undef EXTRA_PARAMS
+#define EXTRA_PARAMS , workT alpha
+#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * alpha + srcelem2)
+
 #elif defined OP_CTP_AD || defined OP_CTP_AR
 #ifdef OP_CTP_AD
 #define TO_DEGREE cartToPolar *= (180 / CV_PI);
diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp
index df692b818..f2b987514 100644
--- a/modules/core/test/ocl/test_arithm.cpp
+++ b/modules/core/test/ocl/test_arithm.cpp
@@ -1323,7 +1323,6 @@ OCL_TEST_P(InRange, Scalar)
     }
 }
 
-
 //////////////////////////////// ConvertScaleAbs ////////////////////////////////////////////////
 
 typedef ArithmTestBase ConvertScaleAbs;
@@ -1341,6 +1340,23 @@ OCL_TEST_P(ConvertScaleAbs, Mat)
     }
 }
 
+//////////////////////////////// ScaleAdd ////////////////////////////////////////////////
+
+typedef ArithmTestBase ScaleAdd;
+// Runs scaleAdd on the Mat ROIs (OCL_OFF) and the UMat ROIs (OCL_ON), then calls Near().
+OCL_TEST_P(ScaleAdd, Mat)
+{
+    for (int iter = 0; iter < test_loop_times; ++iter)
+    {
+        generateTestData();
+        OCL_OFF(cv::scaleAdd(src1_roi, val[0], src2_roi, dst1_roi));
+        OCL_ON(cv::scaleAdd(usrc1_roi, val[0], usrc2_roi, udst1_roi));
+
+        const double eps = depth <= CV_32S ? 1 : 1e-6;
+        Near(eps);
+    }
+}
+
 //////////////////////////////////////// Instantiation /////////////////////////////////////////
 
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(::testing::Values(CV_8U, CV_8S), OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
@@ -1378,6 +1394,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_6
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, ScaleAdd, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 
 } } // namespace cvtest::ocl
 

From 3e1bec52486bab3002e39fd912727b1a85d0a30a Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 30 Dec 2013 01:21:04 +0400
Subject: [PATCH 3/3] added OpenCL version of cv::patchNaNs

---
 modules/core/src/mathfuncs.cpp        | 23 +++++++++-
 modules/core/src/opencl/arithm.cl     |  7 ++++
 modules/core/test/ocl/test_arithm.cpp | 60 +++++++++++++++++++++++++++
 3 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/modules/core/src/mathfuncs.cpp b/modules/core/src/mathfuncs.cpp
index 0b596071a..90e0d74a4 100644
--- a/modules/core/src/mathfuncs.cpp
+++ b/modules/core/src/mathfuncs.cpp
@@ -2364,12 +2364,31 @@ bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double ma
     return badPt.x < 0;
 }
 
+// OpenCL branch of patchNaNs: overwrites NaN elements of the CV_32F array _a with value, in place.
+// Returns false if the kernel is empty or k.run() fails, so the caller can take the CPU path.
+static bool ocl_patchNaNs( InputOutputArray _a, float value )
+{
+    ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
+                     format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=int"));
+    if (k.empty())
+        return false;
+    UMat a = _a.getUMat();
+    int cn = a.channels();
+    // Pass cn as the width multiplier, as the sibling OpenCL helpers do for a cols*cn grid.
+    k.args(ocl::KernelArg::ReadOnlyNoSize(a),
+           ocl::KernelArg::WriteOnly(a, cn), (float)value);
+    size_t globalsize[2] = { (size_t)a.cols * cn, (size_t)a.rows };
+    return k.run(2, globalsize, NULL, false);
+}
 
 void patchNaNs( InputOutputArray _a, double _val )
 {
-    Mat a = _a.getMat();
-    CV_Assert( a.depth() == CV_32F );
+    CV_Assert( _a.depth() == CV_32F );
 
+    if (ocl::useOpenCL() && _a.isUMat() && _a.dims() <= 2 && ocl_patchNaNs(_a, (float)_val))
+        return;
+
+    Mat a = _a.getMat();
     const Mat* arrays[] = {&a, 0};
     int* ptrs[1];
     NAryMatIterator it(arrays, (uchar**)ptrs);
diff --git a/modules/core/src/opencl/arithm.cl b/modules/core/src/opencl/arithm.cl
index 605fe4785..c8fd99eef 100644
--- a/modules/core/src/opencl/arithm.cl
+++ b/modules/core/src/opencl/arithm.cl
@@ -271,6 +271,13 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
     dstelem = cos(alpha) * x; \
     dstelem2 = sin(alpha) * x
 
+#elif defined OP_PATCH_NANS
+#undef EXTRA_PARAMS
+#define EXTRA_PARAMS , int val
+#define PROCESS_ELEM \
+    if (( srcelem1 & 0x7fffffff) > 0x7f800000 ) \
+        dstelem = val
+
 #else
 #error "unknown op type"
 #endif
diff --git a/modules/core/test/ocl/test_arithm.cpp b/modules/core/test/ocl/test_arithm.cpp
index f2b987514..03d842218 100644
--- a/modules/core/test/ocl/test_arithm.cpp
+++ b/modules/core/test/ocl/test_arithm.cpp
@@ -42,6 +42,8 @@
 #include "test_precomp.hpp"
 #include "opencv2/ts/ocl_test.hpp"
 
+#include <cmath>
+#include <limits>
 #ifdef HAVE_OPENCL
 
 namespace cvtest {
@@ -1357,6 +1359,63 @@ OCL_TEST_P(ScaleAdd, Mat)
     }
 }
 
+//////////////////////////////// PatchNaNs ////////////////////////////////////////////////
+
+PARAM_TEST_CASE(PatchNaNs, Channels, bool)
+{
+    int cn;          // test parameter 0
+    bool use_roi;    // test parameter 1; when false, randomBorder() is called with (0, 0)
+    double value;    // replacement value, re-drawn by every generateTestData() call
+
+    TEST_DECLARE_INPUT_PARAMETER(src)
+
+    virtual void SetUp()
+    {
+        cn = GET_PARAM(0);
+        use_roi = GET_PARAM(1);
+    }
+
+    // Builds a random CV_32F source, plants quiet NaNs in its ROI and picks the patch value.
+    virtual void generateTestData()
+    {
+        Size roiSize = randomSize(1, 10);
+        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
+        randomSubMat(src, src_roi, roiSize, srcBorder, CV_MAKE_TYPE(CV_32F, cn), -40, 40);
+
+        // A ROI row holds width * cn floats; a draw of 0 from randomInt(-1, 1) plants a NaN.
+        const int rowLen = roiSize.width * cn;
+        for (int row = 0; row < roiSize.height; ++row)
+        {
+            float * const line = src_roi.ptr<float>(row);
+            for (int i = 0; i < rowLen; ++i)
+                if (randomInt(-1, 1) == 0)
+                    line[i] = std::numeric_limits<float>::quiet_NaN();
+        }
+
+        value = randomDouble(-100, 100);
+
+        UMAT_UPLOAD_INPUT_PARAMETER(src)
+    }
+
+    void Near()
+    {
+        OCL_EXPECT_MATS_NEAR(src, 0)
+    }
+};
+
+OCL_TEST_P(PatchNaNs, Mat)
+{
+    // Patches the Mat ROI (OCL_OFF) and the UMat ROI (OCL_ON) in place, then calls Near().
+    for (int iter = 0; iter < test_loop_times; ++iter)
+    {
+        generateTestData();
+        OCL_OFF(cv::patchNaNs(src_roi, value));
+        OCL_ON(cv::patchNaNs(usrc_roi, value));
+
+        Near();
+    }
+}
+
 //////////////////////////////////////// Instantiation /////////////////////////////////////////
 
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(::testing::Values(CV_8U, CV_8S), OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
@@ -1395,6 +1454,7 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Ch
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
 OCL_INSTANTIATE_TEST_CASE_P(Arithm, ScaleAdd, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
+OCL_INSTANTIATE_TEST_CASE_P(Arithm, PatchNaNs, Combine(OCL_ALL_CHANNELS, Bool()));
 
 } } // namespace cvtest::ocl