From a8426e1c12b030356c75644439af4ac151c0932b Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 4 Nov 2013 15:09:58 +0400
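Subject: [PATCH 1/3] fixed ocl::cornerHarris, ocl::cornerMinEigenVal and their
 accuracy tests

In both the calcHarris and calcMinEigenVal kernels the Dy sample was guarded
by dx_con instead of dy_con, so the bounds check computed for Dy was never
used. The corner tests now scale floating-point input images by 1/255 (and
shrink the random border range to match), and CornerHarris is additionally
instantiated for CV_32FC1.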

---
 modules/ocl/src/opencl/imgproc_calcHarris.cl  |  4 ++-
 .../ocl/src/opencl/imgproc_calcMinEigenVal.cl |  4 ++-
 modules/ocl/test/test_imgproc.cpp             | 30 +++++++++----------
 3 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/modules/ocl/src/opencl/imgproc_calcHarris.cl b/modules/ocl/src/opencl/imgproc_calcHarris.cl
index 3f53ddf9a..02811dd69 100644
--- a/modules/ocl/src/opencl/imgproc_calcHarris.cl
+++ b/modules/ocl/src/opencl/imgproc_calcHarris.cl
@@ -125,10 +125,12 @@ __kernel void calcHarris(__global const float *Dx, __global const float *Dy, __g
         int indexDx = (dx_startY+i)*(dx_step>>2)+(dx_startX+col);
         float dx_s = dx_con ? Dx[indexDx] : 0.0f;
         dx_data[i] = dx_s;
+
         bool dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
         int indexDy = (dy_startY+i)*(dy_step>>2)+(dy_startX+col);
-        float dy_s = dx_con ? Dy[indexDy] : 0.0f;
+        float dy_s = dy_con ? Dy[indexDy] : 0.0f;
         dy_data[i] = dy_s;
+
         data[0][i] = dx_data[i] * dx_data[i];
         data[1][i] = dx_data[i] * dy_data[i];
         data[2][i] = dy_data[i] * dy_data[i];
diff --git a/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl b/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
index c598246ae..7cb4c8ff3 100644
--- a/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
+++ b/modules/ocl/src/opencl/imgproc_calcMinEigenVal.cl
@@ -124,10 +124,12 @@ __kernel void calcMinEigenVal(__global const float *Dx,__global const float *Dy,
         int indexDx = (dx_startY+i)*(dx_step>>2)+(dx_startX+col);
         float dx_s = dx_con ? Dx[indexDx] : 0.0f;
         dx_data[i] = dx_s;
+
         bool dy_con = dy_startX+col >= 0 && dy_startX+col < dy_whole_cols && dy_startY+i >= 0 && dy_startY+i < dy_whole_rows;
         int indexDy = (dy_startY+i)*(dy_step>>2)+(dy_startX+col);
-        float dy_s = dx_con ? Dy[indexDy] : 0.0f;
+        float dy_s = dy_con ? Dy[indexDy] : 0.0f;
         dy_data[i] = dy_s;
+
         data[0][i] = dx_data[i] * dx_data[i];
         data[1][i] = dx_data[i] * dy_data[i];
         data[2][i] = dy_data[i] * dy_data[i];
diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp
index 7e4b14eca..634633a2a 100644
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -93,22 +93,14 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType,
         generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
     }
 
-    void Near(double threshold = 0.0, bool relative = false)
+    void Near(double threshold = 0.0)
     {
         Mat roi, whole;
         gdst_whole.download(whole);
         gdst_roi.download(roi);
 
-        if (relative)
-        {
-            EXPECT_MAT_NEAR_RELATIVE(dst_whole, whole, threshold);
-            EXPECT_MAT_NEAR_RELATIVE(dst_roi, roi, threshold);
-        }
-        else
-        {
-            EXPECT_MAT_NEAR(dst_whole, whole, threshold);
-            EXPECT_MAT_NEAR(dst_roi, roi, threshold);
-        }
+        EXPECT_MAT_NEAR(dst_whole, whole, threshold);
+        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
     }
 };
 
@@ -207,11 +199,19 @@ struct CornerTestBase :
         Mat image = readImageType("gpu/stereobm/aloe-L.png", type);
         ASSERT_FALSE(image.empty());
 
+        bool isFP = CV_MAT_DEPTH(type) >= CV_32F;
+        float val = 255.0f;
+        if (isFP)
+        {
+            image.convertTo(image, -1, 1.0 / 255);
+            val /= 255.0f;
+        }
+
         Size roiSize = image.size();
         Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
 
         Size wholeSize = Size(roiSize.width + srcBorder.lef + srcBorder.rig, roiSize.height + srcBorder.top + srcBorder.bot);
-        src = randomMat(wholeSize, type, -255, 255, false);
+        src = randomMat(wholeSize, type, -val, val, false);
         src_roi = src(Rect(srcBorder.lef, srcBorder.top, roiSize.width, roiSize.height));
         image.copyTo(src_roi);
 
@@ -236,7 +236,7 @@ OCL_TEST_P(CornerMinEigenVal, Mat)
         cornerMinEigenVal(src_roi, dst_roi, blockSize, apertureSize, borderType);
         ocl::cornerMinEigenVal(gsrc_roi, gdst_roi, blockSize, apertureSize, borderType);
 
-        Near(1e-5, true);
+        Near(1e-6);
     }
 }
 
@@ -256,7 +256,7 @@ OCL_TEST_P(CornerHarris, Mat)
         cornerHarris(src_roi, dst_roi, blockSize, apertureSize, k, borderType);
         ocl::cornerHarris(gsrc_roi, gdst_roi, blockSize, apertureSize, k, borderType);
 
-        Near(1e-5, true);
+        Near(1e-6);
     }
 }
 
@@ -522,7 +522,7 @@ INSTANTIATE_TEST_CASE_P(Imgproc, CornerMinEigenVal, Combine(
                             Bool()));
 
 INSTANTIATE_TEST_CASE_P(Imgproc, CornerHarris, Combine(
-                            Values((MatType)CV_8UC1), // TODO does not work properly with CV_32FC1
+                            Values((MatType)CV_8UC1, CV_32FC1),
                             Values(3, 5),
                             Values( (int)BORDER_CONSTANT, (int)BORDER_REPLICATE, (int)BORDER_REFLECT, (int)BORDER_REFLECT_101),
                             Bool()));

From c89dfd333c8f93c5c40a12621d95ac300b9885d2 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 4 Nov 2013 15:30:00 +0400
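Subject: [PATCH 2/3] fixed warnings in OpenCL kernels

Helper functions in bgfg_mog.cl and tvl1flow.cl are now declared inline or
static, unused local variables are removed from the Haar cascade kernels and
from readImage, and a double literal assigned to a float in mog2_kernel is
replaced with a float literal.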

---
 modules/ocl/src/opencl/bgfg_mog.cl            | 22 +++++++++----------
 modules/ocl/src/opencl/haarobjectdetect.cl    |  1 -
 .../src/opencl/haarobjectdetect_scaled2.cl    |  2 --
 modules/ocl/src/opencl/tvl1flow.cl            | 16 +++-----------
 4 files changed, 14 insertions(+), 27 deletions(-)

diff --git a/modules/ocl/src/opencl/bgfg_mog.cl b/modules/ocl/src/opencl/bgfg_mog.cl
index 8621ff31b..a13a30e90 100644
--- a/modules/ocl/src/opencl/bgfg_mog.cl
+++ b/modules/ocl/src/opencl/bgfg_mog.cl
@@ -48,22 +48,22 @@
 #define T_MEAN_VAR float
 #define CONVERT_TYPE convert_uchar_sat
 #define F_ZERO (0.0f)
-float cvt(uchar val)
+inline float cvt(uchar val)
 {
     return val;
 }
 
-float sqr(float val)
+inline float sqr(float val)
 {
     return val * val;
 }
 
-float sum(float val)
+inline float sum(float val)
 {
     return val;
 }
 
-float clamp1(float var, float learningRate, float diff, float minVar)
+static float clamp1(float var, float learningRate, float diff, float minVar)
 {
     return fmax(var + learningRate * (diff * diff - var), minVar);
 }
@@ -72,7 +72,7 @@ float clamp1(float var, float learningRate, float diff, float minVar)
 #define T_MEAN_VAR float4
 #define CONVERT_TYPE convert_uchar4_sat
 #define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
-float4 cvt(const uchar4 val)
+inline float4 cvt(const uchar4 val)
 {
     float4 result;
     result.x = val.x;
@@ -83,17 +83,17 @@ float4 cvt(const uchar4 val)
     return result;
 }
 
-float sqr(const float4 val)
+inline float sqr(const float4 val)
 {
     return val.x * val.x + val.y * val.y + val.z * val.z;
 }
 
-float sum(const float4 val)
+inline float sum(const float4 val)
 {
     return (val.x + val.y + val.z);
 }
 
-float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
+static float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
 {
     float4 result;
     result.x = fmax(var.x + learningRate * (diff.x * diff.x - var.x), minVar);
@@ -116,14 +116,14 @@ typedef struct
     uchar c_shadowVal;
 }con_srtuct_t;
 
-void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
+static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
 {
     float val = ptr[(k * rows + y) * ptr_step + x];
     ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
     ptr[((k + 1) * rows + y) * ptr_step + x] = val;
 }
 
-void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
+static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
 {
     float4 val = ptr[(k * rows + y) * ptr_step + x];
     ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
@@ -412,7 +412,7 @@ __kernel void mog2_kernel(__global T_FRAME * frame, __global int* fgmask, __glob
 
             if (_weight < -prune)
             {
-                _weight = 0.0;
+                _weight = 0.0f;
                 nmodes--;
             }
 
diff --git a/modules/ocl/src/opencl/haarobjectdetect.cl b/modules/ocl/src/opencl/haarobjectdetect.cl
index 58ebb4c01..a62b3af8c 100644
--- a/modules/ocl/src/opencl/haarobjectdetect.cl
+++ b/modules/ocl/src/opencl/haarobjectdetect.cl
@@ -292,7 +292,6 @@ __kernel void __attribute__((reqd_work_group_size(8,8,1)))gpuRunHaarClassifierCa
     for(int scalei = 0; scalei <loopcount; scalei++)
     {
         int4 scaleinfo1= info[scalei];
-        int width = (scaleinfo1.x & 0xffff0000) >> 16;
         int height = scaleinfo1.x & 0xffff;
         int grpnumperline =(scaleinfo1.y & 0xffff0000) >> 16;
         int totalgrp = scaleinfo1.y & 0xffff;
diff --git a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
index 3ace4470a..72b94038c 100644
--- a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+++ b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
@@ -136,8 +136,6 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
     {
         int4 scaleinfo1;
         scaleinfo1 = info[scalei];
-        int width = (scaleinfo1.x & 0xffff0000) >> 16;
-        int height = scaleinfo1.x & 0xffff;
         int grpnumperline = (scaleinfo1.y & 0xffff0000) >> 16;
         int totalgrp = scaleinfo1.y & 0xffff;
         float factor = as_float(scaleinfo1.w);
diff --git a/modules/ocl/src/opencl/tvl1flow.cl b/modules/ocl/src/opencl/tvl1flow.cl
index ca60fb70f..2787f00dc 100644
--- a/modules/ocl/src/opencl/tvl1flow.cl
+++ b/modules/ocl/src/opencl/tvl1flow.cl
@@ -69,23 +69,16 @@ __global float* dx, __global float* dy, int dx_step)
 
 }
 
-float bicubicCoeff(float x_)
+static float bicubicCoeff(float x_)
 {
 
     float x = fabs(x_);
     if (x <= 1.0f)
-    {
         return x * x * (1.5f * x - 2.5f) + 1.0f;
-    }
     else if (x < 2.0f)
-    {
         return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
-    }
     else
-    {
         return 0.0f;
-    }
-
 }
 
 __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row,
@@ -170,12 +163,10 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
 
 }
 
-float readImage(__global const float *image,  const int x,  const int y,  const int rows,  const int cols, const int elemCntPerRow)
+static float readImage(__global const float *image,  const int x,  const int y,  const int rows,  const int cols, const int elemCntPerRow)
 {
     int i0 = clamp(x, 0, cols - 1);
     int j0 = clamp(y, 0, rows - 1);
-    int i1 = clamp(x + 1, 0, cols - 1);
-    int j1 = clamp(y + 1, 0, rows - 1);
 
     return image[j0 * elemCntPerRow + i0];
 }
@@ -303,7 +294,7 @@ __kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col,
 
 }
 
-float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)
+static float divergence(__global const float* v1, __global const float* v2, int y, int x, int v1_step, int v2_step)
 {
 
     if (x > 0 && y > 0)
@@ -407,5 +398,4 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
             error[y * I1wx_step + x] = n1 + n2;
         }
     }
-
 }

From e7e7e04dce59258843703ef5d44ad6b61d568aa6 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Tue, 5 Nov 2013 14:17:31 +0400
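Subject: [PATCH 3/3] reverted to relative error in corner tests

Restores the optional relative comparison in ImgprocTestBase::Near and uses
it again (threshold 1e-5) for the CornerMinEigenVal and CornerHarris tests.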

---
 modules/ocl/test/test_imgproc.cpp | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp
index 634633a2a..c7099a10c 100644
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -93,14 +93,22 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType,
         generateOclMat(gdst_whole, gdst_roi, dst_whole, roiSize, dstBorder);
     }
 
-    void Near(double threshold = 0.0)
+    void Near(double threshold = 0.0, bool relative = false)
     {
         Mat roi, whole;
         gdst_whole.download(whole);
         gdst_roi.download(roi);
 
-        EXPECT_MAT_NEAR(dst_whole, whole, threshold);
-        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
+        if (relative)
+        {
+            EXPECT_MAT_NEAR_RELATIVE(dst_whole, whole, threshold);
+            EXPECT_MAT_NEAR_RELATIVE(dst_roi, roi, threshold);
+        }
+        else
+        {
+            EXPECT_MAT_NEAR(dst_whole, whole, threshold);
+            EXPECT_MAT_NEAR(dst_roi, roi, threshold);
+        }
     }
 };
 
@@ -236,7 +244,7 @@ OCL_TEST_P(CornerMinEigenVal, Mat)
         cornerMinEigenVal(src_roi, dst_roi, blockSize, apertureSize, borderType);
         ocl::cornerMinEigenVal(gsrc_roi, gdst_roi, blockSize, apertureSize, borderType);
 
-        Near(1e-6);
+        Near(1e-5, true);
     }
 }
 
@@ -256,7 +264,7 @@ OCL_TEST_P(CornerHarris, Mat)
         cornerHarris(src_roi, dst_roi, blockSize, apertureSize, k, borderType);
         ocl::cornerHarris(gsrc_roi, gdst_roi, blockSize, apertureSize, k, borderType);
 
-        Near(1e-6);
+        Near(1e-5, true);
     }
 }