From 760b718981cc87530c04d708547ec97bc4ccecc8 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Thu, 7 Nov 2013 19:50:06 +0400
Subject: [PATCH 01/45] added CV_16SC2 && CV_16UC1 map types support to
 ocl::remap (INTER_LINEAR mode)

---
 modules/ocl/src/imgproc.cpp             | 10 ++--
 modules/ocl/src/opencl/imgproc_remap.cl | 72 +++++++++++++++++++++----
 modules/ocl/test/test_warp.cpp          |  8 ++-
 3 files changed, 74 insertions(+), 16 deletions(-)

diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp
index 193cb43a6..9232f49bc 100644
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -198,10 +198,8 @@ namespace cv
             if (map1.empty())
                 map1.swap(map2);
 
-            CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST
-                      /*|| interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);
-            CV_Assert((map1.type() == CV_16SC2 && (map2.empty() || (interpolation == INTER_NEAREST &&
-                                                                    (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) )) ||
+            CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST);
+            CV_Assert((map1.type() == CV_16SC2 && (map2.empty() || (map2.type() == CV_16UC1 || map2.type() == CV_16SC1)) ) ||
                       (map1.type() == CV_32FC2 && !map2.data) ||
                       (map1.type() == CV_32FC1 && map2.type() == CV_32FC1));
             CV_Assert(!map2.data || map2.size() == map1.size());
@@ -231,8 +229,8 @@ namespace cv
                 CV_Error(CV_StsBadArg, "Unsupported map types");
 
             int ocn = dst.oclchannels();
-            size_t localThreads[3] = { 16, 16, 1};
-            size_t globalThreads[3] = { dst.cols, dst.rows, 1};
+            size_t localThreads[3] = { 256, 1, 1 };
+            size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
 
             Mat scalar(1, 1, CV_MAKE_TYPE(dst.depth(), ocn), borderValue);
             std::string buildOptions = format("-D %s -D %s -D T=%s%s", interMap[interpolation],
diff --git a/modules/ocl/src/opencl/imgproc_remap.cl b/modules/ocl/src/opencl/imgproc_remap.cl
index b623091ed..340e741cc 100644
--- a/modules/ocl/src/opencl/imgproc_remap.cl
+++ b/modules/ocl/src/opencl/imgproc_remap.cl
@@ -243,6 +243,60 @@ __kernel void remap_16SC2_16UC1(__global const T * restrict src, __global T * ds
 
 #elif INTER_LINEAR
 
+__kernel void remap_16SC2_16UC1(__global T const * restrict src, __global T * dst,
+        __global short2 * restrict map1, __global ushort * restrict map2,
+        int src_offset, int dst_offset, int map1_offset, int map2_offset,
+        int src_step, int dst_step, int map1_step, int map2_step,
+        int src_cols, int src_rows, int dst_cols, int dst_rows, T nVal)
+{   // INTER_LINEAR remap: map1 supplies integer source coordinates, map2 the packed fractional offsets.
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+    // Work-items that fall outside the dst_cols x dst_rows area write nothing.
+    if (x < dst_cols && y < dst_rows)
+    {
+        int dstIdx = mad24(y, dst_step, x + dst_offset);
+        int map1Idx = mad24(y, map1_step, x + map1_offset);
+        int map2Idx = mad24(y, map2_step, x + map2_offset);
+        // A is the integer source position read from map1; B, C and D are its right, lower and lower-right neighbours.
+        int2 map_dataA = convert_int2(map1[map1Idx]);
+        int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
+        int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
+        int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
+        // u holds the blend weights: (map2Value & (INTER_TAB_SIZE - 1)) for x and (map2Value >> INTER_BITS) for y, each divided by INTER_TAB_SIZE.
+        ushort map2Value = (ushort)(map2[map2Idx] & (INTER_TAB_SIZE2 - 1));
+        WT2 u = (WT2)(map2Value & (INTER_TAB_SIZE - 1), map2Value >> INTER_BITS) / (WT2)(INTER_TAB_SIZE);
+        // Every sample starts as nVal converted to the working type WT.
+        WT scalar = convertToWT(nVal);
+        WT a = scalar, b = scalar, c = scalar, d = scalar;
+        // Per neighbour: read src directly when NEED_EXTRAPOLATION is false, otherwise hand the sample to EXTRAPOLATE (both macros are defined outside this hunk).
+        if (!NEED_EXTRAPOLATION(map_dataA.x, map_dataA.y))
+            a = convertToWT(src[mad24(map_dataA.y, src_step, map_dataA.x + src_offset)]);
+        else
+            EXTRAPOLATE(map_dataA, a);
+
+        if (!NEED_EXTRAPOLATION(map_dataB.x, map_dataB.y))
+            b = convertToWT(src[mad24(map_dataB.y, src_step, map_dataB.x + src_offset)]);
+        else
+            EXTRAPOLATE(map_dataB, b);
+
+        if (!NEED_EXTRAPOLATION(map_dataC.x, map_dataC.y))
+            c = convertToWT(src[mad24(map_dataC.y, src_step, map_dataC.x + src_offset)]);
+        else
+            EXTRAPOLATE(map_dataC, c);
+
+        if (!NEED_EXTRAPOLATION(map_dataD.x, map_dataD.y))
+            d = convertToWT(src[mad24(map_dataD.y, src_step, map_dataD.x + src_offset)]);
+        else
+            EXTRAPOLATE(map_dataD, d);
+        // Bilinear blend of the four samples, then conversion back to the destination type T.
+        WT dst_data = a * (1 - u.x) * (1 - u.y) +
+                      b * (u.x)     * (1 - u.y) +
+                      c * (1 - u.x) * (u.y) +
+                      d * (u.x)     * (u.y);
+        dst[dstIdx] = convertToT(dst_data);
+    }
+}
+
 __kernel void remap_2_32FC1(__global T const * restrict  src, __global T * dst,
         __global float * map1, __global float * map2,
         int src_offset, int dst_offset, int map1_offset, int map2_offset,
@@ -263,7 +317,7 @@ __kernel void remap_2_32FC1(__global T const * restrict  src, __global T * dst,
         int2 map_dataA = convert_int2_sat_rtn(map_data);
         int2 map_dataB = (int2)(map_dataA.x + 1, map_dataA.y);
         int2 map_dataC = (int2)(map_dataA.x, map_dataA.y + 1);
-        int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y +1);
+        int2 map_dataD = (int2)(map_dataA.x + 1, map_dataA.y + 1);
 
         float2 _u = map_data - convert_float2(map_dataA);
         WT2 u = convertToWT2(convert_int2_rte(convertToWT2(_u) * (WT2)INTER_TAB_SIZE)) / (WT2)INTER_TAB_SIZE;
@@ -290,10 +344,10 @@ __kernel void remap_2_32FC1(__global T const * restrict  src, __global T * dst,
         else
             EXTRAPOLATE(map_dataD, d);
 
-        WT dst_data = a * (WT)(1 - u.x) * (WT)(1 - u.y) +
-                      b * (WT)(u.x)     * (WT)(1 - u.y) +
-                      c * (WT)(1 - u.x) * (WT)(u.y) +
-                      d * (WT)(u.x)     * (WT)(u.y);
+        WT dst_data = a * (1 - u.x) * (1 - u.y) +
+                      b * (u.x)     * (1 - u.y) +
+                      c * (1 - u.x) * (u.y) +
+                      d * (u.x)     * (u.y);
         dst[dstIdx] = convertToT(dst_data);
     }
 }
@@ -343,10 +397,10 @@ __kernel void remap_32FC2(__global T const * restrict  src, __global T * dst,
         else
             EXTRAPOLATE(map_dataD, d);
 
-        WT dst_data = a * (WT)(1 - u.x) * (WT)(1 - u.y) +
-                      b * (WT)(u.x)     * (WT)(1 - u.y) +
-                      c * (WT)(1 - u.x) * (WT)(u.y) +
-                      d * (WT)(u.x)     * (WT)(u.y);
+        WT dst_data = a * (1 - u.x) * (1 - u.y) +
+                      b * (u.x)     * (1 - u.y) +
+                      c * (1 - u.x) * (u.y) +
+                      d * (u.x)     * (u.y);
         dst[dstIdx] = convertToT(dst_data);
     }
 }
diff --git a/modules/ocl/test/test_warp.cpp b/modules/ocl/test/test_warp.cpp
index b9231d116..016a04217 100644
--- a/modules/ocl/test/test_warp.cpp
+++ b/modules/ocl/test/test_warp.cpp
@@ -205,7 +205,12 @@ PARAM_TEST_CASE(Remap, MatDepth, Channels, pair<MatType, MatType>, Border, bool)
 
         Border map2Border = randomBorder(0, useRoi ? MAX_VALUE : 0);
         if (map2Type != noType)
-            randomSubMat(map2, map2_roi, dstROISize, map2Border, map2Type, -mapMaxValue, mapMaxValue);
+        {
+            int mapMinValue = -mapMaxValue;
+            if (map2Type == CV_16UC1 || map2Type == CV_16SC1)
+                mapMinValue = 0, mapMaxValue = INTER_TAB_SIZE2;
+            randomSubMat(map2, map2_roi, dstROISize, map2Border, map2Type, mapMinValue, mapMaxValue);
+        }
 
         generateOclMat(gsrc, gsrc_roi, src, srcROISize, srcBorder);
         generateOclMat(gdst, gdst_roi, dst, dstROISize, dstBorder);
@@ -342,6 +347,7 @@ INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_LINEAR, Combine(
                             Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
                             Values(1, 2, 3, 4),
                             Values(pair<MatType, MatType>((MatType)CV_32FC1, (MatType)CV_32FC1),
+                                   pair<MatType, MatType>((MatType)CV_16SC2, (MatType)CV_16UC1),
                                    pair<MatType, MatType>((MatType)CV_32FC2, noType)),
                             Values((Border)BORDER_CONSTANT,
                                    (Border)BORDER_REPLICATE,

From 4248f8221146a929b1ac98f24d58c699e8dafa03 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Fri, 8 Nov 2013 18:40:53 +0400
Subject: [PATCH 02/45] added ROI support to ocl::buildWarp*Maps functions

---
 modules/ocl/src/build_warps.cpp       | 144 ++++++++++++++-----------
 modules/ocl/src/opencl/build_warps.cl | 149 +++++++++++---------------
 2 files changed, 141 insertions(+), 152 deletions(-)

diff --git a/modules/ocl/src/build_warps.cpp b/modules/ocl/src/build_warps.cpp
index dc9ab66db..40c082b55 100644
--- a/modules/ocl/src/build_warps.cpp
+++ b/modules/ocl/src/build_warps.cpp
@@ -53,7 +53,7 @@ using namespace cv::ocl;
 // buildWarpPlaneMaps
 
 void cv::ocl::buildWarpPlaneMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, const Mat &T,
-                                 float scale, oclMat &map_x, oclMat &map_y)
+                                 float scale, oclMat &xmap, oclMat &ymap)
 {
     CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
     CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
@@ -68,37 +68,40 @@ void cv::ocl::buildWarpPlaneMaps(Size /*src_size*/, Rect dst_roi, const Mat &K,
 
     oclMat KRT_oclMat(KRT_mat);
     // transfer K_Rinv and T into a single cl_mem
-    map_x.create(dst_roi.size(), CV_32F);
-    map_y.create(dst_roi.size(), CV_32F);
+    xmap.create(dst_roi.size(), CV_32F);
+    ymap.create(dst_roi.size(), CV_32F);
 
     int tl_u = dst_roi.tl().x;
     int tl_v = dst_roi.tl().y;
 
-    Context *clCxt = Context::getContext();
-    string kernelName = "buildWarpPlaneMaps";
-    vector< pair<size_t, const void *> > args;
+    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
+    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
 
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));
+    vector< pair<size_t, const void *> > args;
+    args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&KRT_mat.data));
     args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));
     args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_offset));
     args.push_back( make_pair( sizeof(cl_float), (void *)&scale));
 
-    size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};
-    size_t localThreads[3]  = {32, 8, 1};
-    openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
+    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
+    size_t localThreads[3]  = { 32, 8, 1 };
+
+    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPlaneMaps", globalThreads, localThreads, args, -1, -1);
 }
 
 //////////////////////////////////////////////////////////////////////////////
 // buildWarpCylyndricalMaps
 
 void cv::ocl::buildWarpCylindricalMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, float scale,
-                                       oclMat &map_x, oclMat &map_y)
+                                       oclMat &xmap, oclMat &ymap)
 {
     CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
     CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
@@ -108,36 +111,40 @@ void cv::ocl::buildWarpCylindricalMaps(Size /*src_size*/, Rect dst_roi, const Ma
 
     oclMat KR_oclMat(K_Rinv.reshape(1, 1));
 
-    map_x.create(dst_roi.size(), CV_32F);
-    map_y.create(dst_roi.size(), CV_32F);
+    xmap.create(dst_roi.size(), CV_32F);
+    ymap.create(dst_roi.size(), CV_32F);
 
     int tl_u = dst_roi.tl().x;
     int tl_v = dst_roi.tl().y;
 
-    Context *clCxt = Context::getContext();
-    string kernelName = "buildWarpCylindricalMaps";
-    vector< pair<size_t, const void *> > args;
+    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
+    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
 
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));
+    vector< pair<size_t, const void *> > args;
+    args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data));
     args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));
     args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_offset));
     args.push_back( make_pair( sizeof(cl_float), (void *)&scale));
 
-    size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};
-    size_t localThreads[3]  = {32, 8, 1};
-    openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
+    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
+    size_t localThreads[3]  = { 32, 8, 1 };
+
+    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpCylindricalMaps", globalThreads, localThreads, args, -1, -1);
 }
 
 //////////////////////////////////////////////////////////////////////////////
 // buildWarpSphericalMaps
+
 void cv::ocl::buildWarpSphericalMaps(Size /*src_size*/, Rect dst_roi, const Mat &K, const Mat &R, float scale,
-                                     oclMat &map_x, oclMat &map_y)
+                                     oclMat &xmap, oclMat &ymap)
 {
     CV_Assert(K.size() == Size(3, 3) && K.type() == CV_32F);
     CV_Assert(R.size() == Size(3, 3) && R.type() == CV_32F);
@@ -147,37 +154,41 @@ void cv::ocl::buildWarpSphericalMaps(Size /*src_size*/, Rect dst_roi, const Mat
 
     oclMat KR_oclMat(K_Rinv.reshape(1, 1));
     // transfer K_Rinv, R_Kinv into a single cl_mem
-    map_x.create(dst_roi.size(), CV_32F);
-    map_y.create(dst_roi.size(), CV_32F);
+    xmap.create(dst_roi.size(), CV_32F);
+    ymap.create(dst_roi.size(), CV_32F);
 
     int tl_u = dst_roi.tl().x;
     int tl_v = dst_roi.tl().y;
 
-    Context *clCxt = Context::getContext();
-    string kernelName = "buildWarpSphericalMaps";
-    vector< pair<size_t, const void *> > args;
+    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
+    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
 
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_x.data));
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&map_y.data));
+    vector< pair<size_t, const void *> > args;
+    args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));
+    args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&KR_oclMat.data));
     args.push_back( make_pair( sizeof(cl_int), (void *)&tl_u));
     args.push_back( make_pair( sizeof(cl_int), (void *)&tl_v));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.cols));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.rows));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_x.step));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&map_y.step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_offset));
     args.push_back( make_pair( sizeof(cl_float), (void *)&scale));
 
-    size_t globalThreads[3] = {map_x.cols, map_x.rows, 1};
-    size_t localThreads[3]  = {32, 8, 1};
-    openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
+    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
+    size_t localThreads[3]  = { 32, 8, 1 };
+    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpSphericalMaps", globalThreads, localThreads, args, -1, -1);
 }
 
+//////////////////////////////////////////////////////////////////////////////
+// buildWarpAffineMaps
 
 void cv::ocl::buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
 {
-
     CV_Assert(M.rows == 2 && M.cols == 3);
+    CV_Assert(dsize.area() > 0); // reject empty and negative-area sizes, matching buildWarpPerspectiveMaps
 
     xmap.create(dsize, CV_32FC1);
     ymap.create(dsize, CV_32FC1);
@@ -194,29 +205,34 @@ void cv::ocl::buildWarpAffineMaps(const Mat &M, bool inverse, Size dsize, oclMat
         iM.convertTo(coeffsMat, coeffsMat.type());
     }
 
+    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
+    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
+
     oclMat coeffsOclMat(coeffsMat.reshape(1, 1));
 
-    Context *clCxt = Context::getContext();
-    string kernelName = "buildWarpAffineMaps";
     vector< pair<size_t, const void *> > args;
-
     args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));
     args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));
     args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.step));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap.step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_offset));
 
-    size_t globalThreads[3] = {xmap.cols, xmap.rows, 1};
-    size_t localThreads[3]  = {32, 8, 1};
-    openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
+    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
+    size_t localThreads[3]  = { 32, 8, 1 };
+    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpAffineMaps", globalThreads, localThreads, args, -1, -1);
 }
 
+//////////////////////////////////////////////////////////////////////////////
+// buildWarpPerspectiveMaps
+
 void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, oclMat &xmap, oclMat &ymap)
 {
-
     CV_Assert(M.rows == 3 && M.cols == 3);
+    CV_Assert(dsize.area() > 0);
 
     xmap.create(dsize, CV_32FC1);
     ymap.create(dsize, CV_32FC1);
@@ -235,19 +251,21 @@ void cv::ocl::buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, o
 
     oclMat coeffsOclMat(coeffsMat.reshape(1, 1));
 
-    Context *clCxt = Context::getContext();
-    string kernelName = "buildWarpPerspectiveMaps";
-    vector< pair<size_t, const void *> > args;
+    int xmap_step = xmap.step / xmap.elemSize(), xmap_offset = xmap.offset / xmap.elemSize();
+    int ymap_step = ymap.step / ymap.elemSize(), ymap_offset = ymap.offset / ymap.elemSize();
 
+    vector< pair<size_t, const void *> > args;
     args.push_back( make_pair( sizeof(cl_mem), (void *)&xmap.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&ymap.data));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&coeffsOclMat.data));
     args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.cols));
     args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.rows));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap.step));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap.step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_step));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&xmap_offset));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&ymap_offset));
 
-    size_t globalThreads[3] = {xmap.cols, xmap.rows, 1};
-    size_t localThreads[3]  = {32, 8, 1};
-    openCLExecuteKernel(clCxt, &build_warps, kernelName, globalThreads, localThreads, args, -1, -1);
+    size_t globalThreads[3] = { xmap.cols, xmap.rows, 1 };
+
+    openCLExecuteKernel(Context::getContext(), &build_warps, "buildWarpPerspectiveMaps", globalThreads, NULL, args, -1, -1);
 }
diff --git a/modules/ocl/src/opencl/build_warps.cl b/modules/ocl/src/opencl/build_warps.cl
index 4402e8c38..bd5e002b5 100644
--- a/modules/ocl/src/opencl/build_warps.cl
+++ b/modules/ocl/src/opencl/build_warps.cl
@@ -43,31 +43,25 @@
 //
 //M*/
 
-__kernel
-    void buildWarpPlaneMaps
-    (
-    __global float * map_x,
-    __global float * map_y,
-    __constant float * KRT,
-    int tl_u,
-    int tl_v,
-    int cols,
-    int rows,
-    int step_x,
-    int step_y,
-    float scale
-    )
+__kernel void buildWarpPlaneMaps(__global float * xmap, __global float * ymap,
+                                 __constant float * KRT,
+                                 int tl_u, int tl_v,
+                                 int cols, int rows,
+                                 int xmap_step, int ymap_step,
+                                 int xmap_offset, int ymap_offset,
+                                 float scale)
 {
     int du = get_global_id(0);
     int dv = get_global_id(1);
-    step_x /= sizeof(float);
-    step_y /= sizeof(float);
 
     __constant float * ck_rinv = KRT;
     __constant float * ct      = KRT + 9;
 
     if (du < cols && dv < rows)
     {
+        int xmap_index = mad24(dv, xmap_step, xmap_offset + du);
+        int ymap_index = mad24(dv, ymap_step, ymap_offset + du);
+
         float u = tl_u + du;
         float v = tl_v + dv;
         float x, y;
@@ -83,33 +77,27 @@ __kernel
         x /= z;
         y /= z;
 
-        map_x[dv * step_x + du] = x;
-        map_y[dv * step_y + du] = y;
+        xmap[xmap_index] = x;
+        ymap[ymap_index] = y;
     }
 }
 
-__kernel
-    void buildWarpCylindricalMaps
-    (
-    __global float * map_x,
-    __global float * map_y,
-    __constant float * ck_rinv,
-    int tl_u,
-    int tl_v,
-    int cols,
-    int rows,
-    int step_x,
-    int step_y,
-    float scale
-    )
+__kernel void buildWarpCylindricalMaps(__global float * xmap, __global float * ymap,
+                                       __constant float * ck_rinv,
+                                       int tl_u, int tl_v,
+                                       int cols, int rows,
+                                       int xmap_step, int ymap_step,
+                                       int xmap_offset, int ymap_offset,
+                                       float scale)
 {
     int du = get_global_id(0);
     int dv = get_global_id(1);
-    step_x /= sizeof(float);
-    step_y /= sizeof(float);
 
     if (du < cols && dv < rows)
     {
+        int xmap_index = mad24(dv, xmap_step, xmap_offset + du);
+        int ymap_index = mad24(dv, ymap_step, ymap_offset + du);
+
         float u = tl_u + du;
         float v = tl_v + dv;
         float x, y;
@@ -127,33 +115,27 @@ __kernel
         if (z > 0) { x /= z; y /= z; }
         else x = y = -1;
 
-        map_x[dv * step_x + du] = x;
-        map_y[dv * step_y + du] = y;
+        xmap[xmap_index] = x;
+        ymap[ymap_index] = y;
     }
 }
 
-__kernel
-    void buildWarpSphericalMaps
-    (
-    __global float * map_x,
-    __global float * map_y,
-    __constant float * ck_rinv,
-    int tl_u,
-    int tl_v,
-    int cols,
-    int rows,
-    int step_x,
-    int step_y,
-    float scale
-    )
+__kernel void buildWarpSphericalMaps(__global float * xmap, __global float * ymap,
+                                     __constant float * ck_rinv,
+                                     int tl_u, int tl_v,
+                                     int cols, int rows,
+                                     int xmap_step, int ymap_step,
+                                     int xmap_offset, int ymap_offset,
+                                     float scale)
 {
     int du = get_global_id(0);
     int dv = get_global_id(1);
-    step_x /= sizeof(float);
-    step_y /= sizeof(float);
 
     if (du < cols && dv < rows)
     {
+        int xmap_index = mad24(dv, xmap_step, xmap_offset + du);
+        int ymap_index = mad24(dv, ymap_step, ymap_offset + du);
+
         float u = tl_u + du;
         float v = tl_v + dv;
         float x, y;
@@ -174,63 +156,52 @@ __kernel
         if (z > 0) { x /= z; y /= z; }
         else x = y = -1;
 
-        map_x[dv * step_x + du] = x;
-        map_y[dv * step_y + du] = y;
+        xmap[xmap_index] = x;
+        ymap[ymap_index] = y;
     }
 }
 
-__kernel
-    void buildWarpAffineMaps
-    (
-    __global float * xmap,
-    __global float * ymap,
-    __constant float * c_warpMat,
-    int cols,
-    int rows,
-    int step_x,
-    int step_y
-    )
+__kernel void buildWarpAffineMaps(__global float * xmap, __global float * ymap,
+                                  __constant float * c_warpMat,
+                                  int cols, int rows,
+                                  int xmap_step, int ymap_step,
+                                  int xmap_offset, int ymap_offset) // steps and offsets index float elements directly
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
-    step_x /= sizeof(float);
-    step_y /= sizeof(float);
 
     if (x < cols && y < rows)
     {
-        const float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
-        const float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
+        int xmap_index = mad24(y, xmap_step, x + xmap_offset);
+        int ymap_index = mad24(y, ymap_step, x + ymap_offset);
 
-        map_x[y * step_x + x] = xcoo;
-        map_y[y * step_y + x] = ycoo;
+        float xcoo = c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2];
+        float ycoo = c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5];
+        // Affine result for (x, y): row 0 of the 2x3 coefficients gives xcoo, row 1 gives ycoo.
+        xmap[xmap_index] = xcoo;
+        ymap[ymap_index] = ycoo;
     }
 }
 
-__kernel
-    void buildWarpPerspectiveMaps
-    (
-    __global float * xmap,
-    __global float * ymap,
-    __constant float * c_warpMat,
-    int cols,
-    int rows,
-    int step_x,
-    int step_y
-    )
+__kernel void buildWarpPerspectiveMaps(__global float * xmap, __global float * ymap,
+                                       __constant float * c_warpMat,
+                                       int cols, int rows,
+                                       int xmap_step, int ymap_step,
+                                       int xmap_offset, int ymap_offset) // steps and offsets index float elements directly
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
-    step_x /= sizeof(float);
-    step_y /= sizeof(float);
 
     if (x < cols && y < rows)
     {
-        const float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]);
+        int xmap_index = mad24(y, xmap_step, x + xmap_offset);
+        int ymap_index = mad24(y, ymap_step, x + ymap_offset);
 
-        const float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
-        const float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
+        float coeff = 1.0f / (c_warpMat[6] * x + c_warpMat[7] * y + c_warpMat[8]); // reciprocal of the third-row (projective) term
+        float xcoo = coeff * (c_warpMat[0] * x + c_warpMat[1] * y + c_warpMat[2]);
+        float ycoo = coeff * (c_warpMat[3] * x + c_warpMat[4] * y + c_warpMat[5]);
 
-        map_x[y * step_x + x] = xcoo;
-        map_y[y * step_y + x] = ycoo;
+        xmap[xmap_index] = xcoo;
+        ymap[ymap_index] = ycoo;
     }
 }

From 8b57893e406c5df6dbbb29a2c805005b47db9428 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Fri, 8 Nov 2013 18:42:13 +0400
Subject: [PATCH 03/45] added an accuracy test for
 ocl::buildWarpPerspectiveMaps

---
 modules/ocl/test/test_warp.cpp | 110 +++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)

diff --git a/modules/ocl/test/test_warp.cpp b/modules/ocl/test/test_warp.cpp
index b9231d116..3da73dc23 100644
--- a/modules/ocl/test/test_warp.cpp
+++ b/modules/ocl/test/test_warp.cpp
@@ -156,6 +156,114 @@ OCL_TEST_P(WarpPerspective, Mat)
     }
 }
 
+// buildWarpPerspectiveMaps
+// Accuracy-test fixture for ocl::buildWarpPerspectiveMaps; its two bool parameters are read in SetUp() as mapInverse and useRoi.
+PARAM_TEST_CASE(BuildWarpPerspectiveMaps, bool, bool)
+{
+    bool useRoi, mapInverse;
+    Size dsize;
+    // Host maps and their OpenCL counterparts; the *_roi objects are presumably views into *_whole (set up by randomSubMat / generateOclMat) - confirm against those helpers.
+    Mat xmap_whole, ymap_whole, xmap_roi, ymap_roi;
+    ocl::oclMat gxmap_whole, gymap_whole, gxmap_roi, gymap_roi;
+
+    void SetUp()
+    {
+        mapInverse = GET_PARAM(0);
+        useRoi = GET_PARAM(1);
+    }
+    // Draws a random dsize and regenerates every map; randomBorder's upper bound is MAX_VALUE with useRoi and 0 without.
+    void random_roi()
+    {
+        dsize = randomSize(1, MAX_VALUE);
+
+        Border xmapBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
+        randomSubMat(xmap_whole, xmap_roi, dsize, xmapBorder, CV_32FC1, -MAX_VALUE, MAX_VALUE);
+
+        Border ymapBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
+        randomSubMat(ymap_whole, ymap_roi, dsize, ymapBorder, CV_32FC1, -MAX_VALUE, MAX_VALUE);
+
+        generateOclMat(gxmap_whole, gxmap_roi, xmap_whole, dsize, xmapBorder);
+        generateOclMat(gymap_whole, gymap_roi, ymap_whole, dsize, ymapBorder);
+    }
+    // Downloads the OpenCL x maps and compares both the whole buffer and the ROI with the host x maps.
+    void Near(double threshold = 0.0)
+    {
+        Mat whole, roi;
+        gxmap_whole.download(whole);
+        gxmap_roi.download(roi);
+
+        EXPECT_MAT_NEAR(xmap_whole, whole, threshold);
+        EXPECT_MAT_NEAR(xmap_roi, roi, threshold);
+    }
+    // Same comparison as Near(), applied to the y maps.
+    void Near1(double threshold = 0.0)
+    {
+        Mat whole, roi;
+        gymap_whole.download(whole);
+        gymap_roi.download(roi);
+
+        EXPECT_MAT_NEAR(ymap_whole, whole, threshold);
+        EXPECT_MAT_NEAR(ymap_roi, roi, threshold);
+    }
+};
+// Host reference for ocl::buildWarpPerspectiveMaps: (re)creates xmap/ymap as CV_32FC1 of size dsize and fills them per pixel.
+static void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, Mat &xmap, Mat &ymap)
+{
+    CV_Assert(M.rows == 3 && M.cols == 3);
+    CV_Assert(dsize.area() > 0);
+
+    xmap.create(dsize, CV_32FC1);
+    ymap.create(dsize, CV_32FC1);
+
+    float coeffs[3 * 3];
+    Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);
+    // coeffsMat wraps the coeffs array; M is converted as given when inverse is true, otherwise it is inverted first.
+    if (inverse)
+        M.convertTo(coeffsMat, coeffsMat.type());
+    else
+    {
+        cv::Mat iM;
+        invert(M, iM);
+        iM.convertTo(coeffsMat, coeffsMat.type());
+    }
+
+    for (int y = 0; y < dsize.height; ++y)
+    {
+        float * const xmap_ptr = xmap.ptr<float>(y);
+        float * const ymap_ptr = ymap.ptr<float>(y);
+
+        for (int x = 0; x < dsize.width; ++x)
+        {
+            float coeff = 1.0f / (x * coeffs[6] + y * coeffs[7] + coeffs[8]); // reciprocal of the third-row (projective) term
+            xmap_ptr[x] = (x * coeffs[0] + y * coeffs[1] + coeffs[2]) * coeff;
+            ymap_ptr[x] = (x * coeffs[3] + y * coeffs[4] + coeffs[5]) * coeff;
+        }
+    }
+}
+// Runs LOOP_TIMES random perspective transforms and compares ocl::buildWarpPerspectiveMaps with the host reference.
+OCL_TEST_P(BuildWarpPerspectiveMaps, Mat)
+{
+    for (int j = 0; j < LOOP_TIMES; j++)
+    {
+        random_roi();
+        // sp: corners of a MAX_VALUE x MAX_VALUE rectangle; dp: one random point drawn from each quadrant of that rectangle.
+        float cols = static_cast<float>(MAX_VALUE), rows = static_cast<float>(MAX_VALUE);
+        float cols2 = cols / 2.0f, rows2 = rows / 2.0f;
+        Point2f sp[] = { Point2f(0.0f, 0.0f), Point2f(cols, 0.0f), Point2f(0.0f, rows), Point2f(cols, rows) };
+        Point2f dp[] = { Point2f(rng.uniform(0.0f, cols2), rng.uniform(0.0f, rows2)),
+            Point2f(rng.uniform(cols2, cols), rng.uniform(0.0f, rows2)),
+            Point2f(rng.uniform(0.0f, cols2), rng.uniform(rows2, rows)),
+            Point2f(rng.uniform(cols2, cols), rng.uniform(rows2, rows)) };
+        Mat M = getPerspectiveTransform(sp, dp);
+
+        buildWarpPerspectiveMaps(M, mapInverse, dsize, xmap_roi, ymap_roi);
+        ocl::buildWarpPerspectiveMaps(M, mapInverse, dsize, gxmap_roi, gymap_roi);
+        // Near() checks the x maps, Near1() the y maps.
+        Near(1e-6);
+        Near1(1e-6);
+    }
+}
+
 /////////////////////////////////////////////////////////////////////////////////////////////////
 // remap
 
@@ -338,6 +446,8 @@ INSTANTIATE_TEST_CASE_P(ImgprocWarp, WarpPerspective, Combine(
                             Bool(),
                             Bool()));
 
+INSTANTIATE_TEST_CASE_P(ImgprocWarp, BuildWarpPerspectiveMaps, Combine(Bool(), Bool()));
+
 INSTANTIATE_TEST_CASE_P(ImgprocWarp, Remap_INTER_LINEAR, Combine(
                             Values(CV_8U, CV_16U, CV_16S, CV_32F, CV_64F),
                             Values(1, 2, 3, 4),

From eba6754b061efdcf9941fb2aed88cffa556e1f76 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Fri, 8 Nov 2013 19:07:06 +0400
Subject: [PATCH 04/45] fixed ocl::cvtColor for CV_YUV2BGRA and CV_YUV2RGBA

---
 modules/ocl/src/color.cpp           |  94 ++++++++++-----------
 modules/ocl/src/opencl/cvt_color.cl | 123 +++++++++++++---------------
 modules/ocl/test/test_color.cpp     |  85 +++++++++++++------
 3 files changed, 162 insertions(+), 140 deletions(-)

diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index 27c2fd5f0..766e992dd 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -50,52 +50,41 @@
 using namespace cv;
 using namespace cv::ocl;
 
-#ifndef CV_DESCALE
-#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
-#endif
-
-#ifndef FLT_EPSILON
-#define FLT_EPSILON     1.192092896e-07F
-#endif
-
-namespace
+static void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx)
 {
-
-void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx)
-{
-    int channels = src.oclchannels();
     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
 
     std::string build_options = format("-D DEPTH_%d", src.depth());
 
+    vector<pair<size_t , const void *> > args;
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+
+    size_t gt[3] = { src.cols, src.rows, 1 }, lt[3] = { 16, 16, 1 };
+    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2Gray", gt, lt, args, -1, -1, build_options.c_str());
+}
+
+static void Gray2RGB_caller(const oclMat &src, oclMat &dst)
+{
+    int channels = dst.channels();
+    std::string build_options = format("-D DEPTH_%d", src.depth());
+    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
+    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
+
     vector<pair<size_t , const void *> > args;
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&channels));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
-    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1};
-    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2Gray", gt, lt, args, -1, -1, build_options.c_str());
-}
-
-void Gray2RGB_caller(const oclMat &src, oclMat &dst)
-{
-    std::string build_options = format("-D DEPTH_%d", src.depth());
-    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
-    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
-
-    vector<pair<size_t , const void *> > args;
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
     args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
     args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
@@ -105,9 +94,8 @@ void Gray2RGB_caller(const oclMat &src, oclMat &dst)
     openCLExecuteKernel(src.clCxt, &cvt_color, "Gray2RGB", gt, lt, args, -1, -1, build_options.c_str());
 }
 
-void RGB2YUV_caller(const oclMat &src, oclMat &dst, int bidx)
+static void RGB2YUV_caller(const oclMat &src, oclMat &dst, int bidx)
 {
-    int channels = src.oclchannels();
     std::string build_options = format("-D DEPTH_%d", src.depth());
     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
@@ -117,7 +105,6 @@ void RGB2YUV_caller(const oclMat &src, oclMat &dst, int bidx)
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&channels));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
     args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
     args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
@@ -128,9 +115,9 @@ void RGB2YUV_caller(const oclMat &src, oclMat &dst, int bidx)
     openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YUV", gt, lt, args, -1, -1, build_options.c_str());
 }
 
-void YUV2RGB_caller(const oclMat &src, oclMat &dst, int bidx)
+static void YUV2RGB_caller(const oclMat &src, oclMat &dst, int bidx)
 {
-    int channels = src.oclchannels();
+    int channels = dst.channels();
     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
 
@@ -152,7 +139,7 @@ void YUV2RGB_caller(const oclMat &src, oclMat &dst, int bidx)
     openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGB", gt, lt, args, -1, -1, buildOptions.c_str());
 }
 
-void YUV2RGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx)
+static void YUV2RGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx)
 {
     std::string build_options = format("-D DEPTH_%d", src.depth());
     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
@@ -175,9 +162,8 @@ void YUV2RGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx)
     openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGBA_NV12", gt, lt, args, -1, -1, build_options.c_str());
 }
 
-void RGB2YCrCb_caller(const oclMat &src, oclMat &dst, int bidx)
+static void RGB2YCrCb_caller(const oclMat &src, oclMat &dst, int bidx)
 {
-    int channels = src.oclchannels();
     std::string build_options = format("-D DEPTH_%d", src.depth());
     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
@@ -187,7 +173,6 @@ void RGB2YCrCb_caller(const oclMat &src, oclMat &dst, int bidx)
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&channels));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
     args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
     args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
@@ -198,10 +183,10 @@ void RGB2YCrCb_caller(const oclMat &src, oclMat &dst, int bidx)
     openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YCrCb", gt, lt, args, -1, -1, build_options.c_str());
 }
 
-void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
+static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
 {
     Size sz = src.size();
-    int scn = src.oclchannels(), depth = src.depth(), bidx;
+    int scn = src.channels(), depth = src.depth(), bidx;
 
     CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32F);
 
@@ -239,7 +224,7 @@ void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
     case CV_RGB2YUV:
     {
         CV_Assert(scn == 3 || scn == 4);
-        bidx = code == CV_RGB2YUV ? 0 : 2;
+        bidx = code == CV_BGR2YUV ? 0 : 2;
         dst.create(sz, CV_MAKETYPE(depth, 3));
         RGB2YUV_caller(src, dst, bidx);
         break;
@@ -247,9 +232,11 @@ void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
     case CV_YUV2BGR:
     case CV_YUV2RGB:
     {
-        CV_Assert(scn == 3 || scn == 4);
-        bidx = code == CV_YUV2RGB ? 0 : 2;
-        dst.create(sz, CV_MAKETYPE(depth, 3));
+        if( dcn <= 0 )
+            dcn = 3;
+        CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
+        bidx = code == CV_YUV2BGR ? 0 : 2;
+        dst.create(sz, CV_MAKETYPE(depth, dcn));
         YUV2RGB_caller(src, dst, bidx);
         break;
     }
@@ -260,7 +247,7 @@ void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
     {
         CV_Assert(scn == 1);
         CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
-        dcn  = code == CV_YUV2BGRA_NV12 || code == CV_YUV2RGBA_NV12 ? 4 : 3;
+        dcn = code == CV_YUV2BGRA_NV12 || code == CV_YUV2RGBA_NV12 ? 4 : 3;
         bidx = code == CV_YUV2BGRA_NV12 || code == CV_YUV2BGR_NV12 ? 0 : 2;
 
         Size dstSz(sz.width, sz.height * 2 / 3);
@@ -280,6 +267,12 @@ void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
     case CV_YCrCb2BGR:
     case CV_YCrCb2RGB:
     {
+        if( dcn <= 0 )
+            dcn = 3;
+        CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
+        bidx = code == CV_YCrCb2RGB ? 0 : 2;
+        dst.create(sz, CV_MAKETYPE(depth, dcn));
+//        YUV2RGB_caller(src, dst, bidx);
         break;
     }
     /*
@@ -297,7 +290,6 @@ void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
     }
 }
-}
 
 void cv::ocl::cvtColor(const oclMat &src, oclMat &dst, int code, int dcn)
 {
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
index 01286f7ad..2ba7739c2 100644
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ b/modules/ocl/src/opencl/cvt_color.cl
@@ -46,22 +46,18 @@
 
 /**************************************PUBLICFUNC*************************************/
 
-#if defined (DOUBLE_SUPPORT)
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#endif
-
 #if defined (DEPTH_0)
 #define DATA_TYPE uchar
 #define MAX_NUM  255
 #define HALF_MAX 128
-#define SAT_CAST(num) convert_uchar_sat(num)
+#define SAT_CAST(num) convert_uchar_sat_rte(num)
 #endif
 
 #if defined (DEPTH_2)
 #define DATA_TYPE ushort
 #define MAX_NUM  65535
 #define HALF_MAX 32768
-#define SAT_CAST(num) convert_ushort_sat(num)
+#define SAT_CAST(num) convert_ushort_sat_rte(num)
 #endif
 
 #if defined (DEPTH_5)
@@ -71,11 +67,7 @@
 #define SAT_CAST(num) (num)
 #endif
 
-#ifndef DATA_TYPE
-    #define DATA_TYPE UNDEFINED
-#endif
-
-#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))
+#define CV_DESCALE(x, n) (((x) + (1 << ((n)-1))) >> (n))
 
 enum
 {
@@ -89,16 +81,16 @@ enum
 
 ///////////////////////////////////// RGB <-> GRAY //////////////////////////////////////
 
-__kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step, int channels,
+__kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
                        int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
                        int src_offset, int dst_offset)
 {
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
+    int x = get_global_id(0);
+    int y = get_global_id(1);
 
     if (y < rows && x < cols)
     {
-        int src_idx = mad24(y, src_step, src_offset + x * channels);
+        int src_idx = mad24(y, src_step, src_offset + (x << 2));
         int dst_idx = mad24(y, dst_step, dst_offset + x);
 #if defined (DEPTH_5)
         dst[dst_idx] = src[src_idx + bidx] * 0.114f + src[src_idx + 1] * 0.587f + src[src_idx + (bidx^2)] * 0.299f;
@@ -108,22 +100,24 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step, int chann
     }
 }
 
-__kernel void Gray2RGB(int cols,int rows,int src_step,int dst_step,
+__kernel void Gray2RGB(int cols, int rows, int src_step, int dst_step, int channels,
                        __global const DATA_TYPE* src, __global DATA_TYPE* dst,
                        int src_offset, int dst_offset)
 {
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
+    int x = get_global_id(0);
+    int y = get_global_id(1);
 
     if (y < rows && x < cols)
     {
         int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x * 4);
+        int dst_idx = mad24(y, dst_step, dst_offset + (x << 2));
+
         DATA_TYPE val = src[src_idx];
-        dst[dst_idx++] = val;
-        dst[dst_idx++] = val;
-        dst[dst_idx++] = val;
-        dst[dst_idx] = MAX_NUM;
+        dst[dst_idx] = val;
+        dst[dst_idx + 1] = val;
+        dst[dst_idx + 2] = val;
+        if (channels == 4)
+            dst[dst_idx + 3] = MAX_NUM;
     }
 }
 
@@ -132,7 +126,7 @@ __kernel void Gray2RGB(int cols,int rows,int src_step,int dst_step,
 __constant float c_RGB2YUVCoeffs_f[5]  = { 0.114f, 0.587f, 0.299f, 0.492f, 0.877f };
 __constant int   c_RGB2YUVCoeffs_i[5]  = { B2Y, G2Y, R2Y, 8061, 14369 };
 
-__kernel void RGB2YUV(int cols,int rows,int src_step,int dst_step,int channels,
+__kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step,
                       int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
                       int src_offset, int dst_offset)
 {
@@ -141,35 +135,34 @@ __kernel void RGB2YUV(int cols,int rows,int src_step,int dst_step,int channels,
 
     if (y < rows && x < cols)
     {
-        x *= channels;
+        x <<= 2;
         int src_idx = mad24(y, src_step, src_offset + x);
         int dst_idx = mad24(y, dst_step, dst_offset + x);
-        dst += dst_idx;
-        const DATA_TYPE rgb[] = {src[src_idx], src[src_idx + 1], src[src_idx + 2]};
+        DATA_TYPE rgb[] = { src[src_idx], src[src_idx + 1], src[src_idx + 2] };
 
 #if defined (DEPTH_5)
         __constant float * coeffs = c_RGB2YUVCoeffs_f;
-        const DATA_TYPE Y  = rgb[0] * coeffs[bidx] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx^2];
-        const DATA_TYPE Cr = (rgb[bidx] - Y) * coeffs[3] + HALF_MAX;
-        const DATA_TYPE Cb = (rgb[bidx^2] - Y) * coeffs[4] + HALF_MAX;
+        DATA_TYPE Y  = rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx];
+        DATA_TYPE Cr = (rgb[bidx^2] - Y) * coeffs[3] + HALF_MAX;
+        DATA_TYPE Cb = (rgb[bidx] - Y) * coeffs[4] + HALF_MAX;
 #else
         __constant int * coeffs = c_RGB2YUVCoeffs_i;
-        const int delta = HALF_MAX * (1 << yuv_shift);
-        const int Y =  CV_DESCALE(rgb[0] * coeffs[bidx] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx^2], yuv_shift);
-        const int Cr = CV_DESCALE((rgb[bidx] - Y) * coeffs[3] + delta, yuv_shift);
-        const int Cb = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[4] + delta, yuv_shift);
+        int delta = HALF_MAX * (1 << yuv_shift);
+        int Y =  CV_DESCALE(rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx], yuv_shift);
+        int Cr = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[3] + delta, yuv_shift);
+        int Cb = CV_DESCALE((rgb[bidx] - Y) * coeffs[4] + delta, yuv_shift);
 #endif
 
-        dst[0] = SAT_CAST( Y );
-        dst[1] = SAT_CAST( Cr );
-        dst[2] = SAT_CAST( Cb );
+        dst[dst_idx] = SAT_CAST( Y );
+        dst[dst_idx + 1] = SAT_CAST( Cr );
+        dst[dst_idx + 2] = SAT_CAST( Cb );
     }
 }
 
 __constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
 __constant int   c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 };
 
-__kernel void YUV2RGB(int cols,int rows,int src_step,int dst_step,int channels,
+__kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step, int channels,
                       int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
                       int src_offset, int dst_offset)
 {
@@ -178,27 +171,28 @@ __kernel void YUV2RGB(int cols,int rows,int src_step,int dst_step,int channels,
 
     if (y < rows && x < cols)
     {
-        x *= channels;
+        x <<= 2;
         int src_idx = mad24(y, src_step, src_offset + x);
         int dst_idx = mad24(y, dst_step, dst_offset + x);
-        dst += dst_idx;
-        const DATA_TYPE yuv[] = {src[src_idx], src[src_idx + 1], src[src_idx + 2]};
+        DATA_TYPE yuv[] = { src[src_idx], src[src_idx + 1], src[src_idx + 2] };
 
 #if defined (DEPTH_5)
         __constant float * coeffs = c_YUV2RGBCoeffs_f;
-        const float b = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[3];
-        const float g = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1];
-        const float r = yuv[0] + (yuv[1] - HALF_MAX) * coeffs[0];
+        float b = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[3];
+        float g = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1];
+        float r = yuv[0] + (yuv[1] - HALF_MAX) * coeffs[0];
 #else
         __constant int * coeffs = c_YUV2RGBCoeffs_i;
-        const int b = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[3], yuv_shift);
-        const int g = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1], yuv_shift);
-        const int r = yuv[0] + CV_DESCALE((yuv[1] - HALF_MAX) * coeffs[0], yuv_shift);
+        int b = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[3], yuv_shift);
+        int g = yuv[0] + CV_DESCALE((yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1], yuv_shift);
+        int r = yuv[0] + CV_DESCALE((yuv[1] - HALF_MAX) * coeffs[0], yuv_shift);
 #endif
 
-        dst[bidx^2] = SAT_CAST( b );
-        dst[1]      = SAT_CAST( g );
-        dst[bidx]   = SAT_CAST( r );
+        dst[dst_idx + bidx] = SAT_CAST( b );
+        dst[dst_idx + 1]      = SAT_CAST( g );
+        dst[dst_idx + (bidx^2)]   = SAT_CAST( r );
+        if (channels == 4)
+            dst[dst_idx + 3] = MAX_NUM;
     }
 }
 
@@ -261,12 +255,12 @@ __kernel void YUV2RGBA_NV12(int cols,int rows,int src_step,int dst_step,
     }
 }
 
-///////////////////////////////////// RGB <-> YUV //////////////////////////////////////
+///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
 
 __constant float c_RGB2YCrCbCoeffs_f[5] = {0.299f, 0.587f, 0.114f, 0.713f, 0.564f};
 __constant int   c_RGB2YCrCbCoeffs_i[5] = {R2Y, G2Y, B2Y, 11682, 9241};
 
-__kernel void RGB2YCrCb(int cols,int rows,int src_step,int dst_step,int channels,
+__kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
                         int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
                         int src_offset, int dst_offset)
 {
@@ -275,28 +269,27 @@ __kernel void RGB2YCrCb(int cols,int rows,int src_step,int dst_step,int channels
 
     if (y < rows && x < cols)
     {
-        x *= channels;
+        x <<= 2;
         int src_idx = mad24(y, src_step, src_offset + x);
         int dst_idx = mad24(y, dst_step, dst_offset + x);
 
-        dst += dst_idx;
-        const DATA_TYPE rgb[] = { src[src_idx], src[src_idx + 1], src[src_idx + 2] };
+        DATA_TYPE rgb[] = { src[src_idx], src[src_idx + 1], src[src_idx + 2] };
 
 #if defined (DEPTH_5)
         __constant float * coeffs = c_RGB2YCrCbCoeffs_f;
-        const DATA_TYPE Y  = rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx];
-        const DATA_TYPE Cr = (rgb[bidx^2] - Y) * coeffs[3] + HALF_MAX;
-        const DATA_TYPE Cb = (rgb[bidx] - Y) * coeffs[4] + HALF_MAX;
+        DATA_TYPE Y  = rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx];
+        DATA_TYPE Cr = (rgb[bidx^2] - Y) * coeffs[3] + HALF_MAX;
+        DATA_TYPE Cb = (rgb[bidx] - Y) * coeffs[4] + HALF_MAX;
 #else
         __constant int * coeffs = c_RGB2YCrCbCoeffs_i;
-        const int delta = HALF_MAX * (1 << yuv_shift);
-        const int Y =  CV_DESCALE(rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx], yuv_shift);
-        const int Cr = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[3] + delta, yuv_shift);
-        const int Cb = CV_DESCALE((rgb[bidx] - Y) * coeffs[4] + delta, yuv_shift);
+        int delta = HALF_MAX * (1 << yuv_shift);
+        int Y =  CV_DESCALE(rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx], yuv_shift);
+        int Cr = CV_DESCALE((rgb[bidx^2] - Y) * coeffs[3] + delta, yuv_shift);
+        int Cb = CV_DESCALE((rgb[bidx] - Y) * coeffs[4] + delta, yuv_shift);
 #endif
 
-        dst[0] = SAT_CAST( Y );
-        dst[1] = SAT_CAST( Cr );
-        dst[2] = SAT_CAST( Cb );
+        dst[dst_idx] = SAT_CAST( Y );
+        dst[dst_idx + 1] = SAT_CAST( Cr );
+        dst[dst_idx + 2] = SAT_CAST( Cb );
     }
 }
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
index 4b6a6e41b..f5f9f4317 100644
--- a/modules/ocl/test/test_color.cpp
+++ b/modules/ocl/test/test_color.cpp
@@ -46,8 +46,6 @@
 #include "test_precomp.hpp"
 #ifdef HAVE_OPENCL
 
-namespace
-{
 using namespace testing;
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -108,8 +106,8 @@ PARAM_TEST_CASE(CvtColor, MatDepth, bool)
         {
             random_roi(channelsIn, channelsOut);
 
-            cv::cvtColor(src1_roi, dst1_roi, code);
-            cv::ocl::cvtColor(gsrc1_roi, gdst1_roi, code);
+            cv::cvtColor(src1_roi, dst1_roi, code, channelsOut);
+            cv::ocl::cvtColor(gsrc1_roi, gdst1_roi, code, channelsOut);
 
             Near();
         }
@@ -125,7 +123,7 @@ OCL_TEST_P(CvtColor, RGB2GRAY)
 OCL_TEST_P(CvtColor, GRAY2RGB)
 {
     doTest(1, 3, CVTCODE(GRAY2RGB));
-};
+}
 
 OCL_TEST_P(CvtColor, BGR2GRAY)
 {
@@ -134,25 +132,26 @@ OCL_TEST_P(CvtColor, BGR2GRAY)
 OCL_TEST_P(CvtColor, GRAY2BGR)
 {
     doTest(1, 3, CVTCODE(GRAY2BGR));
-};
+}
 
 OCL_TEST_P(CvtColor, RGBA2GRAY)
 {
-    doTest(3, 1, CVTCODE(RGBA2GRAY));
+    doTest(4, 1, CVTCODE(RGBA2GRAY));
 }
 OCL_TEST_P(CvtColor, GRAY2RGBA)
 {
-    doTest(1, 3, CVTCODE(GRAY2RGBA));
-};
+    doTest(1, 4, CVTCODE(GRAY2RGBA));
+}
 
 OCL_TEST_P(CvtColor, BGRA2GRAY)
 {
-    doTest(3, 1, CVTCODE(BGRA2GRAY));
+    doTest(4, 1, CVTCODE(BGRA2GRAY));
 }
 OCL_TEST_P(CvtColor, GRAY2BGRA)
 {
-    doTest(1, 3, CVTCODE(GRAY2BGRA));
-};
+    doTest(1, 4, CVTCODE(GRAY2BGRA));
+}
+
 
 OCL_TEST_P(CvtColor, RGB2YUV)
 {
@@ -162,6 +161,14 @@ OCL_TEST_P(CvtColor, BGR2YUV)
 {
     doTest(3, 3, CVTCODE(BGR2YUV));
 }
+OCL_TEST_P(CvtColor, RGBA2YUV)
+{
+    doTest(4, 3, CVTCODE(RGB2YUV));
+}
+OCL_TEST_P(CvtColor, BGRA2YUV)
+{
+    doTest(4, 3, CVTCODE(BGR2YUV));
+}
 OCL_TEST_P(CvtColor, YUV2RGB)
 {
     doTest(3, 3, CVTCODE(YUV2RGB));
@@ -170,6 +177,16 @@ OCL_TEST_P(CvtColor, YUV2BGR)
 {
     doTest(3, 3, CVTCODE(YUV2BGR));
 }
+OCL_TEST_P(CvtColor, YUV2RGBA)
+{
+    doTest(3, 4, CVTCODE(YUV2RGB));
+}
+OCL_TEST_P(CvtColor, YUV2BGRA)
+{
+    doTest(3, 4, CVTCODE(YUV2BGR));
+}
+
+
 OCL_TEST_P(CvtColor, RGB2YCrCb)
 {
     doTest(3, 3, CVTCODE(RGB2YCrCb));
@@ -178,8 +195,33 @@ OCL_TEST_P(CvtColor, BGR2YCrCb)
 {
     doTest(3, 3, CVTCODE(BGR2YCrCb));
 }
+OCL_TEST_P(CvtColor, RGBA2YCrCb)
+{
+    doTest(4, 3, CVTCODE(RGB2YCrCb));
+}
+OCL_TEST_P(CvtColor, BGRA2YCrCb)
+{
+    doTest(4, 3, CVTCODE(BGR2YCrCb));
+}
+//OCL_TEST_P(CvtColor, YCrCb2RGB)
+//{
+//    doTest(3, 3, CVTCODE(YCrCb2RGB));
+//}
+//OCL_TEST_P(CvtColor, YCrCb2BGR)
+//{
+//    doTest(3, 3, CVTCODE(YCrCb2BGR));
+//}
+//OCL_TEST_P(CvtColor, YCrCb2RGBA)
+//{
+//    doTest(3, 4, CVTCODE(YCrCb2RGB));
+//}
+//OCL_TEST_P(CvtColor, YCrCb2BGRA)
+//{
+//    doTest(3, 4, CVTCODE(YCrCb2BGR));
+//}
 
-struct CvtColor_YUV420 : CvtColor
+struct CvtColor_YUV420 :
+        public CvtColor
 {
     void random_roi(int channelsIn, int channelsOut)
     {
@@ -203,37 +245,32 @@ struct CvtColor_YUV420 : CvtColor
 OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV12)
 {
     doTest(1, 4, CV_YUV2RGBA_NV12);
-};
+}
 
 OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV12)
 {
     doTest(1, 4, CV_YUV2BGRA_NV12);
-};
+}
 
 OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV12)
 {
     doTest(1, 3, CV_YUV2RGB_NV12);
-};
+}
 
 OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12)
 {
     doTest(1, 3, CV_YUV2BGR_NV12);
-};
+}
 
 
 INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor,
                             testing::Combine(
                                 testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)),
-                                Bool()
-                            )
-                        );
+                                Bool()));
 
 INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor_YUV420,
                             testing::Combine(
                                 testing::Values(MatDepth(CV_8U)),
-                                Bool()
-                            )
-                        );
+                                Bool()));
 
-}
 #endif

From a57030a0cda4a144c1b800ce5e7efb6c127aa571 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Sat, 9 Nov 2013 17:03:30 +0400
Subject: [PATCH 05/45] added YCrCb to RGB, BGR, RGBA, BGRA modes to
 ocl::cvtColor

---
 modules/ocl/src/color.cpp           | 28 +++++++++++++++++++--
 modules/ocl/src/opencl/cvt_color.cl | 38 +++++++++++++++++++++++++++++
 modules/ocl/test/test_color.cpp     | 32 ++++++++++++------------
 3 files changed, 80 insertions(+), 18 deletions(-)

diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index 766e992dd..b25b71fee 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -162,6 +162,30 @@ static void YUV2RGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx)
     openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGBA_NV12", gt, lt, args, -1, -1, build_options.c_str());
 }
 
+static void YCrCb2RGB_caller(const oclMat &src, oclMat &dst, int bidx)
+{
+    int channels = dst.channels();
+    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
+    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
+
+    std::string buildOptions = format("-D DEPTH_%d", src.depth());
+
+    vector<pair<size_t , const void *> > args;
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&channels));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+
+    size_t gt[3] = { src.cols, src.rows, 1 }, lt[3] = { 16, 16, 1 };
+    openCLExecuteKernel(src.clCxt, &cvt_color, "YCrCb2RGB", gt, lt, args, -1, -1, buildOptions.c_str());
+}
+
 static void RGB2YCrCb_caller(const oclMat &src, oclMat &dst, int bidx)
 {
     std::string build_options = format("-D DEPTH_%d", src.depth());
@@ -270,9 +294,9 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         if( dcn <= 0 )
             dcn = 3;
         CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
-        bidx = code == CV_YCrCb2RGB ? 0 : 2;
+        bidx = code == CV_YCrCb2BGR ? 0 : 2;
         dst.create(sz, CV_MAKETYPE(depth, dcn));
-//        YUV2RGB_caller(src, dst, bidx);
+        YCrCb2RGB_caller(src, dst, bidx);
         break;
     }
     /*
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
index 2ba7739c2..de53da52e 100644
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ b/modules/ocl/src/opencl/cvt_color.cl
@@ -293,3 +293,41 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
         dst[dst_idx + 2] = SAT_CAST( Cb );
     }
 }
+
+__constant float c_YCrCb2RGBCoeffs_f[4] = { 1.403f, -0.714f, -0.344f, 1.773f };
+__constant int   c_YCrCb2RGBCoeffs_i[4] = { 22987, -11698, -5636, 29049 };
+
+__kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step, int channels,
+                        int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
+                        int src_offset, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < rows && x < cols)
+    {
+        x <<= 2;
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
+
+        DATA_TYPE ycrcb[] = { src[src_idx], src[src_idx + 1], src[src_idx + 2] };
+
+#ifdef DEPTH_5
+        __constant float * coeff = c_YCrCb2RGBCoeffs_f;
+        float r = ycrcb[0] + coeff[0] * (ycrcb[1] - HALF_MAX);
+        float g = ycrcb[0] + coeff[1] * (ycrcb[1] - HALF_MAX) + coeff[2] * (ycrcb[2] - HALF_MAX);
+        float b = ycrcb[0] + coeff[3] * (ycrcb[2] - HALF_MAX);
+#else
+        __constant int * coeff = c_YCrCb2RGBCoeffs_i;
+        int r = ycrcb[0] + CV_DESCALE(coeff[0] * (ycrcb[1] - HALF_MAX), yuv_shift);
+        int g = ycrcb[0] + CV_DESCALE(coeff[1] * (ycrcb[1] - HALF_MAX) + coeff[2] * (ycrcb[2] - HALF_MAX), yuv_shift);
+        int b = ycrcb[0] + CV_DESCALE(coeff[3] * (ycrcb[2] - HALF_MAX), yuv_shift);
+#endif
+
+        dst[dst_idx + (bidx^2)] = SAT_CAST(r);
+        dst[dst_idx + 1] = SAT_CAST(g);
+        dst[dst_idx + bidx] = SAT_CAST(b);
+        if (channels == 4)
+            dst[dst_idx + 3] = MAX_NUM;
+    }
+}
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
index f5f9f4317..b2f5c6fb2 100644
--- a/modules/ocl/test/test_color.cpp
+++ b/modules/ocl/test/test_color.cpp
@@ -203,22 +203,22 @@ OCL_TEST_P(CvtColor, BGRA2YCrCb)
 {
     doTest(4, 3, CVTCODE(BGR2YCrCb));
 }
-//OCL_TEST_P(CvtColor, YCrCb2RGB)
-//{
-//    doTest(3, 3, CVTCODE(YCrCb2RGB));
-//}
-//OCL_TEST_P(CvtColor, YCrCb2BGR)
-//{
-//    doTest(3, 3, CVTCODE(YCrCb2BGR));
-//}
-//OCL_TEST_P(CvtColor, YCrCb2RGBA)
-//{
-//    doTest(3, 4, CVTCODE(YCrCb2RGB));
-//}
-//OCL_TEST_P(CvtColor, YCrCb2BGRA)
-//{
-//    doTest(3, 4, CVTCODE(YCrCb2BGR));
-//}
+OCL_TEST_P(CvtColor, YCrCb2RGB)
+{
+    doTest(3, 3, CVTCODE(YCrCb2RGB));
+}
+OCL_TEST_P(CvtColor, YCrCb2BGR)
+{
+    doTest(3, 3, CVTCODE(YCrCb2BGR));
+}
+OCL_TEST_P(CvtColor, YCrCb2RGBA)
+{
+    doTest(3, 4, CVTCODE(YCrCb2RGB));
+}
+OCL_TEST_P(CvtColor, YCrCb2BGRA)
+{
+    doTest(3, 4, CVTCODE(YCrCb2BGR));
+}
 
 struct CvtColor_YUV420 :
         public CvtColor

From 33ae64201c024ed3af1cc9eb38e64e58842c3d87 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Sat, 9 Nov 2013 19:14:38 +0400
Subject: [PATCH 06/45] color.cpp refactoring: created generic interface for
 toRGB and fromRGB callers

---
 modules/ocl/src/color.cpp           | 150 ++++------------------------
 modules/ocl/src/opencl/cvt_color.cl |  14 +--
 modules/ocl/test/test_color.cpp     |   7 +-
 3 files changed, 32 insertions(+), 139 deletions(-)

diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index b25b71fee..aed9a7f0c 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -50,7 +50,7 @@
 using namespace cv;
 using namespace cv::ocl;
 
-static void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx)
+static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName)
 {
     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
@@ -58,121 +58,30 @@ static void RGB2Gray_caller(const oclMat &src, oclMat &dst, int bidx)
     std::string build_options = format("-D DEPTH_%d", src.depth());
 
     vector<pair<size_t , const void *> > args;
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
-    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    size_t gt[3] = { src.cols, src.rows, 1 }, lt[3] = { 16, 16, 1 };
-    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2Gray", gt, lt, args, -1, -1, build_options.c_str());
-}
-
-static void Gray2RGB_caller(const oclMat &src, oclMat &dst)
-{
-    int channels = dst.channels();
-    std::string build_options = format("-D DEPTH_%d", src.depth());
-    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
-    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
-
-    vector<pair<size_t , const void *> > args;
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&channels));
-    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1};
-    openCLExecuteKernel(src.clCxt, &cvt_color, "Gray2RGB", gt, lt, args, -1, -1, build_options.c_str());
-}
-
-static void RGB2YUV_caller(const oclMat &src, oclMat &dst, int bidx)
-{
-    std::string build_options = format("-D DEPTH_%d", src.depth());
-    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
-    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
-
-    vector<pair<size_t , const void *> > args;
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
-    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1};
-    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YUV", gt, lt, args, -1, -1, build_options.c_str());
-}
-
-static void YUV2RGB_caller(const oclMat &src, oclMat &dst, int bidx)
-{
-    int channels = dst.channels();
-    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
-    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
-
-    std::string buildOptions = format("-D DEPTH_%d", src.depth());
-
-    vector<pair<size_t , const void *> > args;
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&channels));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
-    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1};
-    openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGB", gt, lt, args, -1, -1, buildOptions.c_str());
-}
-
-static void YUV2RGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx)
-{
-    std::string build_options = format("-D DEPTH_%d", src.depth());
-    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
-    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
-
-    vector<pair<size_t , const void *> > args;
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
     args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
     args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
 
-    size_t gt[3] = {dst.cols / 2, dst.rows / 2, 1}, lt[3] = {16, 16, 1};
-    openCLExecuteKernel(src.clCxt, &cvt_color, "YUV2RGBA_NV12", gt, lt, args, -1, -1, build_options.c_str());
+    size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
+    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
 }
 
-static void YCrCb2RGB_caller(const oclMat &src, oclMat &dst, int bidx)
+static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName)
 {
     int channels = dst.channels();
+    std::string build_options = format("-D DEPTH_%d", src.depth());
     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
 
-    std::string buildOptions = format("-D DEPTH_%d", src.depth());
-
     vector<pair<size_t , const void *> > args;
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&channels));
@@ -182,29 +91,8 @@ static void YCrCb2RGB_caller(const oclMat &src, oclMat &dst, int bidx)
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
 
-    size_t gt[3] = { src.cols, src.rows, 1 }, lt[3] = { 16, 16, 1 };
-    openCLExecuteKernel(src.clCxt, &cvt_color, "YCrCb2RGB", gt, lt, args, -1, -1, buildOptions.c_str());
-}
-
-static void RGB2YCrCb_caller(const oclMat &src, oclMat &dst, int bidx)
-{
-    std::string build_options = format("-D DEPTH_%d", src.depth());
-    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
-    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
-
-    vector<pair<size_t , const void *> > args;
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src.rows));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
-    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
-    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
-
-    size_t gt[3] = {src.cols, src.rows, 1}, lt[3] = {16, 16, 1};
-    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2YCrCb", gt, lt, args, -1, -1, build_options.c_str());
+    size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
+    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
 }
 
 static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
@@ -232,7 +120,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         CV_Assert(scn == 3 || scn == 4);
         bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
         dst.create(sz, CV_MAKETYPE(depth, 1));
-        RGB2Gray_caller(src, dst, bidx);
+        fromRGB_caller(src, dst, bidx, "RGB2Gray");
         break;
     }
     case CV_GRAY2BGR:
@@ -241,7 +129,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         CV_Assert(scn == 1);
         dcn  = code == CV_GRAY2BGRA ? 4 : 3;
         dst.create(sz, CV_MAKETYPE(depth, dcn));
-        Gray2RGB_caller(src, dst);
+        toRGB_caller(src, dst, 0, "Gray2RGB");
         break;
     }
     case CV_BGR2YUV:
@@ -250,7 +138,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         CV_Assert(scn == 3 || scn == 4);
         bidx = code == CV_BGR2YUV ? 0 : 2;
         dst.create(sz, CV_MAKETYPE(depth, 3));
-        RGB2YUV_caller(src, dst, bidx);
+        fromRGB_caller(src, dst, bidx, "RGB2YUV");
         break;
     }
     case CV_YUV2BGR:
@@ -261,7 +149,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
         bidx = code == CV_YUV2BGR ? 0 : 2;
         dst.create(sz, CV_MAKETYPE(depth, dcn));
-        YUV2RGB_caller(src, dst, bidx);
+        toRGB_caller(src, dst, bidx, "YUV2RGB");
         break;
     }
     case CV_YUV2RGB_NV12:
@@ -276,7 +164,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
 
         Size dstSz(sz.width, sz.height * 2 / 3);
         dst.create(dstSz, CV_MAKETYPE(depth, dcn));
-        YUV2RGB_NV12_caller(src, dst, bidx);
+        toRGB_caller(src, dst, bidx, "YUV2RGBA_NV12");
         break;
     }
     case CV_BGR2YCrCb:
@@ -285,7 +173,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         CV_Assert(scn == 3 || scn == 4);
         bidx = code == CV_BGR2YCrCb ? 0 : 2;
         dst.create(sz, CV_MAKETYPE(depth, 3));
-        RGB2YCrCb_caller(src, dst, bidx);
+        fromRGB_caller(src, dst, bidx, "RGB2YCrCb");
         break;
     }
     case CV_YCrCb2BGR:
@@ -296,7 +184,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
         bidx = code == CV_YCrCb2BGR ? 0 : 2;
         dst.create(sz, CV_MAKETYPE(depth, dcn));
-        YCrCb2RGB_caller(src, dst, bidx);
+        toRGB_caller(src, dst, bidx, "YCrCb2RGB");
         break;
     }
     /*
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
index de53da52e..d9426b28b 100644
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ b/modules/ocl/src/opencl/cvt_color.cl
@@ -100,7 +100,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
     }
 }
 
-__kernel void Gray2RGB(int cols, int rows, int src_step, int dst_step, int channels,
+__kernel void Gray2RGB(int cols, int rows, int src_step, int dst_step, int channels, int bidx,
                        __global const DATA_TYPE* src, __global DATA_TYPE* dst,
                        int src_offset, int dst_offset)
 {
@@ -203,17 +203,17 @@ __constant int ITUR_BT_601_CVG = 852492;
 __constant int ITUR_BT_601_CVR = 1673527;
 __constant int ITUR_BT_601_SHIFT = 20;
 
-__kernel void YUV2RGBA_NV12(int cols,int rows,int src_step,int dst_step,
-                            int bidx, int width, int height, __global const uchar* src, __global uchar* dst,
+__kernel void YUV2RGBA_NV12(int cols, int rows, int src_step, int dst_step, int channels,
+                            int bidx, __global const uchar* src, __global uchar* dst,
                             int src_offset, int dst_offset)
 {
-    const int x = get_global_id(0); // max_x = width / 2
-    const int y = get_global_id(1); // max_y = height/ 2
+    const int x = get_global_id(0);
+    const int y = get_global_id(1);
 
-    if (y < height / 2 && x < width / 2 )
+    if (y < rows / 2 && x < cols / 2 )
     {
         __global const uchar* ysrc = src + mad24(y << 1, src_step, (x << 1) + src_offset);
-        __global const uchar* usrc = src + mad24(height + y, src_step, (x << 1) + src_offset);
+        __global const uchar* usrc = src + mad24(rows + y, src_step, (x << 1) + src_offset);
         __global uchar*       dst1 = dst + mad24(y << 1, dst_step, (x << 3) + dst_offset);
         __global uchar*       dst2 = dst + mad24((y << 1) + 1, dst_step, (x << 3) + dst_offset);
 
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
index b2f5c6fb2..df52b94b2 100644
--- a/modules/ocl/test/test_color.cpp
+++ b/modules/ocl/test/test_color.cpp
@@ -116,6 +116,8 @@ PARAM_TEST_CASE(CvtColor, MatDepth, bool)
 
 #define CVTCODE(name) cv::COLOR_ ## name
 
+// RGB <-> Gray
+
 OCL_TEST_P(CvtColor, RGB2GRAY)
 {
     doTest(3, 1, CVTCODE(RGB2GRAY));
@@ -152,6 +154,7 @@ OCL_TEST_P(CvtColor, GRAY2BGRA)
     doTest(1, 4, CVTCODE(GRAY2BGRA));
 }
 
+// RGB <-> YUV
 
 OCL_TEST_P(CvtColor, RGB2YUV)
 {
@@ -186,6 +189,7 @@ OCL_TEST_P(CvtColor, YUV2BGRA)
     doTest(3, 4, CVTCODE(YUV2BGR));
 }
 
+// RGB <-> YCrCb
 
 OCL_TEST_P(CvtColor, RGB2YCrCb)
 {
@@ -220,6 +224,8 @@ OCL_TEST_P(CvtColor, YCrCb2BGRA)
     doTest(3, 4, CVTCODE(YCrCb2BGR));
 }
 
+// YUV -> RGBA_NV12
+
 struct CvtColor_YUV420 :
         public CvtColor
 {
@@ -262,7 +268,6 @@ OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12)
     doTest(1, 3, CV_YUV2BGR_NV12);
 }
 
-
 INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor,
                             testing::Combine(
                                 testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)),

From 5e02b20482fff0954344f80d3603733c75a7ff72 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Sun, 10 Nov 2013 13:38:09 +0400
Subject: [PATCH 07/45] added RGB -> XYZ conversion to ocl::cvtColor

---
 modules/ocl/src/color.cpp           | 71 ++++++++++++++++++++++++++---
 modules/ocl/src/opencl/cvt_color.cl | 37 +++++++++++++++
 modules/ocl/test/test_color.cpp     | 70 ++++++++++++++++++----------
 3 files changed, 149 insertions(+), 29 deletions(-)

diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index aed9a7f0c..02e7bf3c8 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -50,7 +50,8 @@
 using namespace cv;
 using namespace cv::ocl;
 
-static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName)
+static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
+                           const oclMat & data = oclMat())
 {
     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
@@ -68,6 +69,9 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
 
+    if (!data.empty())
+        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data ));
+
     size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
     openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
 }
@@ -112,10 +116,10 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
         case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
         */
-    case CV_BGR2GRAY:
-    case CV_BGRA2GRAY:
     case CV_RGB2GRAY:
+    case CV_BGR2GRAY:
     case CV_RGBA2GRAY:
+    case CV_BGRA2GRAY:
     {
         CV_Assert(scn == 3 || scn == 4);
         bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
@@ -190,9 +194,64 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
     /*
     case CV_BGR5652GRAY: case CV_BGR5552GRAY:
     case CV_GRAY2BGR565: case CV_GRAY2BGR555:
-    case CV_BGR2YCrCb: case CV_RGB2YCrCb:
-    case CV_BGR2XYZ: case CV_RGB2XYZ:
-    case CV_XYZ2BGR: case CV_XYZ2RGB:
+    */
+    case CV_BGR2XYZ:
+    case CV_RGB2XYZ:
+    {
+        CV_Assert(scn == 3 || scn == 4);
+        bidx = code == CV_BGR2XYZ ? 0 : 2;
+        dst.create(sz, CV_MAKE_TYPE(depth, 3));
+
+        void * pdata = NULL;
+        if (depth == CV_32F)
+        {
+            float coeffs[] =
+            {
+                0.412453f, 0.357580f, 0.180423f,
+                0.212671f, 0.715160f, 0.072169f,
+                0.019334f, 0.119193f, 0.950227f
+            };
+            if (bidx == 0)
+            {
+                std::swap(coeffs[0], coeffs[2]);
+                std::swap(coeffs[3], coeffs[5]);
+                std::swap(coeffs[6], coeffs[8]);
+            }
+            pdata = coeffs;
+        }
+        else
+        {
+            int coeffs[] =
+            {
+                1689,    1465,    739,
+                871,     2929,    296,
+                79,      488,     3892
+            };
+            if (bidx == 0)
+            {
+                std::swap(coeffs[0], coeffs[2]);
+                std::swap(coeffs[3], coeffs[5]);
+                std::swap(coeffs[6], coeffs[8]);
+            }
+            pdata = coeffs;
+        }
+        oclMat oclCoeffs(1, 9, depth == CV_32F ? CV_32F : CV_32S, pdata);
+
+        fromRGB_caller(src, dst, bidx, "RGB2XYZ", oclCoeffs);
+        break;
+    }
+    case CV_XYZ2BGR:
+    case CV_XYZ2RGB:
+    {
+        if (dcn <= 0)
+            dcn = 3;
+        CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
+        bidx = code == CV_XYZ2BGR ? 0 : 2;
+        dst.create(sz, CV_MAKE_TYPE(depth, dcn));
+        toRGB_caller(src, dst, bidx, "XYZ2RGB");
+        break;
+    }
+    /*
     case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL:
     case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL:
     case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL:
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
index d9426b28b..775628bab 100644
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ b/modules/ocl/src/opencl/cvt_color.cl
@@ -48,6 +48,7 @@
 
 #if defined (DEPTH_0)
 #define DATA_TYPE uchar
+#define COEFF_TYPE int
 #define MAX_NUM  255
 #define HALF_MAX 128
 #define SAT_CAST(num) convert_uchar_sat_rte(num)
@@ -55,6 +56,7 @@
 
 #if defined (DEPTH_2)
 #define DATA_TYPE ushort
+#define COEFF_TYPE int
 #define MAX_NUM  65535
 #define HALF_MAX 32768
 #define SAT_CAST(num) convert_ushort_sat_rte(num)
@@ -62,6 +64,7 @@
 
 #if defined (DEPTH_5)
 #define DATA_TYPE float
+#define COEFF_TYPE float
 #define MAX_NUM  1.0f
 #define HALF_MAX 0.5f
 #define SAT_CAST(num) (num)
@@ -331,3 +334,37 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step, int chan
             dst[dst_idx + 3] = MAX_NUM;
     }
 }
+
+///////////////////////////////////// RGB <-> XYZ //////////////////////////////////////
+
+#pragma OPENCL EXTENSION cl_amd_printf:enable
+
+__kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step,
+                        int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
+                        int src_offset, int dst_offset, __global COEFF_TYPE * coeffs)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < rows && x < cols)
+    {
+        x <<= 2;
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
+
+        DATA_TYPE r = src[src_idx], g = src[src_idx + 1], b = src[src_idx + 2];
+
+#ifdef DEPTH_5
+        DATA_TYPE x = r * coeffs[0] + g * coeffs[1] + b * coeffs[2];
+        DATA_TYPE y = r * coeffs[3] + g * coeffs[4] + b * coeffs[5];
+        DATA_TYPE z = r * coeffs[6] + g * coeffs[7] + b * coeffs[8];
+#else
+        DATA_TYPE x = CV_DESCALE(r * coeffs[0] + g * coeffs[1] + b * coeffs[2], xyz_shift);
+        DATA_TYPE y = CV_DESCALE(r * coeffs[3] + g * coeffs[4] + b * coeffs[5], xyz_shift);
+        DATA_TYPE z = CV_DESCALE(r * coeffs[6] + g * coeffs[7] + b * coeffs[8], xyz_shift);
+#endif
+        dst[dst_idx] = SAT_CAST(x);
+        dst[dst_idx + 1] = SAT_CAST(y);
+        dst[dst_idx + 2] = SAT_CAST(z);
+    }
+}
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
index df52b94b2..0d9eb8f13 100644
--- a/modules/ocl/test/test_color.cpp
+++ b/modules/ocl/test/test_color.cpp
@@ -47,6 +47,7 @@
 #ifdef HAVE_OPENCL
 
 using namespace testing;
+using namespace cv;
 
 ///////////////////////////////////////////////////////////////////////////////////////////////////////
 // cvtColor
@@ -57,20 +58,20 @@ PARAM_TEST_CASE(CvtColor, MatDepth, bool)
     bool use_roi;
 
     // src mat
-    cv::Mat src1;
-    cv::Mat dst1;
+    Mat src;
+    Mat dst;
 
     // src mat with roi
-    cv::Mat src1_roi;
-    cv::Mat dst1_roi;
+    Mat src_roi;
+    Mat dst_roi;
 
     // ocl dst mat for testing
-    cv::ocl::oclMat gsrc1_whole;
-    cv::ocl::oclMat gdst1_whole;
+    ocl::oclMat gsrc_whole;
+    ocl::oclMat gdst_whole;
 
     // ocl mat with roi
-    cv::ocl::oclMat gsrc1_roi;
-    cv::ocl::oclMat gdst1_roi;
+    ocl::oclMat gsrc_roi;
+    ocl::oclMat gdst_roi;
 
     virtual void SetUp()
     {
@@ -85,19 +86,23 @@ PARAM_TEST_CASE(CvtColor, MatDepth, bool)
 
         Size roiSize = randomSize(1, MAX_VALUE);
         Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(src1, src1_roi, roiSize, srcBorder, srcType, 2, 100);
+        randomSubMat(src, src_roi, roiSize, srcBorder, srcType, 2, 100);
 
-        Border dst1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(dst1, dst1_roi, roiSize, dst1Border, dstType, 5, 16);
+        Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
+        randomSubMat(dst, dst_roi, roiSize, dstBorder, dstType, 5, 16);
 
-        generateOclMat(gsrc1_whole, gsrc1_roi, src1, roiSize, srcBorder);
-        generateOclMat(gdst1_whole, gdst1_roi, dst1, roiSize, dst1Border);
+        generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
+        generateOclMat(gdst_whole, gdst_roi, dst, roiSize, dstBorder);
     }
 
     void Near(double threshold = 1e-3)
     {
-        EXPECT_MAT_NEAR(dst1, gdst1_whole, threshold);
-        EXPECT_MAT_NEAR(dst1_roi, gdst1_roi, threshold);
+        Mat whole, roi;
+        gdst_whole.download(whole);
+        gdst_roi.download(roi);
+
+        EXPECT_MAT_NEAR(dst, whole, threshold);
+        EXPECT_MAT_NEAR(dst_roi, roi, threshold);
     }
 
     void doTest(int channelsIn, int channelsOut, int code)
@@ -106,15 +111,15 @@ PARAM_TEST_CASE(CvtColor, MatDepth, bool)
         {
             random_roi(channelsIn, channelsOut);
 
-            cv::cvtColor(src1_roi, dst1_roi, code, channelsOut);
-            cv::ocl::cvtColor(gsrc1_roi, gdst1_roi, code, channelsOut);
+            cvtColor(src_roi, dst_roi, code, channelsOut);
+            ocl::cvtColor(gsrc_roi, gdst_roi, code, channelsOut);
 
             Near();
         }
     }
 };
 
-#define CVTCODE(name) cv::COLOR_ ## name
+#define CVTCODE(name) COLOR_ ## name
 
 // RGB <-> Gray
 
@@ -224,6 +229,25 @@ OCL_TEST_P(CvtColor, YCrCb2BGRA)
     doTest(3, 4, CVTCODE(YCrCb2BGR));
 }
 
+// RGB <-> XYZ
+
+OCL_TEST_P(CvtColor, RGB2XYZ)
+{
+    doTest(3, 3, CVTCODE(RGB2XYZ));
+}
+OCL_TEST_P(CvtColor, BGR2XYZ)
+{
+    doTest(3, 3, CVTCODE(BGR2XYZ));
+}
+OCL_TEST_P(CvtColor, RGBA2XYZ)
+{
+    doTest(4, 3, CVTCODE(RGB2XYZ));
+}
+OCL_TEST_P(CvtColor, BGRA2XYZ)
+{
+    doTest(4, 3, CVTCODE(BGR2XYZ));
+}
+
 // YUV -> RGBA_NV12
 
 struct CvtColor_YUV420 :
@@ -238,13 +262,13 @@ struct CvtColor_YUV420 :
         roiSize.width *= 2;
         roiSize.height *= 3;
         Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(src1, src1_roi, roiSize, srcBorder, srcType, 2, 100);
+        randomSubMat(src, src_roi, roiSize, srcBorder, srcType, 2, 100);
 
-        Border dst1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
-        randomSubMat(dst1, dst1_roi, roiSize, dst1Border, dstType, 5, 16);
+        Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
+        randomSubMat(dst, dst_roi, roiSize, dstBorder, dstType, 5, 16);
 
-        generateOclMat(gsrc1_whole, gsrc1_roi, src1, roiSize, srcBorder);
-        generateOclMat(gdst1_whole, gdst1_roi, dst1, roiSize, dst1Border);
+        generateOclMat(gsrc_whole, gsrc_roi, src, roiSize, srcBorder);
+        generateOclMat(gdst_whole, gdst_roi, dst, roiSize, dstBorder);
     }
 };
 

From 581a3e444d4f9be398a0517dbe57ad913556a1d3 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Sun, 10 Nov 2013 14:30:37 +0400
Subject: [PATCH 08/45] added XYZ to RGB conversion to ocl::cvtColor

---
 modules/ocl/src/color.cpp           | 50 +++++++++++++--
 modules/ocl/src/opencl/cvt_color.cl | 94 ++++++++++++++++++++---------
 modules/ocl/test/test_color.cpp     | 17 ++++++
 3 files changed, 125 insertions(+), 36 deletions(-)

diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index 02e7bf3c8..9688be08d 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -76,10 +76,10 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
     openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
 }
 
-static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName)
+static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
+                         const oclMat & data = oclMat())
 {
-    int channels = dst.channels();
-    std::string build_options = format("-D DEPTH_%d", src.depth());
+    std::string build_options = format("-D DEPTH_%d -D channels=%d", src.depth(), dst.channels());
     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
 
@@ -88,13 +88,15 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
-    args.push_back( make_pair( sizeof(cl_int) , (void *)&channels));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
     args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
     args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
 
+    if (!data.empty())
+        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data ));
+
     size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
     openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
 }
@@ -235,7 +237,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
             }
             pdata = coeffs;
         }
-        oclMat oclCoeffs(1, 9, depth == CV_32F ? CV_32F : CV_32S, pdata);
+        oclMat oclCoeffs(1, 9, depth == CV_32F ? CV_32FC1 : CV_32SC1, pdata);
 
         fromRGB_caller(src, dst, bidx, "RGB2XYZ", oclCoeffs);
         break;
@@ -248,7 +250,43 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
         bidx = code == CV_XYZ2BGR ? 0 : 2;
         dst.create(sz, CV_MAKE_TYPE(depth, dcn));
-        toRGB_caller(src, dst, bidx, "XYZ2RGB");
+
+        void * pdata = NULL;
+        if (depth == CV_32F)
+        {
+            float coeffs[] =
+            {
+                3.240479f, -1.53715f, -0.498535f,
+                -0.969256f, 1.875991f, 0.041556f,
+                0.055648f, -0.204043f, 1.057311f
+            };
+            if (bidx == 0)
+            {
+                std::swap(coeffs[0], coeffs[6]);
+                std::swap(coeffs[1], coeffs[7]);
+                std::swap(coeffs[2], coeffs[8]);
+            }
+            pdata = coeffs;
+        }
+        else
+        {
+            int coeffs[] =
+            {
+                13273,  -6296,  -2042,
+                -3970,   7684,    170,
+                  228,   -836,   4331
+            };
+            if (bidx == 0)
+            {
+                std::swap(coeffs[0], coeffs[6]);
+                std::swap(coeffs[1], coeffs[7]);
+                std::swap(coeffs[2], coeffs[8]);
+            }
+            pdata = coeffs;
+        }
+        oclMat oclCoeffs(1, 9, depth == CV_32F ? CV_32FC1 : CV_32SC1, pdata);
+
+        toRGB_caller(src, dst, bidx, "XYZ2RGB", oclCoeffs);
         break;
     }
     /*
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
index 775628bab..0f44897a1 100644
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ b/modules/ocl/src/opencl/cvt_color.cl
@@ -46,7 +46,7 @@
 
 /**************************************PUBLICFUNC*************************************/
 
-#if defined (DEPTH_0)
+#ifdef DEPTH_0
 #define DATA_TYPE uchar
 #define COEFF_TYPE int
 #define MAX_NUM  255
@@ -54,7 +54,7 @@
 #define SAT_CAST(num) convert_uchar_sat_rte(num)
 #endif
 
-#if defined (DEPTH_2)
+#ifdef DEPTH_2
 #define DATA_TYPE ushort
 #define COEFF_TYPE int
 #define MAX_NUM  65535
@@ -62,7 +62,7 @@
 #define SAT_CAST(num) convert_ushort_sat_rte(num)
 #endif
 
-#if defined (DEPTH_5)
+#ifdef DEPTH_5
 #define DATA_TYPE float
 #define COEFF_TYPE float
 #define MAX_NUM  1.0f
@@ -95,7 +95,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
     {
         int src_idx = mad24(y, src_step, src_offset + (x << 2));
         int dst_idx = mad24(y, dst_step, dst_offset + x);
-#if defined (DEPTH_5)
+#ifdef DEPTH_5
         dst[dst_idx] = src[src_idx + bidx] * 0.114f + src[src_idx + 1] * 0.587f + src[src_idx + (bidx^2)] * 0.299f;
 #else
         dst[dst_idx] = (DATA_TYPE)CV_DESCALE((src[src_idx + bidx] * B2Y + src[src_idx + 1] * G2Y + src[src_idx + (bidx^2)] * R2Y), yuv_shift);
@@ -103,7 +103,7 @@ __kernel void RGB2Gray(int cols, int rows, int src_step, int dst_step,
     }
 }
 
-__kernel void Gray2RGB(int cols, int rows, int src_step, int dst_step, int channels, int bidx,
+__kernel void Gray2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
                        __global const DATA_TYPE* src, __global DATA_TYPE* dst,
                        int src_offset, int dst_offset)
 {
@@ -119,8 +119,9 @@ __kernel void Gray2RGB(int cols, int rows, int src_step, int dst_step, int chann
         dst[dst_idx] = val;
         dst[dst_idx + 1] = val;
         dst[dst_idx + 2] = val;
-        if (channels == 4)
+#if channels == 4
             dst[dst_idx + 3] = MAX_NUM;
+#endif
     }
 }
 
@@ -143,7 +144,7 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step,
         int dst_idx = mad24(y, dst_step, dst_offset + x);
         DATA_TYPE rgb[] = { src[src_idx], src[src_idx + 1], src[src_idx + 2] };
 
-#if defined (DEPTH_5)
+#ifdef DEPTH_5
         __constant float * coeffs = c_RGB2YUVCoeffs_f;
         DATA_TYPE Y  = rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx];
         DATA_TYPE Cr = (rgb[bidx^2] - Y) * coeffs[3] + HALF_MAX;
@@ -165,7 +166,7 @@ __kernel void RGB2YUV(int cols, int rows, int src_step, int dst_step,
 __constant float c_YUV2RGBCoeffs_f[5] = { 2.032f, -0.395f, -0.581f, 1.140f };
 __constant int   c_YUV2RGBCoeffs_i[5] = { 33292, -6472, -9519, 18678 };
 
-__kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step, int channels,
+__kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step,
                       int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
                       int src_offset, int dst_offset)
 {
@@ -179,7 +180,7 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step, int channe
         int dst_idx = mad24(y, dst_step, dst_offset + x);
         DATA_TYPE yuv[] = { src[src_idx], src[src_idx + 1], src[src_idx + 2] };
 
-#if defined (DEPTH_5)
+#ifdef DEPTH_5
         __constant float * coeffs = c_YUV2RGBCoeffs_f;
         float b = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[3];
         float g = yuv[0] + (yuv[2] - HALF_MAX) * coeffs[2] + (yuv[1] - HALF_MAX) * coeffs[1];
@@ -194,8 +195,9 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step, int channe
         dst[dst_idx + bidx] = SAT_CAST( b );
         dst[dst_idx + 1]      = SAT_CAST( g );
         dst[dst_idx + (bidx^2)]   = SAT_CAST( r );
-        if (channels == 4)
+#if channels == 4
             dst[dst_idx + 3] = MAX_NUM;
+#endif
     }
 }
 
@@ -206,7 +208,7 @@ __constant int ITUR_BT_601_CVG = 852492;
 __constant int ITUR_BT_601_CVR = 1673527;
 __constant int ITUR_BT_601_SHIFT = 20;
 
-__kernel void YUV2RGBA_NV12(int cols, int rows, int src_step, int dst_step, int channels,
+__kernel void YUV2RGBA_NV12(int cols, int rows, int src_step, int dst_step,
                             int bidx, __global const uchar* src, __global uchar* dst,
                             int src_offset, int dst_offset)
 {
@@ -278,7 +280,7 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
 
         DATA_TYPE rgb[] = { src[src_idx], src[src_idx + 1], src[src_idx + 2] };
 
-#if defined (DEPTH_5)
+#ifdef DEPTH_5
         __constant float * coeffs = c_RGB2YCrCbCoeffs_f;
         DATA_TYPE Y  = rgb[0] * coeffs[bidx^2] + rgb[1] * coeffs[1] + rgb[2] * coeffs[bidx];
         DATA_TYPE Cr = (rgb[bidx^2] - Y) * coeffs[3] + HALF_MAX;
@@ -300,7 +302,7 @@ __kernel void RGB2YCrCb(int cols, int rows, int src_step, int dst_step,
 __constant float c_YCrCb2RGBCoeffs_f[4] = { 1.403f, -0.714f, -0.344f, 1.773f };
 __constant int   c_YCrCb2RGBCoeffs_i[4] = { 22987, -11698, -5636, 29049 };
 
-__kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step, int channels,
+__kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step,
                         int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
                         int src_offset, int dst_offset)
 {
@@ -330,41 +332,73 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step, int chan
         dst[dst_idx + (bidx^2)] = SAT_CAST(r);
         dst[dst_idx + 1] = SAT_CAST(g);
         dst[dst_idx + bidx] = SAT_CAST(b);
-        if (channels == 4)
+#if channels == 4
             dst[dst_idx + 3] = MAX_NUM;
+#endif
     }
 }
 
 ///////////////////////////////////// RGB <-> XYZ //////////////////////////////////////
 
-#pragma OPENCL EXTENSION cl_amd_printf:enable
-
 __kernel void RGB2XYZ(int cols, int rows, int src_step, int dst_step,
-                        int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
-                        int src_offset, int dst_offset, __global COEFF_TYPE * coeffs)
+                      int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
+                      int src_offset, int dst_offset, __constant COEFF_TYPE * coeffs)
 {
-    int x = get_global_id(0);
-    int y = get_global_id(1);
+    int dx = get_global_id(0);
+    int dy = get_global_id(1);
 
-    if (y < rows && x < cols)
+    if (dy < rows && dx < cols)
     {
-        x <<= 2;
-        int src_idx = mad24(y, src_step, src_offset + x);
-        int dst_idx = mad24(y, dst_step, dst_offset + x);
+        dx <<= 2;
+        int src_idx = mad24(dy, src_step, src_offset + dx);
+        int dst_idx = mad24(dy, dst_step, dst_offset + dx);
 
         DATA_TYPE r = src[src_idx], g = src[src_idx + 1], b = src[src_idx + 2];
 
 #ifdef DEPTH_5
-        DATA_TYPE x = r * coeffs[0] + g * coeffs[1] + b * coeffs[2];
-        DATA_TYPE y = r * coeffs[3] + g * coeffs[4] + b * coeffs[5];
-        DATA_TYPE z = r * coeffs[6] + g * coeffs[7] + b * coeffs[8];
+        float x = r * coeffs[0] + g * coeffs[1] + b * coeffs[2];
+        float y = r * coeffs[3] + g * coeffs[4] + b * coeffs[5];
+        float z = r * coeffs[6] + g * coeffs[7] + b * coeffs[8];
 #else
-        DATA_TYPE x = CV_DESCALE(r * coeffs[0] + g * coeffs[1] + b * coeffs[2], xyz_shift);
-        DATA_TYPE y = CV_DESCALE(r * coeffs[3] + g * coeffs[4] + b * coeffs[5], xyz_shift);
-        DATA_TYPE z = CV_DESCALE(r * coeffs[6] + g * coeffs[7] + b * coeffs[8], xyz_shift);
+        int x = CV_DESCALE(r * coeffs[0] + g * coeffs[1] + b * coeffs[2], xyz_shift);
+        int y = CV_DESCALE(r * coeffs[3] + g * coeffs[4] + b * coeffs[5], xyz_shift);
+        int z = CV_DESCALE(r * coeffs[6] + g * coeffs[7] + b * coeffs[8], xyz_shift);
 #endif
         dst[dst_idx] = SAT_CAST(x);
         dst[dst_idx + 1] = SAT_CAST(y);
         dst[dst_idx + 2] = SAT_CAST(z);
     }
 }
+
+__kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step,
+                      int bidx, __global const DATA_TYPE* src, __global DATA_TYPE* dst,
+                      int src_offset, int dst_offset, __constant COEFF_TYPE * coeffs)
+{
+    int dx = get_global_id(0);
+    int dy = get_global_id(1);
+
+    if (dy < rows && dx < cols)
+    {
+        dx <<= 2;
+        int src_idx = mad24(dy, src_step, src_offset + dx);
+        int dst_idx = mad24(dy, dst_step, dst_offset + dx);
+
+        DATA_TYPE x = src[src_idx], y = src[src_idx + 1], z = src[src_idx + 2];
+
+#ifdef DEPTH_5
+        float b = x * coeffs[0] + y * coeffs[1] + z * coeffs[2];
+        float g = x * coeffs[3] + y * coeffs[4] + z * coeffs[5];
+        float r = x * coeffs[6] + y * coeffs[7] + z * coeffs[8];
+#else
+        int b = CV_DESCALE(x * coeffs[0] + y * coeffs[1] + z * coeffs[2], xyz_shift);
+        int g = CV_DESCALE(x * coeffs[3] + y * coeffs[4] + z * coeffs[5], xyz_shift);
+        int r = CV_DESCALE(x * coeffs[6] + y * coeffs[7] + z * coeffs[8], xyz_shift);
+#endif
+        dst[dst_idx] = SAT_CAST(b);
+        dst[dst_idx + 1] = SAT_CAST(g);
+        dst[dst_idx + 2] = SAT_CAST(r);
+#if channels == 4
+            dst[dst_idx + 3] = MAX_NUM;
+#endif
+    }
+}
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
index 0d9eb8f13..977fd206d 100644
--- a/modules/ocl/test/test_color.cpp
+++ b/modules/ocl/test/test_color.cpp
@@ -248,6 +248,23 @@ OCL_TEST_P(CvtColor, BGRA2XYZ)
     doTest(4, 3, CVTCODE(BGR2XYZ));
 }
 
+OCL_TEST_P(CvtColor, XYZ2RGB)
+{
+    doTest(3, 3, CVTCODE(XYZ2RGB));
+}
+OCL_TEST_P(CvtColor, XYZ2BGR)
+{
+    doTest(3, 3, CVTCODE(XYZ2BGR));
+}
+OCL_TEST_P(CvtColor, XYZ2RGBA)
+{
+    doTest(3, 4, CVTCODE(XYZ2RGB));
+}
+OCL_TEST_P(CvtColor, XYZ2BGRA)
+{
+    doTest(3, 4, CVTCODE(XYZ2BGR));
+}
+
 // YUV -> RGBA_NV12
 
 struct CvtColor_YUV420 :

From 3cc9502c90836c3fcda86edfb29dbf301477ae6b Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Sun, 10 Nov 2013 20:56:18 +0400
Subject: [PATCH 09/45] added RGB[A] <-> BGR[A] conversion to ocl::cvtColor

---
 modules/ocl/src/color.cpp           |  59 +++++---
 modules/ocl/src/opencl/cvt_color.cl |  51 +++++--
 modules/ocl/test/test_color.cpp     | 201 +++++++---------------------
 3 files changed, 136 insertions(+), 175 deletions(-)

diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index 9688be08d..f27366a6a 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -79,7 +79,7 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
 static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
                          const oclMat & data = oclMat())
 {
-    std::string build_options = format("-D DEPTH_%d -D channels=%d", src.depth(), dst.channels());
+    std::string build_options = format("-D DEPTH_%d -D dcn=%d", src.depth(), dst.channels());
     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
 
@@ -101,6 +101,27 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
     openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
 }
 
+static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
+{
+    std::string build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s", src.depth(),
+                                       dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
+    int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
+    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
+
+    vector<pair<size_t , const void *> > args;
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+
+    size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
+    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB", gt, lt, args, -1, -1, build_options.c_str());
+}
+
 static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
 {
     Size sz = src.size();
@@ -110,18 +131,24 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
 
     switch (code)
     {
-        /*
-        case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR:
-        case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA:
-        case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
-        case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
-        case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
-        case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
-        */
-    case CV_RGB2GRAY:
-    case CV_BGR2GRAY:
-    case CV_RGBA2GRAY:
-    case CV_BGRA2GRAY:
+    case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR:
+    case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA:
+    {
+        CV_Assert(scn == 3 || scn == 4);
+        dcn = code == CV_BGR2BGRA || code == CV_RGB2BGRA || code == CV_BGRA2RGBA ? 4 : 3;
+        bool reverse = !(code == CV_BGR2BGRA || code == CV_BGRA2BGR);
+        dst.create(sz, CV_MAKE_TYPE(depth, dcn));
+        RGB_caller(src, dst, reverse);
+        break;
+    }
+    /*
+    case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
+    case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
+    case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
+    case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
+    */
+    case CV_RGB2GRAY: case CV_BGR2GRAY:
+    case CV_RGBA2GRAY: case CV_BGRA2GRAY:
     {
         CV_Assert(scn == 3 || scn == 4);
         bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
@@ -158,10 +185,8 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         toRGB_caller(src, dst, bidx, "YUV2RGB");
         break;
     }
-    case CV_YUV2RGB_NV12:
-    case CV_YUV2BGR_NV12:
-    case CV_YUV2RGBA_NV12:
-    case CV_YUV2BGRA_NV12:
+    case CV_YUV2RGB_NV12: case CV_YUV2BGR_NV12:
+    case CV_YUV2RGBA_NV12: case CV_YUV2BGRA_NV12:
     {
         CV_Assert(scn == 1);
         CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
index 0f44897a1..916d44bf9 100644
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ b/modules/ocl/src/opencl/cvt_color.cl
@@ -119,8 +119,8 @@ __kernel void Gray2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
         dst[dst_idx] = val;
         dst[dst_idx + 1] = val;
         dst[dst_idx + 2] = val;
-#if channels == 4
-            dst[dst_idx + 3] = MAX_NUM;
+#if dcn == 4
+        dst[dst_idx + 3] = MAX_NUM;
 #endif
     }
 }
@@ -195,8 +195,8 @@ __kernel void YUV2RGB(int cols, int rows, int src_step, int dst_step,
         dst[dst_idx + bidx] = SAT_CAST( b );
         dst[dst_idx + 1]      = SAT_CAST( g );
         dst[dst_idx + (bidx^2)]   = SAT_CAST( r );
-#if channels == 4
-            dst[dst_idx + 3] = MAX_NUM;
+#if dcn == 4
+        dst[dst_idx + 3] = MAX_NUM;
 #endif
     }
 }
@@ -332,8 +332,8 @@ __kernel void YCrCb2RGB(int cols, int rows, int src_step, int dst_step,
         dst[dst_idx + (bidx^2)] = SAT_CAST(r);
         dst[dst_idx + 1] = SAT_CAST(g);
         dst[dst_idx + bidx] = SAT_CAST(b);
-#if channels == 4
-            dst[dst_idx + 3] = MAX_NUM;
+#if dcn == 4
+        dst[dst_idx + 3] = MAX_NUM;
 #endif
     }
 }
@@ -397,8 +397,43 @@ __kernel void XYZ2RGB(int cols, int rows, int src_step, int dst_step,
         dst[dst_idx] = SAT_CAST(b);
         dst[dst_idx + 1] = SAT_CAST(g);
         dst[dst_idx + 2] = SAT_CAST(r);
-#if channels == 4
-            dst[dst_idx + 3] = MAX_NUM;
+#if dcn == 4
+        dst[dst_idx + 3] = MAX_NUM;
+#endif
+    }
+}
+
+///////////////////////////////////// RGB[A] <-> BGR[A] //////////////////////////////////////
+
+__kernel void RGB(int cols, int rows, int src_step, int dst_step,
+                  __global const DATA_TYPE * src, __global DATA_TYPE * dst,
+                  int src_offset, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < rows && x < cols)
+    {
+        x <<= 2;
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
+
+#ifdef REVERSE
+        dst[dst_idx] = src[src_idx + 2];
+        dst[dst_idx + 1] = src[src_idx + 1];
+        dst[dst_idx + 2] = src[src_idx];
+#elif defined ORDER
+        dst[dst_idx] = src[src_idx];
+        dst[dst_idx + 1] = src[src_idx + 1];
+        dst[dst_idx + 2] = src[src_idx + 2];
+#endif
+
+#if dcn == 4
+#if scn == 3
+        dst[dst_idx + 3] = MAX_NUM;
+#else
+        dst[dst_idx + 3] = src[src_idx + 3];
+#endif
 #endif
     }
 }
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
index 977fd206d..e98268425 100644
--- a/modules/ocl/test/test_color.cpp
+++ b/modules/ocl/test/test_color.cpp
@@ -121,149 +121,65 @@ PARAM_TEST_CASE(CvtColor, MatDepth, bool)
 
 #define CVTCODE(name) COLOR_ ## name
 
+// RGB[A] <-> BGR[A]
+
+OCL_TEST_P(CvtColor, BGR2BGRA) { doTest(3, 4, CVTCODE(BGR2BGRA)); }
+OCL_TEST_P(CvtColor, RGB2RGBA) { doTest(3, 4, CVTCODE(BGR2BGRA)); }
+OCL_TEST_P(CvtColor, BGRA2BGR) { doTest(4, 3, CVTCODE(BGRA2BGR)); }
+OCL_TEST_P(CvtColor, RGBA2RGB) { doTest(4, 3, CVTCODE(BGRA2BGR)); }
+OCL_TEST_P(CvtColor, BGR2RGBA) { doTest(3, 4, CVTCODE(BGR2RGBA)); }
+OCL_TEST_P(CvtColor, RGB2BGRA) { doTest(3, 4, CVTCODE(BGR2RGBA)); }
+OCL_TEST_P(CvtColor, RGBA2BGR) { doTest(4, 3, CVTCODE(RGBA2BGR)); }
+OCL_TEST_P(CvtColor, BGRA2RGB) { doTest(4, 3, CVTCODE(RGBA2BGR)); }
+OCL_TEST_P(CvtColor, BGR2RGB) { doTest(3, 3, CVTCODE(BGR2RGB)); }
+OCL_TEST_P(CvtColor, RGB2BGR) { doTest(3, 3, CVTCODE(BGR2RGB)); }
+OCL_TEST_P(CvtColor, BGRA2RGBA) { doTest(4, 4, CVTCODE(BGRA2RGBA)); }
+OCL_TEST_P(CvtColor, RGBA2BGRA) { doTest(4, 4, CVTCODE(BGRA2RGBA)); }
+
 // RGB <-> Gray
 
-OCL_TEST_P(CvtColor, RGB2GRAY)
-{
-    doTest(3, 1, CVTCODE(RGB2GRAY));
-}
-OCL_TEST_P(CvtColor, GRAY2RGB)
-{
-    doTest(1, 3, CVTCODE(GRAY2RGB));
-}
-
-OCL_TEST_P(CvtColor, BGR2GRAY)
-{
-    doTest(3, 1, CVTCODE(BGR2GRAY));
-}
-OCL_TEST_P(CvtColor, GRAY2BGR)
-{
-    doTest(1, 3, CVTCODE(GRAY2BGR));
-}
-
-OCL_TEST_P(CvtColor, RGBA2GRAY)
-{
-    doTest(4, 1, CVTCODE(RGBA2GRAY));
-}
-OCL_TEST_P(CvtColor, GRAY2RGBA)
-{
-    doTest(1, 4, CVTCODE(GRAY2RGBA));
-}
-
-OCL_TEST_P(CvtColor, BGRA2GRAY)
-{
-    doTest(4, 1, CVTCODE(BGRA2GRAY));
-}
-OCL_TEST_P(CvtColor, GRAY2BGRA)
-{
-    doTest(1, 4, CVTCODE(GRAY2BGRA));
-}
+OCL_TEST_P(CvtColor, RGB2GRAY) { doTest(3, 1, CVTCODE(RGB2GRAY)); }
+OCL_TEST_P(CvtColor, GRAY2RGB) { doTest(1, 3, CVTCODE(GRAY2RGB)); }
+OCL_TEST_P(CvtColor, BGR2GRAY) { doTest(3, 1, CVTCODE(BGR2GRAY)); }
+OCL_TEST_P(CvtColor, GRAY2BGR) { doTest(1, 3, CVTCODE(GRAY2BGR)); }
+OCL_TEST_P(CvtColor, RGBA2GRAY) { doTest(4, 1, CVTCODE(RGBA2GRAY)); }
+OCL_TEST_P(CvtColor, GRAY2RGBA) { doTest(1, 4, CVTCODE(GRAY2RGBA)); }
+OCL_TEST_P(CvtColor, BGRA2GRAY) { doTest(4, 1, CVTCODE(BGRA2GRAY)); }
+OCL_TEST_P(CvtColor, GRAY2BGRA) { doTest(1, 4, CVTCODE(GRAY2BGRA)); }
 
 // RGB <-> YUV
 
-OCL_TEST_P(CvtColor, RGB2YUV)
-{
-    doTest(3, 3, CVTCODE(RGB2YUV));
-}
-OCL_TEST_P(CvtColor, BGR2YUV)
-{
-    doTest(3, 3, CVTCODE(BGR2YUV));
-}
-OCL_TEST_P(CvtColor, RGBA2YUV)
-{
-    doTest(4, 3, CVTCODE(RGB2YUV));
-}
-OCL_TEST_P(CvtColor, BGRA2YUV)
-{
-    doTest(4, 3, CVTCODE(BGR2YUV));
-}
-OCL_TEST_P(CvtColor, YUV2RGB)
-{
-    doTest(3, 3, CVTCODE(YUV2RGB));
-}
-OCL_TEST_P(CvtColor, YUV2BGR)
-{
-    doTest(3, 3, CVTCODE(YUV2BGR));
-}
-OCL_TEST_P(CvtColor, YUV2RGBA)
-{
-    doTest(3, 4, CVTCODE(YUV2RGB));
-}
-OCL_TEST_P(CvtColor, YUV2BGRA)
-{
-    doTest(3, 4, CVTCODE(YUV2BGR));
-}
+OCL_TEST_P(CvtColor, RGB2YUV) { doTest(3, 3, CVTCODE(RGB2YUV)); }
+OCL_TEST_P(CvtColor, BGR2YUV) { doTest(3, 3, CVTCODE(BGR2YUV)); }
+OCL_TEST_P(CvtColor, RGBA2YUV) { doTest(4, 3, CVTCODE(RGB2YUV)); }
+OCL_TEST_P(CvtColor, BGRA2YUV) { doTest(4, 3, CVTCODE(BGR2YUV)); }
+OCL_TEST_P(CvtColor, YUV2RGB) { doTest(3, 3, CVTCODE(YUV2RGB)); }
+OCL_TEST_P(CvtColor, YUV2BGR) { doTest(3, 3, CVTCODE(YUV2BGR)); }
+OCL_TEST_P(CvtColor, YUV2RGBA) { doTest(3, 4, CVTCODE(YUV2RGB)); }
+OCL_TEST_P(CvtColor, YUV2BGRA) { doTest(3, 4, CVTCODE(YUV2BGR)); }
 
 // RGB <-> YCrCb
 
-OCL_TEST_P(CvtColor, RGB2YCrCb)
-{
-    doTest(3, 3, CVTCODE(RGB2YCrCb));
-}
-OCL_TEST_P(CvtColor, BGR2YCrCb)
-{
-    doTest(3, 3, CVTCODE(BGR2YCrCb));
-}
-OCL_TEST_P(CvtColor, RGBA2YCrCb)
-{
-    doTest(4, 3, CVTCODE(RGB2YCrCb));
-}
-OCL_TEST_P(CvtColor, BGRA2YCrCb)
-{
-    doTest(4, 3, CVTCODE(BGR2YCrCb));
-}
-OCL_TEST_P(CvtColor, YCrCb2RGB)
-{
-    doTest(3, 3, CVTCODE(YCrCb2RGB));
-}
-OCL_TEST_P(CvtColor, YCrCb2BGR)
-{
-    doTest(3, 3, CVTCODE(YCrCb2BGR));
-}
-OCL_TEST_P(CvtColor, YCrCb2RGBA)
-{
-    doTest(3, 4, CVTCODE(YCrCb2RGB));
-}
-OCL_TEST_P(CvtColor, YCrCb2BGRA)
-{
-    doTest(3, 4, CVTCODE(YCrCb2BGR));
-}
+OCL_TEST_P(CvtColor, RGB2YCrCb) { doTest(3, 3, CVTCODE(RGB2YCrCb)); }
+OCL_TEST_P(CvtColor, BGR2YCrCb) { doTest(3, 3, CVTCODE(BGR2YCrCb)); }
+OCL_TEST_P(CvtColor, RGBA2YCrCb) { doTest(4, 3, CVTCODE(RGB2YCrCb)); }
+OCL_TEST_P(CvtColor, BGRA2YCrCb) { doTest(4, 3, CVTCODE(BGR2YCrCb)); }
+OCL_TEST_P(CvtColor, YCrCb2RGB) { doTest(3, 3, CVTCODE(YCrCb2RGB)); }
+OCL_TEST_P(CvtColor, YCrCb2BGR) { doTest(3, 3, CVTCODE(YCrCb2BGR)); }
+OCL_TEST_P(CvtColor, YCrCb2RGBA) { doTest(3, 4, CVTCODE(YCrCb2RGB)); }
+OCL_TEST_P(CvtColor, YCrCb2BGRA) { doTest(3, 4, CVTCODE(YCrCb2BGR)); }
 
 // RGB <-> XYZ
 
-OCL_TEST_P(CvtColor, RGB2XYZ)
-{
-    doTest(3, 3, CVTCODE(RGB2XYZ));
-}
-OCL_TEST_P(CvtColor, BGR2XYZ)
-{
-    doTest(3, 3, CVTCODE(BGR2XYZ));
-}
-OCL_TEST_P(CvtColor, RGBA2XYZ)
-{
-    doTest(4, 3, CVTCODE(RGB2XYZ));
-}
-OCL_TEST_P(CvtColor, BGRA2XYZ)
-{
-    doTest(4, 3, CVTCODE(BGR2XYZ));
-}
+OCL_TEST_P(CvtColor, RGB2XYZ) { doTest(3, 3, CVTCODE(RGB2XYZ)); }
+OCL_TEST_P(CvtColor, BGR2XYZ) { doTest(3, 3, CVTCODE(BGR2XYZ)); }
+OCL_TEST_P(CvtColor, RGBA2XYZ) { doTest(4, 3, CVTCODE(RGB2XYZ)); }
+OCL_TEST_P(CvtColor, BGRA2XYZ) { doTest(4, 3, CVTCODE(BGR2XYZ)); }
 
-OCL_TEST_P(CvtColor, XYZ2RGB)
-{
-    doTest(3, 3, CVTCODE(XYZ2RGB));
-}
-OCL_TEST_P(CvtColor, XYZ2BGR)
-{
-    doTest(3, 3, CVTCODE(XYZ2BGR));
-}
-OCL_TEST_P(CvtColor, XYZ2RGBA)
-{
-    doTest(3, 4, CVTCODE(XYZ2RGB));
-}
-OCL_TEST_P(CvtColor, XYZ2BGRA)
-{
-    doTest(3, 4, CVTCODE(XYZ2BGR));
-}
+OCL_TEST_P(CvtColor, XYZ2RGB) { doTest(3, 3, CVTCODE(XYZ2RGB)); }
+OCL_TEST_P(CvtColor, XYZ2BGR) { doTest(3, 3, CVTCODE(XYZ2BGR)); }
+OCL_TEST_P(CvtColor, XYZ2RGBA) { doTest(3, 4, CVTCODE(XYZ2RGB)); }
+OCL_TEST_P(CvtColor, XYZ2BGRA) { doTest(3, 4, CVTCODE(XYZ2BGR)); }
 
 // YUV -> RGBA_NV12
 
@@ -289,25 +205,10 @@ struct CvtColor_YUV420 :
     }
 };
 
-OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV12)
-{
-    doTest(1, 4, CV_YUV2RGBA_NV12);
-}
-
-OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV12)
-{
-    doTest(1, 4, CV_YUV2BGRA_NV12);
-}
-
-OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV12)
-{
-    doTest(1, 3, CV_YUV2RGB_NV12);
-}
-
-OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12)
-{
-    doTest(1, 3, CV_YUV2BGR_NV12);
-}
+OCL_TEST_P(CvtColor_YUV420, YUV2RGBA_NV12) { doTest(1, 4, CV_YUV2RGBA_NV12); }
+OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV12) { doTest(1, 4, CV_YUV2BGRA_NV12); }
+OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV12) { doTest(1, 3, CV_YUV2RGB_NV12); }
+OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12) { doTest(1, 3, CV_YUV2BGR_NV12); }
 
 INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor,
                             testing::Combine(

From 5beb88724709d4ac81abe37323187c752024b36c Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 11 Nov 2013 00:53:26 +0400
Subject: [PATCH 10/45] replaced custom PI by OpenCL M_PI constant

---
 modules/ocl/src/opencl/arithm_phase.cl       | 14 +++++++-------
 modules/ocl/src/opencl/arithm_polarToCart.cl | 10 +++++++---
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/modules/ocl/src/opencl/arithm_phase.cl b/modules/ocl/src/opencl/arithm_phase.cl
index f9835948c..978fd3b97 100644
--- a/modules/ocl/src/opencl/arithm_phase.cl
+++ b/modules/ocl/src/opencl/arithm_phase.cl
@@ -45,16 +45,16 @@
 //
 
 #if defined (DOUBLE_SUPPORT)
-    #ifdef cl_khr_fp64
-        #pragma OPENCL EXTENSION cl_khr_fp64:enable
-    #elif defined (cl_amd_fp64)
+    #ifdef cl_amd_fp64
         #pragma OPENCL EXTENSION cl_amd_fp64:enable
+    #elif defined (cl_khr_fp64)
+        #pragma OPENCL EXTENSION cl_khr_fp64:enable
     #endif
-    #define CV_PI 3.1415926535897932384626433832795
-    #define CV_2PI 2*CV_PI
+    #define CV_PI M_PI
+    #define CV_2PI (2 * CV_PI)
 #else
-    #define CV_PI 3.1415926535897932384626433832795f
-    #define CV_2PI 2*CV_PI
+    #define CV_PI M_PI_F
+    #define CV_2PI (2 * CV_PI)
 #endif
 
 /**************************************phase inradians**************************************/
diff --git a/modules/ocl/src/opencl/arithm_polarToCart.cl b/modules/ocl/src/opencl/arithm_polarToCart.cl
index 8469cdb09..9e2e860e7 100644
--- a/modules/ocl/src/opencl/arithm_polarToCart.cl
+++ b/modules/ocl/src/opencl/arithm_polarToCart.cl
@@ -44,10 +44,14 @@
 //M*/
 
 #ifdef DOUBLE_SUPPORT
-    #pragma OPENCL EXTENSION cl_khr_fp64:enable
-    #define CV_PI   3.1415926535897932384626433832795
+    #ifdef cl_amd_fp64
+        #pragma OPENCL EXTENSION cl_amd_fp64:enable
+    #elif defined (cl_khr_fp64)
+        #pragma OPENCL EXTENSION cl_khr_fp64:enable
+    #endif
+    #define CV_PI M_PI
 #else
-    #define CV_PI   3.1415926535897932384626433832795f
+    #define CV_PI M_PI_F
 #endif
 
 /////////////////////////////////////////////////////////////////////////////////////////////////////

From 50579d2524cf7235f113b7e414121427d0cc339d Mon Sep 17 00:00:00 2001
From: perping <erping@multicorewareinc.com>
Date: Mon, 11 Nov 2013 14:31:02 +0800
Subject: [PATCH 11/45] Make integral sum support CV_32F and sqsum support CV_64F.

---
 modules/ocl/include/opencv2/ocl/ocl.hpp    |   6 +-
 modules/ocl/src/imgproc.cpp                |  50 ++++---
 modules/ocl/src/opencl/imgproc_integral.cl | 143 +++++++++++----------
 modules/ocl/test/test_imgproc.cpp          |  28 ++--
 4 files changed, 134 insertions(+), 93 deletions(-)

diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp
index 37c533567..bdf83257c 100644
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -861,10 +861,10 @@ namespace cv
         CV_EXPORTS void warpPerspective(const oclMat &src, oclMat &dst, const Mat &M, Size dsize, int flags = INTER_LINEAR);
 
         //! computes the integral image and integral for the squared image
-        // sum will have CV_32S type, sqsum - CV32F type
+        // sum supports CV_32S and CV_32F; sqsum supports CV_32F and CV_64F
         // supports only CV_8UC1 source type
-        CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum);
-        CV_EXPORTS void integral(const oclMat &src, oclMat &sum);
+        CV_EXPORTS void integral(const oclMat &src, oclMat &sum, oclMat &sqsum, int sdepth=-1 );
+        CV_EXPORTS void integral(const oclMat &src, oclMat &sum, int sdepth=-1 );
         CV_EXPORTS void cornerHarris(const oclMat &src, oclMat &dst, int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
         CV_EXPORTS void cornerHarris_dxdy(const oclMat &src, oclMat &dst, oclMat &Dx, oclMat &Dy,
             int blockSize, int ksize, double k, int bordertype = cv::BORDER_DEFAULT);
diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp
index 193cb43a6..54df4d726 100644
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -783,7 +783,7 @@ namespace cv
         ////////////////////////////////////////////////////////////////////////
         // integral
 
-        void integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
+        void integral(const oclMat &src, oclMat &sum, oclMat &sqsum, int sdepth)
         {
             CV_Assert(src.type() == CV_8UC1);
             if (!src.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE) && src.depth() == CV_64F)
@@ -792,6 +792,12 @@ namespace cv
                 return;
             }
 
+            int depth = src.depth();
+            if( sdepth <= 0 )
+                sdepth = CV_32S;
+            sdepth = CV_MAT_DEPTH(sdepth);
+            int type = CV_MAKE_TYPE(sdepth, 1);
+
             int vlen = 4;
             int offset = src.offset / vlen;
             int pre_invalid = src.offset % vlen;
@@ -799,17 +805,26 @@ namespace cv
 
             oclMat t_sum , t_sqsum;
             int w = src.cols + 1, h = src.rows + 1;
-            int depth = src.depth() == CV_8U ? CV_32S : CV_64F;
-            int type = CV_MAKE_TYPE(depth, 1);
+
+            char build_option[250];
+            if(Context::getContext()->supportsFeature(ocl::FEATURE_CL_DOUBLE))
+            {
+                t_sqsum.create(src.cols, src.rows, CV_64FC1);
+                sqsum.create(h, w, CV_64FC1);
+                sprintf(build_option, "-D TYPE=double -D TYPE4=double4 -D convert_TYPE4=convert_double4");
+            }
+            else
+            {
+                t_sqsum.create(src.cols, src.rows, CV_32FC1);
+                sqsum.create(h, w, CV_32FC1);
+                sprintf(build_option, "-D TYPE=float -D TYPE4=float4 -D convert_TYPE4=convert_float4");
+            }
 
             t_sum.create(src.cols, src.rows, type);
             sum.create(h, w, type);
 
-            t_sqsum.create(src.cols, src.rows, CV_32FC1);
-            sqsum.create(h, w, CV_32FC1);
-
-            int sum_offset = sum.offset / vlen;
-            int sqsum_offset = sqsum.offset / vlen;
+            int sum_offset = sum.offset / sum.elemSize();
+            int sqsum_offset = sqsum.offset / sqsum.elemSize();
 
             vector<pair<size_t , const void *> > args;
             args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data ));
@@ -821,8 +836,9 @@ namespace cv
             args.push_back( make_pair( sizeof(cl_int) , (void *)&src.cols ));
             args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
             args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
+            args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sqsum.step));
             size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
-            openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, depth);
+            openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_cols", gt, lt, args, -1, sdepth, build_option);
 
             args.clear();
             args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
@@ -832,15 +848,16 @@ namespace cv
             args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.rows ));
             args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.cols ));
             args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step ));
+            args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sqsum.step));
             args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
             args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum.step));
             args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
             args.push_back( make_pair( sizeof(cl_int) , (void *)&sqsum_offset));
             size_t gt2[3] = {t_sum.cols  * 32, 1, 1}, lt2[3] = {256, 1, 1};
-            openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, depth);
+            openCLExecuteKernel(src.clCxt, &imgproc_integral, "integral_rows", gt2, lt2, args, -1, sdepth, build_option);
         }
 
-        void integral(const oclMat &src, oclMat &sum)
+        void integral(const oclMat &src, oclMat &sum, int sdepth)
         {
             CV_Assert(src.type() == CV_8UC1);
             int vlen = 4;
@@ -848,10 +865,13 @@ namespace cv
             int pre_invalid = src.offset % vlen;
             int vcols = (pre_invalid + src.cols + vlen - 1) / vlen;
 
+            if( sdepth <= 0 )
+                sdepth = CV_32S;
+            sdepth = CV_MAT_DEPTH(sdepth);
+            int type = CV_MAKE_TYPE(sdepth, 1);
+
             oclMat t_sum;
             int w = src.cols + 1, h = src.rows + 1;
-            int depth = src.depth() == CV_8U ? CV_32S : CV_32F;
-            int type = CV_MAKE_TYPE(depth, 1);
 
             t_sum.create(src.cols, src.rows, type);
             sum.create(h, w, type);
@@ -867,7 +887,7 @@ namespace cv
             args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step ));
             args.push_back( make_pair( sizeof(cl_int) , (void *)&t_sum.step));
             size_t gt[3] = {((vcols + 1) / 2) * 256, 1, 1}, lt[3] = {256, 1, 1};
-            openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, depth);
+            openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_cols", gt, lt, args, -1, sdepth);
 
             args.clear();
             args.push_back( make_pair( sizeof(cl_mem) , (void *)&t_sum.data ));
@@ -878,7 +898,7 @@ namespace cv
             args.push_back( make_pair( sizeof(cl_int) , (void *)&sum.step));
             args.push_back( make_pair( sizeof(cl_int) , (void *)&sum_offset));
             size_t gt2[3] = {t_sum.cols  * 32, 1, 1}, lt2[3] = {256, 1, 1};
-            openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, depth);
+            openCLExecuteKernel(src.clCxt, &imgproc_integral_sum, "integral_sum_rows", gt2, lt2, args, -1, sdepth);
         }
 
         /////////////////////// corner //////////////////////////////
diff --git a/modules/ocl/src/opencl/imgproc_integral.cl b/modules/ocl/src/opencl/imgproc_integral.cl
index 05e76f964..6b2427af3 100644
--- a/modules/ocl/src/opencl/imgproc_integral.cl
+++ b/modules/ocl/src/opencl/imgproc_integral.cl
@@ -49,6 +49,9 @@
 #elif defined (cl_amd_fp64)
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
 #endif
+#define CONVERT(step) ((step)>>1)
+#else
+#define CONVERT(step) ((step))
 #endif
 #define LSIZE 256
 #define LSIZE_1 255
@@ -60,17 +63,17 @@
 #define GET_CONFLICT_OFFSET(lid) ((lid) >> LOG_NUM_BANKS)
 
 
-kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global float *sqsum,
-                          int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
+kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global TYPE *sqsum,
+                          int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step,int dst1_step)
 {
     int lid = get_local_id(0);
     int gid = get_group_id(0);
     int4 src_t[2], sum_t[2];
-    float4 sqsum_t[2];
+    TYPE4 sqsum_t[2];
     __local int4 lm_sum[2][LSIZE + LOG_LSIZE];
-    __local float4 lm_sqsum[2][LSIZE + LOG_LSIZE];
+    __local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
     __local int* sum_p;
-    __local float* sqsum_p;
+    __local TYPE* sqsum_p;
     src_step = src_step >> 2;
     gid = gid << 1;
     for(int i = 0; i < rows; i =i + LSIZE_1)
@@ -79,17 +82,17 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
         src_t[1] = (i + lid < rows ? convert_int4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : 0);
 
         sum_t[0] = (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
+        sqsum_t[0] = (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
         sum_t[1] =  (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[1] =  (i == 0 ? (float4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
+        sqsum_t[1] =  (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
         barrier(CLK_LOCAL_MEM_FENCE);
 
         int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
         lm_sum[0][bf_loc] = src_t[0];
-        lm_sqsum[0][bf_loc] = convert_float4(src_t[0] * src_t[0]);
+        lm_sqsum[0][bf_loc] = convert_TYPE4(src_t[0] * src_t[0]);
 
         lm_sum[1][bf_loc] = src_t[1];
-        lm_sqsum[1][bf_loc] = convert_float4(src_t[1] * src_t[1]);
+        lm_sqsum[1][bf_loc] = convert_TYPE4(src_t[1] * src_t[1]);
 
         int offset = 1;
         for(int d = LSIZE >> 1 ;  d > 0; d>>=1)
@@ -130,7 +133,8 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
             }
         }
         barrier(CLK_LOCAL_MEM_FENCE);
-        int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
+        int loc_s0 = gid * dst_step  + i + lid - 1 - pre_invalid * dst_step /4, loc_s1 = loc_s0 + dst_step ;
+        int loc_sq0 = gid * CONVERT(dst1_step) + i + lid - 1 - pre_invalid * dst1_step / sizeof(TYPE),loc_sq1 = loc_sq0 + CONVERT(dst1_step);
         if(lid > 0 && (i+lid) <= rows)
         {
             lm_sum[0][bf_loc] += sum_t[0];
@@ -138,20 +142,20 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
             lm_sqsum[0][bf_loc] += sqsum_t[0];
             lm_sqsum[1][bf_loc] += sqsum_t[1];
             sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
-            sqsum_p = (__local float*)(&(lm_sqsum[0][bf_loc]));
+            sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
             for(int k = 0; k < 4; k++)
             {
                 if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
                 sum[loc_s0 + k * dst_step / 4] = sum_p[k];
-                sqsum[loc_s0 + k * dst_step / 4] = sqsum_p[k];
+                sqsum[loc_sq0 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
             }
             sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
-            sqsum_p = (__local float*)(&(lm_sqsum[1][bf_loc]));
+            sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
             for(int k = 0; k < 4; k++)
             {
                 if(gid * 4 + k + 4 >= cols + pre_invalid) break;
                 sum[loc_s1 + k * dst_step / 4] = sum_p[k];
-                sqsum[loc_s1 + k * dst_step / 4] = sqsum_p[k];
+                sqsum[loc_sq1 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
             }
         }
         barrier(CLK_LOCAL_MEM_FENCE);
@@ -159,30 +163,32 @@ kernel void integral_cols_D4(__global uchar4 *src,__global int *sum ,__global fl
 }
 
 
-kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__global int *sum ,
-                          __global float *sqsum,int rows,int cols,int src_step,int sum_step,
+kernel void integral_rows_D4(__global int4 *srcsum,__global TYPE4 * srcsqsum,__global int *sum ,
+                          __global TYPE *sqsum,int rows,int cols,int src_step,int src1_step,int sum_step,
                           int sqsum_step,int sum_offset,int sqsum_offset)
 {
     int lid = get_local_id(0);
     int gid = get_group_id(0);
     int4 src_t[2], sum_t[2];
-    float4 sqsrc_t[2],sqsum_t[2];
+    TYPE4 sqsrc_t[2],sqsum_t[2];
     __local int4 lm_sum[2][LSIZE + LOG_LSIZE];
-    __local float4 lm_sqsum[2][LSIZE + LOG_LSIZE];
+    __local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
     __local int *sum_p;
-    __local float *sqsum_p;
+    __local TYPE *sqsum_p;
     src_step = src_step >> 4;
+    src1_step = (src1_step / sizeof(TYPE)) >> 2 ;
+    gid <<= 1;
     for(int i = 0; i < rows; i =i + LSIZE_1)
     {
-        src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : (int4)0;
-        sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2] : (float4)0;
-        src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : (int4)0;
-        sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
+        src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid ] : (int4)0;
+        sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid ] : (TYPE4)0;
+        src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid  + 1] : (int4)0;
+        sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid  + 1] : (TYPE4)0;
 
         sum_t[0] =  (i == 0 ? 0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[0] =  (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
+        sqsum_t[0] =  (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
         sum_t[1] =  (i == 0 ? 0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[1] =  (i == 0 ? (float4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
+        sqsum_t[1] =  (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
         barrier(CLK_LOCAL_MEM_FENCE);
 
         int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
@@ -238,17 +244,18 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__
         }
         if(i + lid == 0)
         {
-            int loc0 = gid * 2 * sum_step;
-            int loc1 = gid * 2 * sqsum_step;
+            int loc0 = gid  * sum_step;
+            int loc1 = gid  * CONVERT(sqsum_step);
             for(int k = 1; k <= 8; k++)
             {
-                if(gid * 8 + k > cols) break;
+                if(gid * 4 + k > cols) break;
                 sum[sum_offset + loc0 + k * sum_step / 4] = 0;
-                sqsum[sqsum_offset + loc1 + k * sqsum_step / 4] = 0;
+                sqsum[sqsum_offset + loc1 + k * sqsum_step / sizeof(TYPE)] = 0;
             }
         }
-        int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
-        int loc_sq0 = sqsum_offset + gid * 2 * sqsum_step + sqsum_step / 4 + i + lid, loc_sq1 = loc_sq0 + sqsum_step ;
+        int loc_s0 = sum_offset + gid  * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
+        int loc_sq0 = sqsum_offset + gid  * CONVERT(sqsum_step) + sqsum_step / sizeof(TYPE) + i + lid, loc_sq1 = loc_sq0 + CONVERT(sqsum_step) ;
+
         if(lid > 0 && (i+lid) <= rows)
         {
             lm_sum[0][bf_loc] += sum_t[0];
@@ -256,37 +263,37 @@ kernel void integral_rows_D4(__global int4 *srcsum,__global float4 * srcsqsum,__
             lm_sqsum[0][bf_loc] += sqsum_t[0];
             lm_sqsum[1][bf_loc] += sqsum_t[1];
             sum_p = (__local int*)(&(lm_sum[0][bf_loc]));
-            sqsum_p = (__local float*)(&(lm_sqsum[0][bf_loc]));
+            sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
             for(int k = 0; k < 4; k++)
             {
-                if(gid * 8 + k >= cols) break;
+                if(gid * 4 + k >= cols) break;
                 sum[loc_s0 + k * sum_step / 4] = sum_p[k];
-                sqsum[loc_sq0 + k * sqsum_step / 4] = sqsum_p[k];
+                sqsum[loc_sq0 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
             }
             sum_p = (__local int*)(&(lm_sum[1][bf_loc]));
-            sqsum_p = (__local float*)(&(lm_sqsum[1][bf_loc]));
+            sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
             for(int k = 0; k < 4; k++)
             {
-                if(gid * 8 + 4 + k >= cols) break;
+                if(gid * 4 + 4 + k >= cols) break;
                 sum[loc_s1 + k * sum_step / 4] = sum_p[k];
-                sqsum[loc_sq1 + k * sqsum_step / 4] = sqsum_p[k];
+                sqsum[loc_sq1 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
             }
-        }
+          }
         barrier(CLK_LOCAL_MEM_FENCE);
     }
 }
 
-kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global float *sqsum,
-                          int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step)
+kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global TYPE *sqsum,
+                          int src_offset,int pre_invalid,int rows,int cols,int src_step,int dst_step, int dst1_step)
 {
     int lid = get_local_id(0);
     int gid = get_group_id(0);
     float4 src_t[2], sum_t[2];
-    float4 sqsum_t[2];
+    TYPE4 sqsum_t[2];
     __local float4 lm_sum[2][LSIZE + LOG_LSIZE];
-    __local float4 lm_sqsum[2][LSIZE + LOG_LSIZE];
+    __local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
     __local float* sum_p;
-    __local float* sqsum_p;
+    __local TYPE* sqsum_p;
     src_step = src_step >> 2;
     gid = gid << 1;
     for(int i = 0; i < rows; i =i + LSIZE_1)
@@ -295,17 +302,17 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
         src_t[1] = (i + lid < rows ? convert_float4(src[src_offset + (lid+i) * src_step + min(gid + 1, cols - 1)]) : (float4)0);
 
         sum_t[0] = (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[0] = (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
+        sqsum_t[0] = (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
         sum_t[1] =  (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[1] =  (i == 0 ? (float4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
+        sqsum_t[1] =  (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
         barrier(CLK_LOCAL_MEM_FENCE);
 
         int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
         lm_sum[0][bf_loc] = src_t[0];
-        lm_sqsum[0][bf_loc] = convert_float4(src_t[0] * src_t[0]);
+        lm_sqsum[0][bf_loc] = convert_TYPE4(src_t[0] * src_t[0]);
 
         lm_sum[1][bf_loc] = src_t[1];
-        lm_sqsum[1][bf_loc] = convert_float4(src_t[1] * src_t[1]);
+        lm_sqsum[1][bf_loc] = convert_TYPE4(src_t[1] * src_t[1]);
 
         int offset = 1;
         for(int d = LSIZE >> 1 ;  d > 0; d>>=1)
@@ -347,6 +354,7 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
         }
         barrier(CLK_LOCAL_MEM_FENCE);
         int loc_s0 = gid * dst_step + i + lid - 1 - pre_invalid * dst_step / 4, loc_s1 = loc_s0 + dst_step ;
+        int loc_sq0 = gid * CONVERT(dst1_step) + i + lid - 1 - pre_invalid * dst1_step / sizeof(TYPE), loc_sq1 = loc_sq0 + CONVERT(dst1_step);
         if(lid > 0 && (i+lid) <= rows)
         {
             lm_sum[0][bf_loc] += sum_t[0];
@@ -354,20 +362,20 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
             lm_sqsum[0][bf_loc] += sqsum_t[0];
             lm_sqsum[1][bf_loc] += sqsum_t[1];
             sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
-            sqsum_p = (__local float*)(&(lm_sqsum[0][bf_loc]));
+            sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
             for(int k = 0; k < 4; k++)
             {
                 if(gid * 4 + k >= cols + pre_invalid || gid * 4 + k < pre_invalid) continue;
                 sum[loc_s0 + k * dst_step / 4] = sum_p[k];
-                sqsum[loc_s0 + k * dst_step / 4] = sqsum_p[k];
+                sqsum[loc_sq0 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
             }
             sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
-            sqsum_p = (__local float*)(&(lm_sqsum[1][bf_loc]));
+            sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
             for(int k = 0; k < 4; k++)
             {
                 if(gid * 4 + k + 4 >= cols + pre_invalid) break;
                 sum[loc_s1 + k * dst_step / 4] = sum_p[k];
-                sqsum[loc_s1 + k * dst_step / 4] = sqsum_p[k];
+                sqsum[loc_sq1 + k * dst1_step / sizeof(TYPE)] = sqsum_p[k];
             }
         }
         barrier(CLK_LOCAL_MEM_FENCE);
@@ -375,30 +383,31 @@ kernel void integral_cols_D5(__global uchar4 *src,__global float *sum ,__global
 }
 
 
-kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,__global float *sum ,
-                          __global float *sqsum,int rows,int cols,int src_step,int sum_step,
+kernel void integral_rows_D5(__global float4 *srcsum,__global TYPE4 * srcsqsum,__global float *sum ,
+                          __global TYPE *sqsum,int rows,int cols,int src_step,int src1_step, int sum_step,
                           int sqsum_step,int sum_offset,int sqsum_offset)
 {
     int lid = get_local_id(0);
     int gid = get_group_id(0);
     float4 src_t[2], sum_t[2];
-    float4 sqsrc_t[2],sqsum_t[2];
+    TYPE4 sqsrc_t[2],sqsum_t[2];
     __local float4 lm_sum[2][LSIZE + LOG_LSIZE];
-    __local float4 lm_sqsum[2][LSIZE + LOG_LSIZE];
+    __local TYPE4 lm_sqsum[2][LSIZE + LOG_LSIZE];
     __local float *sum_p;
-    __local float *sqsum_p;
+    __local TYPE *sqsum_p;
     src_step = src_step >> 4;
+    src1_step = (src1_step / sizeof(TYPE)) >> 2;
     for(int i = 0; i < rows; i =i + LSIZE_1)
     {
         src_t[0] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2] : (float4)0;
-        sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2] : (float4)0;
+        sqsrc_t[0] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid * 2] : (TYPE4)0;
         src_t[1] = i + lid < rows ? srcsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
-        sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src_step + gid * 2 + 1] : (float4)0;
+        sqsrc_t[1] = i + lid < rows ? srcsqsum[(lid+i) * src1_step + gid * 2 + 1] : (TYPE4)0;
 
         sum_t[0] =  (i == 0 ? (float4)0 : lm_sum[0][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[0] =  (i == 0 ? (float4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
+        sqsum_t[0] =  (i == 0 ? (TYPE4)0 : lm_sqsum[0][LSIZE_2 + LOG_LSIZE]);
         sum_t[1] =  (i == 0 ? (float4)0 : lm_sum[1][LSIZE_2 + LOG_LSIZE]);
-        sqsum_t[1] =  (i == 0 ? (float4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
+        sqsum_t[1] =  (i == 0 ? (TYPE4)0 : lm_sqsum[1][LSIZE_2 + LOG_LSIZE]);
         barrier(CLK_LOCAL_MEM_FENCE);
 
         int bf_loc = lid + GET_CONFLICT_OFFSET(lid);
@@ -455,16 +464,16 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
         if(i + lid == 0)
         {
             int loc0 = gid * 2 * sum_step;
-            int loc1 = gid * 2 * sqsum_step;
+            int loc1 = gid * 2 * CONVERT(sqsum_step);
             for(int k = 1; k <= 8; k++)
             {
                 if(gid * 8 + k > cols) break;
                 sum[sum_offset + loc0 + k * sum_step / 4] = 0;
-                sqsum[sqsum_offset + loc1 + k * sqsum_step / 4] = 0;
+                sqsum[sqsum_offset + loc1 + k * sqsum_step / sizeof(TYPE)] = 0;
             }
         }
         int loc_s0 = sum_offset + gid * 2 * sum_step + sum_step / 4 + i + lid, loc_s1 = loc_s0 + sum_step ;
-        int loc_sq0 = sqsum_offset + gid * 2 * sqsum_step + sqsum_step / 4 + i + lid, loc_sq1 = loc_sq0 + sqsum_step ;
+        int loc_sq0 = sqsum_offset + gid * 2 * CONVERT(sqsum_step) + sqsum_step / sizeof(TYPE) + i + lid, loc_sq1 = loc_sq0 + CONVERT(sqsum_step) ;
         if(lid > 0 && (i+lid) <= rows)
         {
             lm_sum[0][bf_loc] += sum_t[0];
@@ -472,20 +481,20 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
             lm_sqsum[0][bf_loc] += sqsum_t[0];
             lm_sqsum[1][bf_loc] += sqsum_t[1];
             sum_p = (__local float*)(&(lm_sum[0][bf_loc]));
-            sqsum_p = (__local float*)(&(lm_sqsum[0][bf_loc]));
+            sqsum_p = (__local TYPE*)(&(lm_sqsum[0][bf_loc]));
             for(int k = 0; k < 4; k++)
             {
                 if(gid * 8 + k >= cols) break;
                 sum[loc_s0 + k * sum_step / 4] = sum_p[k];
-                sqsum[loc_sq0 + k * sqsum_step / 4] = sqsum_p[k];
+                sqsum[loc_sq0 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
             }
             sum_p = (__local float*)(&(lm_sum[1][bf_loc]));
-            sqsum_p = (__local float*)(&(lm_sqsum[1][bf_loc]));
+            sqsum_p = (__local TYPE*)(&(lm_sqsum[1][bf_loc]));
             for(int k = 0; k < 4; k++)
             {
                 if(gid * 8 + 4 + k >= cols) break;
                 sum[loc_s1 + k * sum_step / 4] = sum_p[k];
-                sqsum[loc_sq1 + k * sqsum_step / 4] = sqsum_p[k];
+                sqsum[loc_sq1 + k * sqsum_step / sizeof(TYPE)] = sqsum_p[k];
             }
         }
         barrier(CLK_LOCAL_MEM_FENCE);
diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp
index d30def816..353beaa59 100644
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -275,23 +275,33 @@ OCL_TEST_P(CornerHarris, Mat)
 
 //////////////////////////////////integral/////////////////////////////////////////////////
 
-typedef ImgprocTestBase Integral;
+struct Integral :
+        public ImgprocTestBase
+{
+    int sdepth;
 
+    virtual void SetUp()
+    {
+        type = GET_PARAM(0);
+        blockSize = GET_PARAM(1);
+        sdepth = GET_PARAM(2);
+        useRoi = GET_PARAM(3);
+    }
+};
 OCL_TEST_P(Integral, Mat1)
 {
     for (int j = 0; j < LOOP_TIMES; j++)
     {
         random_roi();
 
-        ocl::integral(gsrc_roi, gdst_roi);
-        integral(src_roi, dst_roi);
+        ocl::integral(gsrc_roi, gdst_roi, sdepth);
+        integral(src_roi, dst_roi, sdepth);
 
         Near();
     }
 }
 
-// TODO wrong output type
-OCL_TEST_P(Integral, DISABLED_Mat2)
+OCL_TEST_P(Integral, Mat2)
 {
     Mat dst1;
     ocl::oclMat gdst1;
@@ -300,10 +310,12 @@ OCL_TEST_P(Integral, DISABLED_Mat2)
     {
         random_roi();
 
-        integral(src_roi, dst1, dst_roi);
-        ocl::integral(gsrc_roi, gdst1, gdst_roi);
+        integral(src_roi, dst_roi, dst1, sdepth);
+        ocl::integral(gsrc_roi, gdst_roi, gdst1, sdepth);
 
         Near();
+        if(gdst1.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE))
+            EXPECT_MAT_NEAR(dst1, Mat(gdst1), 0.);
     }
 }
 
@@ -543,7 +555,7 @@ INSTANTIATE_TEST_CASE_P(Imgproc, CornerHarris, Combine(
 INSTANTIATE_TEST_CASE_P(Imgproc, Integral, Combine(
                             Values((MatType)CV_8UC1), // TODO does not work with CV_32F, CV_64F
                             Values(0), // not used
-                            Values(0), // not used
+                            Values((MatType)CV_32SC1, (MatType)CV_32FC1),
                             Bool()));
 
 INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine(

From 73b34e3f858c74c2da435a9b587455fdd830d161 Mon Sep 17 00:00:00 2001
From: perping <erping@multicorewareinc.com>
Date: Mon, 11 Nov 2013 15:06:58 +0800
Subject: [PATCH 12/45] fix warning.

---
 modules/ocl/doc/image_processing.rst | 8 ++++----
 modules/ocl/src/imgproc.cpp          | 1 -
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/modules/ocl/doc/image_processing.rst b/modules/ocl/doc/image_processing.rst
index 7dde475cc..100876a15 100644
--- a/modules/ocl/doc/image_processing.rst
+++ b/modules/ocl/doc/image_processing.rst
@@ -65,15 +65,15 @@ ocl::integral
 -----------------
 Computes an integral image.
 
-.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum, oclMat &sqsum)
+.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum, oclMat &sqsum, int sdepth=-1)
 
-.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum)
+.. ocv:function:: void ocl::integral(const oclMat &src, oclMat &sum, int sdepth=-1)
 
     :param src: Source image. Only  ``CV_8UC1`` images are supported for now.
 
-    :param sum: Integral image containing 32-bit unsigned integer values packed into  ``CV_32SC1`` .
+    :param sum: Integral image containing 32-bit unsigned integer values packed into ``CV_32SC1``, or 32-bit floating-point values (``CV_32FC1``).
 
-    :param sqsum: Sqsum values is ``CV_32FC1`` type.
+    :param sqsum: Integral image of squared pixel values, of ``CV_32FC1`` or ``CV_64FC1`` type.
 
 .. seealso:: :ocv:func:`integral`
 
diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp
index 54df4d726..1e0560563 100644
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -792,7 +792,6 @@ namespace cv
                 return;
             }
 
-            int depth = src.depth();
             if( sdepth <= 0 )
                 sdepth = CV_32S;
             sdepth = CV_MAT_DEPTH(sdepth);

From 1f421fce01efbfc2472d0b191cfccdd98da2f708 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 11 Nov 2013 13:04:55 +0400
Subject: [PATCH 13/45] added RGB5x5 <-> RGB conversion

---
 modules/ocl/src/color.cpp           | 67 ++++++++++++++++++++++++++++-
 modules/ocl/src/opencl/cvt_color.cl | 58 +++++++++++++++++++++++++
 modules/ocl/test/test_color.cpp     | 28 ++++++++++++
 3 files changed, 151 insertions(+), 2 deletions(-)

diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index f27366a6a..f9bccc89b 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -122,6 +122,50 @@ static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
     openCLExecuteKernel(src.clCxt, &cvt_color, "RGB", gt, lt, args, -1, -1, build_options.c_str());
 }
 
+static void RGB5x52RGB_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits)
+{
+    std::string build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d",
+                                       src.depth(), greenbits, dst.channels());
+    int src_offset = src.offset >> 1, src_step = src.step >> 1;
+    int dst_offset = (int)dst.offset, dst_step = (int)dst.step;
+
+    vector<pair<size_t , const void *> > args;
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+
+    size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
+    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB5x52RGB", gt, lt, args, -1, -1, build_options.c_str());
+}
+
+static void RGB2RGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits)
+{
+    std::string build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d",
+                                       src.depth(), greenbits, src.channels());
+    int src_offset = (int)src.offset, src_step = (int)src.step;
+    int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1;
+
+    vector<pair<size_t , const void *> > args;
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
+    args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
+    args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
+
+    size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
+    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2RGB5x5", gt, lt, args, -1, -1, build_options.c_str());
+}
+
 static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
 {
     Size sz = src.size();
@@ -141,12 +185,31 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         RGB_caller(src, dst, reverse);
         break;
     }
-    /*
     case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
     case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
+    {
+        CV_Assert((scn == 3 || scn == 4) && depth == CV_8U );
+        bidx = code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
+            code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2;
+        int greenbits = code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
+            code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5;
+        dst.create(sz, CV_8UC2);
+        RGB2RGB5x5_caller(src, dst, bidx, greenbits);
+        break;
+    }
     case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
     case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
-    */
+    {
+        dcn = code == CV_BGR5652BGRA || code == CV_BGR5552BGRA || code == CV_BGR5652RGBA || code == CV_BGR5552RGBA ? 4 : 3;
+        CV_Assert((dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U);
+        bidx = code == CV_BGR5652BGR || code == CV_BGR5552BGR ||
+            code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2;
+        int greenbits = code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
+            code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5;
+        dst.create(sz, CV_MAKETYPE(depth, dcn));
+        RGB5x52RGB_caller(src, dst, bidx, greenbits);
+        break;
+    }
     case CV_RGB2GRAY: case CV_BGR2GRAY:
     case CV_RGBA2GRAY: case CV_BGRA2GRAY:
     {
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
index 916d44bf9..b0bd7d084 100644
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ b/modules/ocl/src/opencl/cvt_color.cl
@@ -437,3 +437,61 @@ __kernel void RGB(int cols, int rows, int src_step, int dst_step,
 #endif
     }
 }
+
+///////////////////////////////////// RGB5x5 <-> RGB //////////////////////////////////////
+
+__kernel void RGB5x52RGB(int cols, int rows, int src_step, int dst_step, int bidx,
+                         __global const ushort * src, __global uchar * dst,
+                         int src_offset, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < rows && x < cols)
+    {
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + (x << 2));
+        ushort t = src[src_idx];
+
+#if greenbits == 6
+        dst[dst_idx + bidx] = (uchar)(t << 3);
+        dst[dst_idx + 1] = (uchar)((t >> 3) & ~3);
+        dst[dst_idx + (bidx^2)] = (uchar)((t >> 8) & ~7);
+#else
+        dst[dst_idx + bidx] = (uchar)(t << 3);
+        dst[dst_idx + 1] = (uchar)((t >> 2) & ~7);
+        dst[dst_idx + (bidx^2)] = (uchar)((t >> 7) & ~7);
+#endif
+
+#if dcn == 4
+#if greenbits == 6
+        dst[dst_idx + 3] = 255;
+#else
+        dst[dst_idx + 3] = t & 0x8000 ? 255 : 0;
+#endif
+#endif
+    }
+}
+
+__kernel void RGB2RGB5x5(int cols, int rows, int src_step, int dst_step, int bidx,
+                         __global const uchar * src, __global ushort * dst,
+                         int src_offset, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < rows && x < cols)
+    {
+        int src_idx = mad24(y, src_step, src_offset + (x << 2));
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
+
+#if greenbits == 6
+            dst[dst_idx] = (ushort)((src[src_idx + bidx] >> 3)|((src[src_idx + 1]&~3) << 3)|((src[src_idx + (bidx^2)]&~7) << 8));
+#elif scn == 3
+            dst[dst_idx] = (ushort)((src[src_idx + bidx] >> 3)|((src[src_idx + 1]&~7) << 2)|((src[src_idx + (bidx^2)]&~7) << 7));
+#else
+            dst[dst_idx] = (ushort)((src[src_idx + bidx] >> 3)|((src[src_idx + 1]&~7) << 2)|
+                ((src[src_idx + (bidx^2)]&~7) << 7)|(src[src_idx + 3] ? 0x8000 : 0));
+#endif
+    }
+}
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
index e98268425..105ee89d9 100644
--- a/modules/ocl/test/test_color.cpp
+++ b/modules/ocl/test/test_color.cpp
@@ -181,6 +181,30 @@ OCL_TEST_P(CvtColor, XYZ2BGR) { doTest(3, 3, CVTCODE(XYZ2BGR)); }
 OCL_TEST_P(CvtColor, XYZ2RGBA) { doTest(3, 4, CVTCODE(XYZ2RGB)); }
 OCL_TEST_P(CvtColor, XYZ2BGRA) { doTest(3, 4, CVTCODE(XYZ2BGR)); }
 
+// RGB5x5 <-> RGB
+
+typedef CvtColor CvtColor8u;
+
+OCL_TEST_P(CvtColor8u, BGR5652BGR) { doTest(2, 3, CVTCODE(BGR5652BGR)); }
+OCL_TEST_P(CvtColor8u, BGR5652RGB) { doTest(2, 3, CVTCODE(BGR5652RGB)); }
+OCL_TEST_P(CvtColor8u, BGR5652BGRA) { doTest(2, 4, CVTCODE(BGR5652BGRA)); }
+OCL_TEST_P(CvtColor8u, BGR5652RGBA) { doTest(2, 4, CVTCODE(BGR5652RGBA)); }
+
+OCL_TEST_P(CvtColor8u, BGR5552BGR) { doTest(2, 3, CVTCODE(BGR5552BGR)); }
+OCL_TEST_P(CvtColor8u, BGR5552RGB) { doTest(2, 3, CVTCODE(BGR5552RGB)); }
+OCL_TEST_P(CvtColor8u, BGR5552BGRA) { doTest(2, 4, CVTCODE(BGR5552BGRA)); }
+OCL_TEST_P(CvtColor8u, BGR5552RGBA) { doTest(2, 4, CVTCODE(BGR5552RGBA)); }
+
+OCL_TEST_P(CvtColor8u, BGR2BGR565) { doTest(3, 2, CVTCODE(BGR2BGR565)); }
+OCL_TEST_P(CvtColor8u, RGB2BGR565) { doTest(3, 2, CVTCODE(RGB2BGR565)); }
+OCL_TEST_P(CvtColor8u, BGRA2BGR565) { doTest(4, 2, CVTCODE(BGRA2BGR565)); }
+OCL_TEST_P(CvtColor8u, RGBA2BGR565) { doTest(4, 2, CVTCODE(RGBA2BGR565)); }
+
+OCL_TEST_P(CvtColor8u, BGR2BGR555) { doTest(3, 2, CVTCODE(BGR2BGR555)); }
+OCL_TEST_P(CvtColor8u, RGB2BGR555) { doTest(3, 2, CVTCODE(RGB2BGR555)); }
+OCL_TEST_P(CvtColor8u, BGRA2BGR555) { doTest(4, 2, CVTCODE(BGRA2BGR555)); }
+OCL_TEST_P(CvtColor8u, RGBA2BGR555) { doTest(4, 2, CVTCODE(RGBA2BGR555)); }
+
 // YUV -> RGBA_NV12
 
 struct CvtColor_YUV420 :
@@ -210,6 +234,10 @@ OCL_TEST_P(CvtColor_YUV420, YUV2BGRA_NV12) { doTest(1, 4, CV_YUV2BGRA_NV12); }
 OCL_TEST_P(CvtColor_YUV420, YUV2RGB_NV12) { doTest(1, 3, CV_YUV2RGB_NV12); }
 OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12) { doTest(1, 3, CV_YUV2BGR_NV12); }
 
+
+INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor8u,
+                            testing::Combine(testing::Values(MatDepth(CV_8U)), Bool()));
+
 INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor,
                             testing::Combine(
                                 testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)),

From 3727168b50cc1693366f9cd517f70f5b46dc593e Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Fri, 8 Nov 2013 18:43:15 +0400
Subject: [PATCH 14/45] added a performance test for
 ocl::buildWarpPerspectiveMaps; moved warps to a separate file

---
 modules/ocl/perf/perf_imgproc.cpp | 194 ------------------
 modules/ocl/perf/perf_imgwarp.cpp | 320 ++++++++++++++++++++++++++++++
 modules/ocl/test/test_warp.cpp    |   4 +-
 3 files changed, 322 insertions(+), 196 deletions(-)
 create mode 100644 modules/ocl/perf/perf_imgwarp.cpp

diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp
index c57950ff1..b43458c6a 100644
--- a/modules/ocl/perf/perf_imgproc.cpp
+++ b/modules/ocl/perf/perf_imgproc.cpp
@@ -231,139 +231,6 @@ PERF_TEST_P(integralFixture, integral, OCL_TYPICAL_MAT_SIZES)
         OCL_PERF_ELSE
 }
 
-///////////// WarpAffine ////////////////////////
-
-typedef Size_MatType WarpAffineFixture;
-
-PERF_TEST_P(WarpAffineFixture, WarpAffine,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    static const double coeffs[2][3] =
-    {
-        { cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 },
-        { sin(CV_PI / 6), cos(CV_PI / 6), -100.0 }
-    };
-    Mat M(2, 3, CV_64F, (void *)coeffs);
-    const int interpolation = INTER_NEAREST;
-
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::warpAffine(oclSrc, oclDst, M, srcSize, interpolation);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::warpAffine(src, dst, M, srcSize, interpolation);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// WarpPerspective ////////////////////////
-
-typedef Size_MatType WarpPerspectiveFixture;
-
-PERF_TEST_P(WarpPerspectiveFixture, WarpPerspective,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
-{
-    static const double coeffs[3][3] =
-    {
-        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
-        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
-        {0.0, 0.0, 1.0}
-    };
-    Mat M(3, 3, CV_64F, (void *)coeffs);
-    const int interpolation = INTER_LINEAR;
-
-    const Size_MatType_t params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst)
-            .time(srcSize == OCL_SIZE_4000 ? 18 : srcSize == OCL_SIZE_2000 ? 5 : 2);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::warpPerspective(oclSrc, oclDst, M, srcSize, interpolation);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::warpPerspective(src, dst, M, srcSize, interpolation);
-
-        SANITY_CHECK(dst);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
-///////////// resize ////////////////////////
-
-CV_ENUM(resizeInterType, INTER_NEAREST, INTER_LINEAR)
-
-typedef tuple<Size, MatType, resizeInterType, double> resizeParams;
-typedef TestBaseWithParam<resizeParams> resizeFixture;
-
-PERF_TEST_P(resizeFixture, resize,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
-                               resizeInterType::all(),
-                               ::testing::Values(0.5, 2.0)))
-{
-    const resizeParams params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), interType = get<2>(params);
-    double scale = get<3>(params);
-
-    Mat src(srcSize, type), dst;
-    const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));
-    dst.create(dstSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-    if (interType == INTER_LINEAR && type == CV_8UC4 && OCL_SIZE_4000 == srcSize)
-        declare.time(11);
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
-
-        OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, interType);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::resize(src, dst, Size(), scale, scale, interType);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
 ///////////// threshold////////////////////////
 
 CV_ENUM(ThreshType, THRESH_BINARY, THRESH_TOZERO_INV)
@@ -727,67 +594,6 @@ PERF_TEST_P(meanShiftProcFixture, meanShiftProc,
         OCL_PERF_ELSE
 }
 
-///////////// remap////////////////////////
-
-CV_ENUM(RemapInterType, INTER_NEAREST, INTER_LINEAR)
-
-typedef tuple<Size, MatType, RemapInterType> remapParams;
-typedef TestBaseWithParam<remapParams> remapFixture;
-
-PERF_TEST_P(remapFixture, remap,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
-                               RemapInterType::all()))
-{
-    const remapParams params = GetParam();
-    const Size srcSize = get<0>(params);
-    const int type = get<1>(params), interpolation = get<2>(params);
-
-    Mat src(srcSize, type), dst(srcSize, type);
-    declare.in(src, WARMUP_RNG).out(dst);
-
-    if (srcSize == OCL_SIZE_4000 && interpolation == INTER_LINEAR)
-        declare.time(9);
-
-    Mat xmap, ymap;
-    xmap.create(srcSize, CV_32FC1);
-    ymap.create(srcSize, CV_32FC1);
-
-    for (int i = 0; i < srcSize.height; ++i)
-    {
-        float * const xmap_row = xmap.ptr<float>(i);
-        float * const ymap_row = ymap.ptr<float>(i);
-
-        for (int j = 0; j < srcSize.width; ++j)
-        {
-            xmap_row[j] = (j - srcSize.width * 0.5f) * 0.75f + srcSize.width * 0.5f;
-            ymap_row[j] = (i - srcSize.height * 0.5f) * 0.75f + srcSize.height * 0.5f;
-        }
-    }
-
-    const int borderMode = BORDER_CONSTANT;
-
-    if (RUN_OCL_IMPL)
-    {
-        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
-        ocl::oclMat oclXMap(xmap), oclYMap(ymap);
-
-        OCL_TEST_CYCLE() cv::ocl::remap(oclSrc, oclDst, oclXMap, oclYMap, interpolation, borderMode);
-
-        oclDst.download(dst);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else if (RUN_PLAIN_IMPL)
-    {
-        TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
-
-        SANITY_CHECK(dst, 1 + DBL_EPSILON);
-    }
-    else
-        OCL_PERF_ELSE
-}
-
 ///////////// CLAHE ////////////////////////
 
 typedef TestBaseWithParam<Size> CLAHEFixture;
diff --git a/modules/ocl/perf/perf_imgwarp.cpp b/modules/ocl/perf/perf_imgwarp.cpp
new file mode 100644
index 000000000..0aff45e9a
--- /dev/null
+++ b/modules/ocl/perf/perf_imgwarp.cpp
@@ -0,0 +1,320 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
+// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// @Authors
+//    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors as is and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+#include "perf_precomp.hpp"
+
+using namespace perf;
+using std::tr1::tuple;
+using std::tr1::get;
+
+///////////// WarpAffine ////////////////////////
+
+typedef Size_MatType WarpAffineFixture;
+
+PERF_TEST_P(WarpAffineFixture, WarpAffine,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
+{
+    static const double coeffs[2][3] =
+    {
+        { cos(CV_PI / 6), -sin(CV_PI / 6), 100.0 },
+        { sin(CV_PI / 6), cos(CV_PI / 6), -100.0 }
+    };
+    Mat M(2, 3, CV_64F, (void *)coeffs);
+    const int interpolation = INTER_NEAREST;
+
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
+
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::warpAffine(oclSrc, oclDst, M, srcSize, interpolation);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::warpAffine(src, dst, M, srcSize, interpolation);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+///////////// WarpPerspective ////////////////////////
+
+typedef Size_MatType WarpPerspectiveFixture;
+
+PERF_TEST_P(WarpPerspectiveFixture, WarpPerspective,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4)))
+{
+    static const double coeffs[3][3] =
+    {
+        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
+        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
+        {0.0, 0.0, 1.0}
+    };
+    Mat M(3, 3, CV_64F, (void *)coeffs);
+    const int interpolation = INTER_LINEAR;
+
+    const Size_MatType_t params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params);
+
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst)
+            .time(srcSize == OCL_SIZE_4000 ? 18 : srcSize == OCL_SIZE_2000 ? 5 : 2);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::warpPerspective(oclSrc, oclDst, M, srcSize, interpolation);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::warpPerspective(src, dst, M, srcSize, interpolation);
+
+        SANITY_CHECK(dst);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+///////////// resize ////////////////////////
+
+CV_ENUM(resizeInterType, INTER_NEAREST, INTER_LINEAR)
+
+typedef tuple<Size, MatType, resizeInterType, double> resizeParams;
+typedef TestBaseWithParam<resizeParams> resizeFixture;
+
+PERF_TEST_P(resizeFixture, resize,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
+                               resizeInterType::all(),
+                               ::testing::Values(0.5, 2.0)))
+{
+    const resizeParams params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), interType = get<2>(params);
+    double scale = get<3>(params);
+
+    Mat src(srcSize, type), dst;
+    const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));
+    dst.create(dstSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+    if (interType == INTER_LINEAR && type == CV_8UC4 && OCL_SIZE_4000 == srcSize)
+        declare.time(11);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(dstSize, type);
+
+        OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, interType);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::resize(src, dst, Size(), scale, scale, interType);
+
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+///////////// remap////////////////////////
+
+CV_ENUM(RemapInterType, INTER_NEAREST, INTER_LINEAR)
+
+typedef tuple<Size, MatType, RemapInterType> remapParams;
+typedef TestBaseWithParam<remapParams> remapFixture;
+
+PERF_TEST_P(remapFixture, remap,
+            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
+                               OCL_PERF_ENUM(CV_8UC1, CV_8UC4),
+                               RemapInterType::all()))
+{
+    const remapParams params = GetParam();
+    const Size srcSize = get<0>(params);
+    const int type = get<1>(params), interpolation = get<2>(params);
+
+    Mat src(srcSize, type), dst(srcSize, type);
+    declare.in(src, WARMUP_RNG).out(dst);
+
+    if (srcSize == OCL_SIZE_4000 && interpolation == INTER_LINEAR)
+        declare.time(9);
+
+    Mat xmap, ymap;
+    xmap.create(srcSize, CV_32FC1);
+    ymap.create(srcSize, CV_32FC1);
+
+    for (int i = 0; i < srcSize.height; ++i)
+    {
+        float * const xmap_row = xmap.ptr<float>(i);
+        float * const ymap_row = ymap.ptr<float>(i);
+
+        for (int j = 0; j < srcSize.width; ++j)
+        {
+            xmap_row[j] = (j - srcSize.width * 0.5f) * 0.75f + srcSize.width * 0.5f;
+            ymap_row[j] = (i - srcSize.height * 0.5f) * 0.75f + srcSize.height * 0.5f;
+        }
+    }
+
+    const int borderMode = BORDER_CONSTANT;
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclSrc(src), oclDst(srcSize, type);
+        ocl::oclMat oclXMap(xmap), oclYMap(ymap);
+
+        OCL_TEST_CYCLE() cv::ocl::remap(oclSrc, oclDst, oclXMap, oclYMap, interpolation, borderMode);
+
+        oclDst.download(dst);
+
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() cv::remap(src, dst, xmap, ymap, interpolation, borderMode);
+
+        SANITY_CHECK(dst, 1 + DBL_EPSILON);
+    }
+    else
+        OCL_PERF_ELSE
+}
+
+
+///////////// buildWarpPerspectiveMaps ////////////////////////
+
+static void buildWarpPerspectiveMaps(const Mat &M, bool inverse, Size dsize, Mat &xmap, Mat &ymap)
+{
+    CV_Assert(M.rows == 3 && M.cols == 3);
+    CV_Assert(dsize.area() > 0);
+
+    xmap.create(dsize, CV_32FC1);
+    ymap.create(dsize, CV_32FC1);
+
+    float coeffs[3 * 3];
+    Mat coeffsMat(3, 3, CV_32F, (void *)coeffs);
+
+    if (inverse)
+        M.convertTo(coeffsMat, coeffsMat.type());
+    else
+    {
+        cv::Mat iM;
+        invert(M, iM);
+        iM.convertTo(coeffsMat, coeffsMat.type());
+    }
+
+    for (int y = 0; y < dsize.height; ++y)
+    {
+        float * const xmap_ptr = xmap.ptr<float>(y);
+        float * const ymap_ptr = ymap.ptr<float>(y);
+
+        for (int x = 0; x < dsize.width; ++x)
+        {
+            float coeff = 1.0f / (x * coeffs[6] + y * coeffs[7] + coeffs[8]);
+            xmap_ptr[x] = (x * coeffs[0] + y * coeffs[1] + coeffs[2]) * coeff;
+            ymap_ptr[x] = (x * coeffs[3] + y * coeffs[4] + coeffs[5]) * coeff;
+        }
+    }
+}
+
+typedef TestBaseWithParam<Size> buildWarpPerspectiveMapsFixture;
+
+PERF_TEST_P(buildWarpPerspectiveMapsFixture, Inverse, OCL_TYPICAL_MAT_SIZES)
+{
+    static const double coeffs[3][3] =
+    {
+        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
+        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
+        {0.0, 0.0, 1.0}
+    };
+    Mat M(3, 3, CV_64F, (void *)coeffs);
+    const Size dsize = GetParam();
+
+    Mat xmap(dsize, CV_32FC1), ymap(dsize, CV_32FC1);
+    declare.in(M).out(xmap, ymap);
+
+    if (RUN_OCL_IMPL)
+    {
+        ocl::oclMat oclXMap(dsize, CV_32FC1), oclYMap(dsize, CV_32FC1);
+
+        OCL_TEST_CYCLE() cv::ocl::buildWarpPerspectiveMaps(M, true, dsize, oclXMap, oclYMap);
+
+        oclXMap.download(xmap);
+        oclYMap.download(ymap);
+
+        SANITY_CHECK(xmap);
+        SANITY_CHECK(ymap);
+    }
+    else if (RUN_PLAIN_IMPL)
+    {
+        TEST_CYCLE() buildWarpPerspectiveMaps(M, true, dsize, xmap, ymap);
+
+        SANITY_CHECK(xmap);
+        SANITY_CHECK(ymap);
+    }
+    else
+        OCL_PERF_ELSE
+}
diff --git a/modules/ocl/test/test_warp.cpp b/modules/ocl/test/test_warp.cpp
index 3da73dc23..42415d099 100644
--- a/modules/ocl/test/test_warp.cpp
+++ b/modules/ocl/test/test_warp.cpp
@@ -259,8 +259,8 @@ OCL_TEST_P(BuildWarpPerspectiveMaps, Mat)
         buildWarpPerspectiveMaps(M, mapInverse, dsize, xmap_roi, ymap_roi);
         ocl::buildWarpPerspectiveMaps(M, mapInverse, dsize, gxmap_roi, gymap_roi);
 
-        Near(1e-6);
-        Near1(1e-6);
+        Near(5e-3);
+        Near1(5e-3);
     }
 }
 

From eff53e95f9a4a504417026493b83c75a5e1bdfd3 Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Mon, 11 Nov 2013 18:02:56 +0400
Subject: [PATCH 15/45] Fixed the "image sequence" capture not failing when a
 pattern isn't found.

at can't be a null pointer, so the condition was always false, and
a nonsensical pattern like "image.png%00d" was being inferred.
---
 modules/highgui/src/cap_images.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/highgui/src/cap_images.cpp b/modules/highgui/src/cap_images.cpp
index 4cf51d8e5..e2feb0d27 100644
--- a/modules/highgui/src/cap_images.cpp
+++ b/modules/highgui/src/cap_images.cpp
@@ -203,7 +203,7 @@ static char* icvExtractPattern(const char *filename, unsigned *offset)
         for(at = name; *at && !isdigit(*at); at++)
             ;
 
-        if(!at)
+        if(!*at)
             return 0;
 
         sscanf(at, "%u", offset);

From daedc6f45428cc96100aa4c9de48105f136a2437 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 11 Nov 2013 17:53:21 +0400
Subject: [PATCH 16/45] removed 2000x2000, 4000x4000 from test sizes of
 ocl::adaptiveBilateralFilter

---
 modules/ocl/perf/perf_filters.cpp | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp
index b6dcd2a08..ef0ea7b45 100644
--- a/modules/ocl/perf/perf_filters.cpp
+++ b/modules/ocl/perf/perf_filters.cpp
@@ -366,8 +366,7 @@ PERF_TEST_P(BilateralFixture, Bilateral,
 typedef Size_MatType adaptiveBilateralFixture;
 
 PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral,
-            ::testing::Combine(OCL_TYPICAL_MAT_SIZES,
-                               OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
+            ::testing::Combine(::testing::Values(OCL_SIZE_1000), OCL_PERF_ENUM(CV_8UC1, CV_8UC3)))
 {
     const Size_MatType_t params = GetParam();
     const Size srcSize = get<0>(params);
@@ -378,11 +377,6 @@ PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral,
     Mat src(srcSize, type), dst(srcSize, type);
     declare.in(src, WARMUP_RNG).out(dst);
 
-    if (srcSize == OCL_SIZE_4000)
-        declare.time(type == CV_8UC3 ? 46 : 28);
-    else if (srcSize == OCL_SIZE_2000)
-        declare.time(type == CV_8UC3 ? 11 : 7);
-
     if (RUN_OCL_IMPL)
     {
         ocl::oclMat oclSrc(src), oclDst(srcSize, type);

From 405227d531f638a646e93f30b04888cdb96991c7 Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Mon, 11 Nov 2013 18:30:04 +0400
Subject: [PATCH 17/45] Replaced the image used in the
 Highgui_Video.ffmpeg_image test.

Our prebuilt FFmpeg Windows binaries don't have PNG support enabled
(because that requires zlib), so that makes a PNG image a bad choice
for this test.

When FFmpeg doesn't support PNG, VideoCapture falls back to the
"image sequence" implementation, which doesn't work for single images.
---
 modules/highgui/test/test_ffmpeg.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/highgui/test/test_ffmpeg.cpp b/modules/highgui/test/test_ffmpeg.cpp
index 468fe77f7..30410eaab 100644
--- a/modules/highgui/test/test_ffmpeg.cpp
+++ b/modules/highgui/test/test_ffmpeg.cpp
@@ -154,7 +154,7 @@ public:
     {
         try
         {
-            string filename = ts->get_data_path() + "../cv/features2d/tsukuba.png";
+            string filename = ts->get_data_path() + "readwrite/ordinary.bmp";
             VideoCapture cap(filename);
             Mat img0 = imread(filename, 1);
             Mat img, img_next;

From eda6360fa329c12edbdec55ef0bbfffd63f6c590 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 11 Nov 2013 16:30:23 +0400
Subject: [PATCH 18/45] added RGB5x5 <-> Gray

---
 modules/ocl/src/color.cpp           | 30 +++++++++++++-----
 modules/ocl/src/opencl/cvt_color.cl | 49 +++++++++++++++++++++++++++++
 modules/ocl/test/test_color.cpp     |  8 +++++
 3 files changed, 80 insertions(+), 7 deletions(-)

diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index f9bccc89b..b01dcef1f 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -122,12 +122,12 @@ static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
     openCLExecuteKernel(src.clCxt, &cvt_color, "RGB", gt, lt, args, -1, -1, build_options.c_str());
 }
 
-static void RGB5x52RGB_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits)
+static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
 {
     std::string build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d",
                                        src.depth(), greenbits, dst.channels());
     int src_offset = src.offset >> 1, src_step = src.step >> 1;
-    int dst_offset = (int)dst.offset, dst_step = (int)dst.step;
+    int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step / dst.elemSize1();
 
     vector<pair<size_t , const void *> > args;
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
@@ -141,10 +141,10 @@ static void RGB5x52RGB_caller(const oclMat &src, oclMat &dst, int bidx, int gree
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
 
     size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
-    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB5x52RGB", gt, lt, args, -1, -1, build_options.c_str());
+    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
 }
 
-static void RGB2RGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits)
+static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
 {
     std::string build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d",
                                        src.depth(), greenbits, src.channels());
@@ -163,7 +163,7 @@ static void RGB2RGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int gree
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
 
     size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
-    openCLExecuteKernel(src.clCxt, &cvt_color, "RGB2RGB5x5", gt, lt, args, -1, -1, build_options.c_str());
+    openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
 }
 
 static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
@@ -194,7 +194,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         int greenbits = code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
             code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5;
         dst.create(sz, CV_8UC2);
-        RGB2RGB5x5_caller(src, dst, bidx, greenbits);
+        toRGB5x5_caller(src, dst, bidx, greenbits, "RGB2RGB5x5");
         break;
     }
     case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
@@ -207,7 +207,23 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         int greenbits = code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
             code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5;
         dst.create(sz, CV_MAKETYPE(depth, dcn));
-        RGB5x52RGB_caller(src, dst, bidx, greenbits);
+        fromRGB5x5_caller(src, dst, bidx, greenbits, "RGB5x52RGB");
+        break;
+    }
+    case CV_BGR5652GRAY: case CV_BGR5552GRAY:
+    {
+        CV_Assert(scn == 2 && depth == CV_8U);
+        dst.create(sz, CV_8UC1);
+        int greenbits = code == CV_BGR5652GRAY ? 6 : 5;
+        fromRGB5x5_caller(src, dst, -1, greenbits, "BGR5x52Gray");
+        break;
+    }
+    case CV_GRAY2BGR565: case CV_GRAY2BGR555:
+    {
+        CV_Assert(scn == 1 && depth == CV_8U);
+        dst.create(sz, CV_8UC2);
+        int greenbits = code == CV_GRAY2BGR565 ? 6 : 5;
+        toRGB5x5_caller(src, dst, -1, greenbits, "Gray2BGR5x5");
         break;
     }
     case CV_RGB2GRAY: case CV_BGR2GRAY:
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
index b0bd7d084..210d1b766 100644
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ b/modules/ocl/src/opencl/cvt_color.cl
@@ -495,3 +495,52 @@ __kernel void RGB2RGB5x5(int cols, int rows, int src_step, int dst_step, int bid
 #endif
     }
 }
+
+///////////////////////////////////// RGB5x5 <-> Gray //////////////////////////////////////
+
+__kernel void BGR5x52Gray(int cols, int rows, int src_step, int dst_step, int bidx,
+                          __global const ushort * src, __global uchar * dst,
+                          int src_offset, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < rows && x < cols)
+    {
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
+        int t = src[src_idx];
+
+#if greenbits == 6
+        dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
+                                         ((t >> 3) & 0xfc)*G2Y +
+                                         ((t >> 8) & 0xf8)*R2Y, yuv_shift);
+#else
+        dst[dst_idx] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
+                                         ((t >> 2) & 0xf8)*G2Y +
+                                         ((t >> 7) & 0xf8)*R2Y, yuv_shift);
+#endif
+    }
+}
+
+__kernel void Gray2BGR5x5(int cols, int rows, int src_step, int dst_step, int bidx,
+                          __global const uchar * src, __global ushort * dst,
+                          int src_offset, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < rows && x < cols)
+    {
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
+        int t = src[src_idx];
+
+#if greenbits == 6
+        dst[dst_idx] = (ushort)((t >> 3) | ((t & ~3) << 3) | ((t & ~7) << 8));
+#else
+        t >>= 3;
+        dst[dst_idx] = (ushort)(t|(t << 5)|(t << 10));
+#endif
+    }
+}
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
index 105ee89d9..732a0f8e4 100644
--- a/modules/ocl/test/test_color.cpp
+++ b/modules/ocl/test/test_color.cpp
@@ -205,6 +205,14 @@ OCL_TEST_P(CvtColor8u, RGB2BGR555) { doTest(3, 2, CVTCODE(RGB2BGR555)); }
 OCL_TEST_P(CvtColor8u, BGRA2BGR555) { doTest(4, 2, CVTCODE(BGRA2BGR555)); }
 OCL_TEST_P(CvtColor8u, RGBA2BGR555) { doTest(4, 2, CVTCODE(RGBA2BGR555)); }
 
+// RGB5x5 <-> Gray
+
+OCL_TEST_P(CvtColor8u, BGR5652GRAY) { doTest(2, 1, CVTCODE(BGR5652GRAY)); }
+OCL_TEST_P(CvtColor8u, BGR5552GRAY) { doTest(2, 1, CVTCODE(BGR5552GRAY)); }
+
+OCL_TEST_P(CvtColor8u, GRAY2BGR565) { doTest(1, 2, CVTCODE(GRAY2BGR565)); }
+OCL_TEST_P(CvtColor8u, GRAY2BGR555) { doTest(1, 2, CVTCODE(GRAY2BGR555)); }
+
 // YUV -> RGBA_NV12
 
 struct CvtColor_YUV420 :

From 8af626d295a18c84f4b35680222885435e8a4506 Mon Sep 17 00:00:00 2001
From: perping <erping@multicorewareinc.com>
Date: Tue, 12 Nov 2013 13:56:47 +0800
Subject: [PATCH 19/45] fix some bug about haar and match_template.

---
 modules/ocl/perf/perf_match_template.cpp |  4 +--
 modules/ocl/src/haar.cpp                 | 34 ++++++++++++++++++++----
 modules/ocl/src/match_template.cpp       |  6 ++---
 3 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/modules/ocl/perf/perf_match_template.cpp b/modules/ocl/perf/perf_match_template.cpp
index d6f7fe0f3..3ee038a84 100644
--- a/modules/ocl/perf/perf_match_template.cpp
+++ b/modules/ocl/perf/perf_match_template.cpp
@@ -108,13 +108,13 @@ PERF_TEST_P(CV_TM_CCORR_NORMEDFixture, matchTemplate, OCL_TYPICAL_MAT_SIZES)
 
         oclDst.download(dst);
 
-        SANITY_CHECK(dst, 2e-2);
+        SANITY_CHECK(dst, 3e-2);
     }
     else if (RUN_PLAIN_IMPL)
     {
         TEST_CYCLE() cv::matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED);
 
-        SANITY_CHECK(dst, 2e-2);
+        SANITY_CHECK(dst, 3e-2);
     }
     else
         OCL_PERF_ELSE
diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp
index b79ec0fdc..ff0c007d9 100644
--- a/modules/ocl/src/haar.cpp
+++ b/modules/ocl/src/haar.cpp
@@ -754,6 +754,15 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
         cv::Rect roi, roi2;
         cv::Mat imgroi, imgroisq;
         cv::ocl::oclMat resizeroi, gimgroi, gimgroisq;
+        int sdepth = 0;
+        if(gsqsum.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE))
+            sdepth = CV_64FC1;
+        else
+            sdepth = CV_32FC1;
+        sdepth = CV_MAT_DEPTH(sdepth);
+        int type = CV_MAKE_TYPE(sdepth, 1);
+
+        cv::ocl::oclMat gsqsum_t(gsqsum.size(), type);
         int grp_per_CU = 12;
 
         size_t blocksize = 8;
@@ -773,7 +782,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
             roi2 = Rect(0, 0, sz.width - 1, sz.height - 1);
             resizeroi = gimg1(roi2);
             gimgroi = gsum(roi);
-            gimgroisq = gsqsum(roi);
+            gimgroisq = gsqsum_t(roi);
             int width = gimgroi.cols - 1 - cascade->orig_window_size.width;
             int height = gimgroi.rows - 1 - cascade->orig_window_size.height;
             scaleinfo[i].width_height = (width << 16) | height;
@@ -787,8 +796,10 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
             scaleinfo[i].factor = factor;
             cv::ocl::resize(gimg, resizeroi, Size(sz.width - 1, sz.height - 1), 0, 0, INTER_LINEAR);
             cv::ocl::integral(resizeroi, gimgroi, gimgroisq);
+
             indexy += sz.height;
         }
+        gsqsum_t.convertTo(gsqsum, CV_32FC1);
 
         gcascade   = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
         stage      = (GpuHidHaarStageClassifier *)(gcascade + 1);
@@ -996,7 +1007,9 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
         int n_factors = 0;
         oclMat gsum;
         oclMat gsqsum;
-        cv::ocl::integral(gimg, gsum, gsqsum);
+        cv::ocl::oclMat gsqsum_t;
+        cv::ocl::integral(gimg, gsum, gsqsum_t);
+        gsqsum_t.convertTo(gsqsum, CV_32FC1);
         CvSize sz;
         vector<CvSize> sizev;
         vector<float> scalev;
@@ -1259,10 +1272,19 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
     if( (flags & CV_HAAR_SCALE_IMAGE) )
     {
         int indexy = 0;
+        int sdepth = 0;
         CvSize sz;
 
         cv::Rect roi, roi2;
         cv::ocl::oclMat resizeroi, gimgroi, gimgroisq;
+        if(gsqsum.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE))
+            sdepth = CV_64FC1;
+        else
+            sdepth = CV_32FC1;
+        sdepth = CV_MAT_DEPTH(sdepth);
+        int type = CV_MAKE_TYPE(sdepth, 1);
+
+        cv::ocl::oclMat gsqsum_t(gsqsum.size(), type);
 
         for( int i = 0; i < m_loopcount; i++ )
         {
@@ -1271,13 +1293,13 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
             roi2 = Rect(0, 0, sz.width - 1, sz.height - 1);
             resizeroi = gimg1(roi2);
             gimgroi = gsum(roi);
-            gimgroisq = gsqsum(roi);
+            gimgroisq = gsqsum_t(roi);
 
             cv::ocl::resize(gimg, resizeroi, Size(sz.width - 1, sz.height - 1), 0, 0, INTER_LINEAR);
             cv::ocl::integral(resizeroi, gimgroi, gimgroisq);
             indexy += sz.height;
         }
-
+        gsqsum_t.convertTo(gsqsum, CV_32FC1);
         gcascade   = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
         stage      = (GpuHidHaarStageClassifier *)(gcascade + 1);
 
@@ -1338,7 +1360,9 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
     }
     else
     {
-        cv::ocl::integral(gimg, gsum, gsqsum);
+        cv::ocl::oclMat gsqsum_t;
+        cv::ocl::integral(gimg, gsum, gsqsum_t);
+        gsqsum_t.convertTo(gsqsum, CV_32FC1);
 
         gcascade   = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
 
diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp
index afd68ffe4..d18bacc6f 100644
--- a/modules/ocl/src/match_template.cpp
+++ b/modules/ocl/src/match_template.cpp
@@ -245,12 +245,12 @@ namespace cv
         void matchTemplate_CCORR_NORMED(
             const oclMat &image, const oclMat &templ, oclMat &result, MatchTemplateBuf &buf)
         {
+            cv::ocl::oclMat temp;
             matchTemplate_CCORR(image, templ, result, buf);
             buf.image_sums.resize(1);
             buf.image_sqsums.resize(1);
-
-            integral(image.reshape(1), buf.image_sums[0], buf.image_sqsums[0]);
-
+            integral(image.reshape(1), buf.image_sums[0], temp);
+            temp.convertTo(buf.image_sqsums[0], CV_32FC1);
             unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
 
             Context *clCxt = image.clCxt;

From dee584ee152e25df476878be801b15b4236d3204 Mon Sep 17 00:00:00 2001
From: perping <erping@multicorewareinc.com>
Date: Tue, 12 Nov 2013 17:54:03 +0800
Subject: [PATCH 20/45] fix haar

---
 modules/ocl/include/opencv2/ocl/ocl.hpp |  2 +-
 modules/ocl/src/haar.cpp                | 55 +++++++++++++++----------
 2 files changed, 35 insertions(+), 22 deletions(-)

diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp
index bdf83257c..b8c9b8535 100644
--- a/modules/ocl/include/opencv2/ocl/ocl.hpp
+++ b/modules/ocl/include/opencv2/ocl/ocl.hpp
@@ -939,7 +939,7 @@ namespace cv
             Size m_maxSize;
             vector<CvSize> sizev;
             vector<float> scalev;
-            oclMat gimg1, gsum, gsqsum;
+            oclMat gimg1, gsum, gsqsum, gsqsum_t;
             void * buffers;
         };
 
diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp
index ff0c007d9..db3dd46e8 100644
--- a/modules/ocl/src/haar.cpp
+++ b/modules/ocl/src/haar.cpp
@@ -747,6 +747,15 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
         oclMat gsum(totalheight + 4, gimg.cols + 1, CV_32SC1);
         oclMat gsqsum(totalheight + 4, gimg.cols + 1, CV_32FC1);
 
+        int sdepth = 0;
+        if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
+            sdepth = CV_64FC1;
+        else
+            sdepth = CV_32FC1;
+        sdepth = CV_MAT_DEPTH(sdepth);
+        int type = CV_MAKE_TYPE(sdepth, 1);
+        oclMat gsqsum_t(totalheight + 4, gimg.cols + 1, type);
+
         cl_mem stagebuffer;
         cl_mem nodebuffer;
         cl_mem candidatebuffer;
@@ -754,15 +763,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
         cv::Rect roi, roi2;
         cv::Mat imgroi, imgroisq;
         cv::ocl::oclMat resizeroi, gimgroi, gimgroisq;
-        int sdepth = 0;
-        if(gsqsum.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE))
-            sdepth = CV_64FC1;
-        else
-            sdepth = CV_32FC1;
-        sdepth = CV_MAT_DEPTH(sdepth);
-        int type = CV_MAKE_TYPE(sdepth, 1);
 
-        cv::ocl::oclMat gsqsum_t(gsqsum.size(), type);
         int grp_per_CU = 12;
 
         size_t blocksize = 8;
@@ -799,7 +800,10 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
 
             indexy += sz.height;
         }
-        gsqsum_t.convertTo(gsqsum, CV_32FC1);
+        if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
+            gsqsum_t.convertTo(gsqsum, CV_32FC1);
+        else
+            gsqsum = gsqsum_t;
 
         gcascade   = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
         stage      = (GpuHidHaarStageClassifier *)(gcascade + 1);
@@ -1007,7 +1011,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
         int n_factors = 0;
         oclMat gsum;
         oclMat gsqsum;
-        cv::ocl::oclMat gsqsum_t;
+        oclMat gsqsum_t;
         cv::ocl::integral(gimg, gsum, gsqsum_t);
         gsqsum_t.convertTo(gsqsum, CV_32FC1);
         CvSize sz;
@@ -1277,14 +1281,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
 
         cv::Rect roi, roi2;
         cv::ocl::oclMat resizeroi, gimgroi, gimgroisq;
-        if(gsqsum.clCxt->supportsFeature(ocl::FEATURE_CL_DOUBLE))
-            sdepth = CV_64FC1;
-        else
-            sdepth = CV_32FC1;
-        sdepth = CV_MAT_DEPTH(sdepth);
-        int type = CV_MAKE_TYPE(sdepth, 1);
-
-        cv::ocl::oclMat gsqsum_t(gsqsum.size(), type);
 
         for( int i = 0; i < m_loopcount; i++ )
         {
@@ -1299,7 +1295,11 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
             cv::ocl::integral(resizeroi, gimgroi, gimgroisq);
             indexy += sz.height;
         }
-        gsqsum_t.convertTo(gsqsum, CV_32FC1);
+        if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
+            gsqsum_t.convertTo(gsqsum, CV_32FC1);
+        else
+            gsqsum = gsqsum_t;
+
         gcascade   = (GpuHidHaarClassifierCascade *)(cascade->hid_cascade);
         stage      = (GpuHidHaarStageClassifier *)(gcascade + 1);
 
@@ -1360,9 +1360,11 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
     }
     else
     {
-        cv::ocl::oclMat gsqsum_t;
         cv::ocl::integral(gimg, gsum, gsqsum_t);
-        gsqsum_t.convertTo(gsqsum, CV_32FC1);
+        if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
+            gsqsum_t.convertTo(gsqsum, CV_32FC1);
+        else
+            gsqsum = gsqsum_t;
 
         gcascade   = (GpuHidHaarClassifierCascade *)cascade->hid_cascade;
 
@@ -1588,6 +1590,7 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
             gimg1.release();
             gsum.release();
             gsqsum.release();
+            gsqsum_t.release();
         }
         else if (!(m_flags & CV_HAAR_SCALE_IMAGE) && (flags & CV_HAAR_SCALE_IMAGE))
         {
@@ -1662,6 +1665,16 @@ void cv::ocl::OclCascadeClassifierBuf::CreateFactorRelatedBufs(
         gsum.create(totalheight + 4, cols + 1, CV_32SC1);
         gsqsum.create(totalheight + 4, cols + 1, CV_32FC1);
 
+        int sdepth = 0;
+        if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
+            sdepth = CV_64FC1;
+        else
+            sdepth = CV_32FC1;
+        sdepth = CV_MAT_DEPTH(sdepth);
+        int type = CV_MAKE_TYPE(sdepth, 1);
+
+        gsqsum_t.create(totalheight + 4, cols + 1, type);
+
         scaleinfo = (detect_piramid_info *)malloc(sizeof(detect_piramid_info) * loopcount);
         for( int i = 0; i < loopcount; i++ )
         {

From d6e0ef2710071f93953837ec44a21d81218ee88a Mon Sep 17 00:00:00 2001
From: perping <erping@multicorewareinc.com>
Date: Tue, 12 Nov 2013 18:13:44 +0800
Subject: [PATCH 21/45] fix linux warning.

---
 modules/ocl/src/haar.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp
index db3dd46e8..bd5a8d1e8 100644
--- a/modules/ocl/src/haar.cpp
+++ b/modules/ocl/src/haar.cpp
@@ -1276,7 +1276,6 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
     if( (flags & CV_HAAR_SCALE_IMAGE) )
     {
         int indexy = 0;
-        int sdepth = 0;
         CvSize sz;
 
         cv::Rect roi, roi2;

From 96121a66c583000bc9a91fe97fe00cb4292fe251 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Mon, 11 Nov 2013 20:08:40 +0400
Subject: [PATCH 22/45] kernel warnings on AMD

---
 modules/ocl/src/opencl/arithm_LUT.cl          |   6 +-
 .../src/opencl/arithm_absdiff_nonsaturate.cl  |   8 +-
 modules/ocl/src/opencl/arithm_add.cl          |   8 +-
 modules/ocl/src/opencl/arithm_addWeighted.cl  |   8 +-
 modules/ocl/src/opencl/arithm_add_mask.cl     |   8 +-
 modules/ocl/src/opencl/arithm_add_scalar.cl   |   8 +-
 .../ocl/src/opencl/arithm_add_scalar_mask.cl  |   8 +-
 .../arithm_bitwise_binary_scalar_mask.cl      |   8 -
 modules/ocl/src/opencl/arithm_bitwise_not.cl  |   8 +-
 modules/ocl/src/opencl/arithm_cartToPolar.cl  |  41 +++--
 modules/ocl/src/opencl/arithm_compare.cl      |   8 +-
 modules/ocl/src/opencl/arithm_exp.cl          |   8 +-
 modules/ocl/src/opencl/arithm_flip.cl         |   8 +-
 modules/ocl/src/opencl/arithm_log.cl          |   6 +-
 modules/ocl/src/opencl/arithm_magnitude.cl    |   6 +-
 modules/ocl/src/opencl/arithm_minMax.cl       |   2 +-
 modules/ocl/src/opencl/arithm_minMaxLoc.cl    |   7 +-
 .../ocl/src/opencl/arithm_minMaxLoc_mask.cl   |   7 +-
 modules/ocl/src/opencl/arithm_nonzero.cl      |   2 +-
 modules/ocl/src/opencl/arithm_phase.cl        |  22 +--
 modules/ocl/src/opencl/arithm_polarToCart.cl  |  14 +-
 modules/ocl/src/opencl/arithm_pow.cl          |  28 ++--
 modules/ocl/src/opencl/arithm_setidentity.cl  |   8 +-
 modules/ocl/src/opencl/arithm_sum.cl          |   8 +-
 modules/ocl/src/opencl/arithm_transpose.cl    |   2 +-
 modules/ocl/src/opencl/bgfg_mog.cl            |  21 ++-
 modules/ocl/src/opencl/blend_linear.cl        |   2 +-
 modules/ocl/src/opencl/brute_force_match.cl   |  15 +-
 modules/ocl/src/opencl/convertC3C4.cl         |  20 ++-
 modules/ocl/src/opencl/filtering_boxFilter.cl |   4 +
 modules/ocl/src/opencl/filtering_filter2D.cl  |   4 +
 .../src/opencl/haarobjectdetect_scaled2.cl    |   6 +-
 modules/ocl/src/opencl/imgproc_convolve.cl    |   6 +-
 .../ocl/src/opencl/imgproc_copymakeboder.cl   |   2 +-
 modules/ocl/src/opencl/imgproc_integral.cl    |   9 +-
 .../ocl/src/opencl/imgproc_integral_sum.cl    |   8 +-
 modules/ocl/src/opencl/imgproc_remap.cl       |   8 +-
 modules/ocl/src/opencl/imgproc_resize.cl      |   6 +-
 modules/ocl/src/opencl/imgproc_threshold.cl   |   2 +-
 modules/ocl/src/opencl/imgproc_warpAffine.cl  |   8 +-
 .../ocl/src/opencl/imgproc_warpPerspective.cl |   8 +-
 .../src/opencl/kernel_stablesort_by_key.cl    |  29 ----
 modules/ocl/src/opencl/knearest.cl            |   7 +-
 modules/ocl/src/opencl/match_template.cl      |  10 +-
 modules/ocl/src/opencl/merge_mat.cl           |   8 +-
 modules/ocl/src/opencl/moments.cl             |   8 +-
 modules/ocl/src/opencl/operator_convertTo.cl  |   4 +
 modules/ocl/src/opencl/operator_copyToM.cl    |   8 +-
 modules/ocl/src/opencl/operator_setTo.cl      |   8 +-
 modules/ocl/src/opencl/operator_setToM.cl     |   8 +-
 modules/ocl/src/opencl/pyrlk.cl               |   2 -
 modules/ocl/src/opencl/split_mat.cl           |   7 +-
 modules/ocl/src/opencl/stereobm.cl            |   1 -
 modules/ocl/src/opencl/stereobp.cl            |   8 +-
 modules/ocl/src/opencl/stereocsbp.cl          |  58 ++-----
 modules/ocl/src/opencl/svm.cl                 |  12 +-
 modules/ocl/src/opencl/tvl1flow.cl            | 157 ++++++++----------
 57 files changed, 338 insertions(+), 373 deletions(-)

diff --git a/modules/ocl/src/opencl/arithm_LUT.cl b/modules/ocl/src/opencl/arithm_LUT.cl
index 658e1f4bc..30407bb88 100644
--- a/modules/ocl/src/opencl/arithm_LUT.cl
+++ b/modules/ocl/src/opencl/arithm_LUT.cl
@@ -34,9 +34,13 @@
 //
 //
 
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
+#endif
 
 __kernel void LUT_C1( __global const srcT * src, __global const dstT *lut,
       __global dstT *dst,
diff --git a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
index fcf38749d..c09560a5f 100644
--- a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
+++ b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
@@ -44,11 +44,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/arithm_add.cl b/modules/ocl/src/opencl/arithm_add.cl
index a73b65da6..04262b872 100644
--- a/modules/ocl/src/opencl/arithm_add.cl
+++ b/modules/ocl/src/opencl/arithm_add.cl
@@ -44,11 +44,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/arithm_addWeighted.cl b/modules/ocl/src/opencl/arithm_addWeighted.cl
index 8272806e2..872ee8535 100644
--- a/modules/ocl/src/opencl/arithm_addWeighted.cl
+++ b/modules/ocl/src/opencl/arithm_addWeighted.cl
@@ -43,11 +43,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/arithm_add_mask.cl b/modules/ocl/src/opencl/arithm_add_mask.cl
index ea96d8a8a..b115d9b76 100644
--- a/modules/ocl/src/opencl/arithm_add_mask.cl
+++ b/modules/ocl/src/opencl/arithm_add_mask.cl
@@ -43,11 +43,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/arithm_add_scalar.cl b/modules/ocl/src/opencl/arithm_add_scalar.cl
index b82eff595..05ea48da4 100644
--- a/modules/ocl/src/opencl/arithm_add_scalar.cl
+++ b/modules/ocl/src/opencl/arithm_add_scalar.cl
@@ -43,11 +43,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl
index 0762b19b1..a8b965758 100644
--- a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl
+++ b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl
@@ -43,11 +43,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl b/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
index 03f46ccc0..756f20165 100644
--- a/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
+++ b/modules/ocl/src/opencl/arithm_bitwise_binary_scalar_mask.cl
@@ -43,14 +43,6 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
-#pragma OPENCL EXTENSION cl_amd_fp64:enable
-#endif
-#endif
-
 //////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////bitwise_binary////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/modules/ocl/src/opencl/arithm_bitwise_not.cl b/modules/ocl/src/opencl/arithm_bitwise_not.cl
index 5bc1839d6..b6f76d606 100644
--- a/modules/ocl/src/opencl/arithm_bitwise_not.cl
+++ b/modules/ocl/src/opencl/arithm_bitwise_not.cl
@@ -43,11 +43,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/arithm_cartToPolar.cl b/modules/ocl/src/opencl/arithm_cartToPolar.cl
index e37818c40..f634f2d42 100644
--- a/modules/ocl/src/opencl/arithm_cartToPolar.cl
+++ b/modules/ocl/src/opencl/arithm_cartToPolar.cl
@@ -43,24 +43,21 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-    #pragma OPENCL EXTENSION cl_khr_fp64:enable
-    #define CV_PI   3.1415926535897932384626433832795
-    #ifndef DBL_EPSILON
-        #define DBL_EPSILON 0x1.0p-52
-    #endif
-#else
-    #define CV_PI   3.1415926535897932384626433832795f
-    #ifndef DBL_EPSILON
-        #define DBL_EPSILON 0x1.0p-52f
-    #endif
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
+#define CV_PI M_PI
+#else
+#define CV_PI M_PI_F
 #endif
-
 
 __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int src1_offset,
                                      __global float *src2, int src2_step, int src2_offset,
-                                     __global float *dst1, int dst1_step, int dst1_offset, //magnitude
-                                     __global float *dst2, int dst2_step, int dst2_offset, //cartToPolar
+                                     __global float *dst1, int dst1_step, int dst1_offset, // magnitude
+                                     __global float *dst2, int dst2_step, int dst2_offset, // cartToPolar
                                      int rows, int cols, int angInDegree)
 {
     int x = get_global_id(0);
@@ -81,16 +78,15 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
         float y2 = y * y;
 
         float magnitude = sqrt(x2 + y2);
-        float cartToPolar;
 
         float tmp = y >= 0 ? 0 : CV_PI*2;
         tmp = x < 0 ? CV_PI : tmp;
 
         float tmp1 = y >= 0 ? CV_PI*0.5f : CV_PI*1.5f;
-        cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON)  + tmp :
-                                 tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON);
+        float cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + FLT_EPSILON) + tmp :
+                                 tmp1 - x*y/(y2 + 0.28f*x2 + FLT_EPSILON);
 
-        cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (float)(180/CV_PI);
+        cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (180/CV_PI);
 
         *((__global float *)((__global char *)dst1 + dst1_index)) = magnitude;
         *((__global float *)((__global char *)dst2 + dst2_index)) = cartToPolar;
@@ -98,6 +94,7 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
 }
 
 #if defined (DOUBLE_SUPPORT)
+
 __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int src1_offset,
                                      __global double *src2, int src2_step, int src2_offset,
                                      __global double *dst1, int dst1_step, int dst1_offset,
@@ -122,19 +119,19 @@ __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int s
         double y2 = y * y;
 
         double magnitude = sqrt(x2 + y2);
-        double cartToPolar;
 
         float tmp = y >= 0 ? 0 : CV_PI*2;
         tmp = x < 0 ? CV_PI : tmp;
 
         float tmp1 = y >= 0 ? CV_PI*0.5 : CV_PI*1.5;
-        cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + (float)DBL_EPSILON)  + tmp :
-                                 tmp1 - x*y/(y2 + 0.28f*x2 + (float)DBL_EPSILON);
+        double cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON)  + tmp :
+                                 tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON);
 
-        cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (float)(180/CV_PI);
+        cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (180/CV_PI);
 
         *((__global double *)((__global char *)dst1 + dst1_index)) = magnitude;
         *((__global double *)((__global char *)dst2 + dst2_index)) = cartToPolar;
     }
 }
+
 #endif
diff --git a/modules/ocl/src/opencl/arithm_compare.cl b/modules/ocl/src/opencl/arithm_compare.cl
index 005d3c73f..73e6299bb 100644
--- a/modules/ocl/src/opencl/arithm_compare.cl
+++ b/modules/ocl/src/opencl/arithm_compare.cl
@@ -43,11 +43,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/arithm_exp.cl b/modules/ocl/src/opencl/arithm_exp.cl
index 835bc95c3..f0a189353 100644
--- a/modules/ocl/src/opencl/arithm_exp.cl
+++ b/modules/ocl/src/opencl/arithm_exp.cl
@@ -43,11 +43,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/arithm_flip.cl b/modules/ocl/src/opencl/arithm_flip.cl
index 416240bd8..b9bacd339 100644
--- a/modules/ocl/src/opencl/arithm_flip.cl
+++ b/modules/ocl/src/opencl/arithm_flip.cl
@@ -43,11 +43,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/arithm_log.cl b/modules/ocl/src/opencl/arithm_log.cl
index fe1b3046a..ba5f32d6d 100644
--- a/modules/ocl/src/opencl/arithm_log.cl
+++ b/modules/ocl/src/opencl/arithm_log.cl
@@ -43,9 +43,13 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
+#endif
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////
 /////////////////////////////////////////////LOG/////////////////////////////////////////////////////
diff --git a/modules/ocl/src/opencl/arithm_magnitude.cl b/modules/ocl/src/opencl/arithm_magnitude.cl
index 7c8cc187e..6fd2ac383 100644
--- a/modules/ocl/src/opencl/arithm_magnitude.cl
+++ b/modules/ocl/src/opencl/arithm_magnitude.cl
@@ -43,9 +43,13 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
+#endif
 
 __kernel void arithm_magnitude_D5 (__global float *src1, int src1_step, int src1_offset,
                                    __global float *src2, int src2_step, int src2_offset,
diff --git a/modules/ocl/src/opencl/arithm_minMax.cl b/modules/ocl/src/opencl/arithm_minMax.cl
index 33a39d83f..01db7d064 100644
--- a/modules/ocl/src/opencl/arithm_minMax.cl
+++ b/modules/ocl/src/opencl/arithm_minMax.cl
@@ -45,7 +45,7 @@
 
 /**************************************PUBLICFUNC*************************************/
 
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
 #ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
 #elif defined (cl_khr_fp64)
diff --git a/modules/ocl/src/opencl/arithm_minMaxLoc.cl b/modules/ocl/src/opencl/arithm_minMaxLoc.cl
index 076fb0600..b80ce2b47 100644
--- a/modules/ocl/src/opencl/arithm_minMaxLoc.cl
+++ b/modules/ocl/src/opencl/arithm_minMaxLoc.cl
@@ -44,8 +44,13 @@
 //M*/
 
 /**************************************PUBLICFUNC*************************************/
-#if defined (DOUBLE_SUPPORT)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
 #define RES_TYPE double4
 #define CONVERT_RES_TYPE convert_double4
 #else
diff --git a/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl b/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
index 4d73be954..fbde684cd 100644
--- a/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
+++ b/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
@@ -44,8 +44,13 @@
 //M*/
 
 /**************************************PUBLICFUNC*************************************/
-#if defined (DOUBLE_SUPPORT)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
 #define RES_TYPE double4
 #define CONVERT_RES_TYPE convert_double4
 #else
diff --git a/modules/ocl/src/opencl/arithm_nonzero.cl b/modules/ocl/src/opencl/arithm_nonzero.cl
index fc9825796..3180c26e8 100644
--- a/modules/ocl/src/opencl/arithm_nonzero.cl
+++ b/modules/ocl/src/opencl/arithm_nonzero.cl
@@ -42,7 +42,7 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
 #ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
 #elif defined (cl_khr_fp64)
diff --git a/modules/ocl/src/opencl/arithm_phase.cl b/modules/ocl/src/opencl/arithm_phase.cl
index 978fd3b97..40346b2cd 100644
--- a/modules/ocl/src/opencl/arithm_phase.cl
+++ b/modules/ocl/src/opencl/arithm_phase.cl
@@ -44,17 +44,17 @@
 //
 //
 
-#if defined (DOUBLE_SUPPORT)
-    #ifdef cl_amd_fp64
-        #pragma OPENCL EXTENSION cl_amd_fp64:enable
-    #elif defined (cl_khr_fp64)
-        #pragma OPENCL EXTENSION cl_khr_fp64:enable
-    #endif
-    #define CV_PI M_PI
-    #define CV_2PI (2 * CV_PI)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
+#define CV_PI M_PI
+#define CV_2PI (2 * CV_PI)
 #else
-    #define CV_PI M_PI_F
-    #define CV_2PI (2 * CV_PI)
+#define CV_PI M_PI_F
+#define CV_2PI (2 * CV_PI)
 #endif
 
 /**************************************phase inradians**************************************/
@@ -159,7 +159,7 @@ __kernel void arithm_phase_indegrees_D6 (__global double *src1, int src1_step1,
 
         double data1 = src1[src1_index];
         double data2 = src2[src2_index];
-        double tmp = atan2(src2[src2_index], src1[src1_index]);
+        double tmp = atan2(data2, data1);
 
         tmp = 180 * tmp / CV_PI;
         if (tmp < 0)
diff --git a/modules/ocl/src/opencl/arithm_polarToCart.cl b/modules/ocl/src/opencl/arithm_polarToCart.cl
index 9e2e860e7..f3ec3117d 100644
--- a/modules/ocl/src/opencl/arithm_polarToCart.cl
+++ b/modules/ocl/src/opencl/arithm_polarToCart.cl
@@ -44,14 +44,14 @@
 //M*/
 
 #ifdef DOUBLE_SUPPORT
-    #ifdef cl_amd_fp64
-        #pragma OPENCL EXTENSION cl_amd_fp64:enable
-    #elif defined (cl_khr_fp64)
-        #pragma OPENCL EXTENSION cl_khr_fp64:enable
-    #endif
-    #define CV_PI M_PI
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
+#define CV_PI M_PI
 #else
-    #define CV_PI M_PI_F
+#define CV_PI M_PI_F
 #endif
 
 /////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/modules/ocl/src/opencl/arithm_pow.cl b/modules/ocl/src/opencl/arithm_pow.cl
index 1704f6b42..36a22b628 100644
--- a/modules/ocl/src/opencl/arithm_pow.cl
+++ b/modules/ocl/src/opencl/arithm_pow.cl
@@ -43,21 +43,22 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
-typedef double F;
-typedef double4 F4;
-#define convert_F4 convert_double4;
-#else
-typedef float F;
-typedef float4 F4;
-#define convert_F4 convert_float4;
 #endif
+#define F double
+#else
+#define F float
+#endif
+
 /************************************** pow **************************************/
+
 __kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offset,
                              __global float *dst,  int dst_step,  int dst_offset,
-                             int rows, int cols, int dst_step1,
-                             F p)
+                             int rows, int cols, int dst_step1, F p)
 {
 
     int x = get_global_id(0);
@@ -73,14 +74,13 @@ __kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offse
 
         *((__global float *)((__global char *)dst + dst_index)) = tmp;
     }
-
 }
 
 #if defined (DOUBLE_SUPPORT)
+
 __kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offset,
                              __global double *dst,  int dst_step,  int dst_offset,
-                             int rows, int cols, int dst_step1,
-                             F p)
+                             int rows, int cols, int dst_step1, F p)
 {
 
     int x = get_global_id(0);
@@ -95,6 +95,6 @@ __kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offs
         double tmp = src1_data > 0 ? exp(p * log(src1_data)) : (src1_data == 0 ? 0 : exp(p * log(fabs(src1_data))));
         *((__global double *)((__global char *)dst + dst_index)) = tmp;
     }
-
 }
+
 #endif
diff --git a/modules/ocl/src/opencl/arithm_setidentity.cl b/modules/ocl/src/opencl/arithm_setidentity.cl
index fb684c367..0ead5b003 100644
--- a/modules/ocl/src/opencl/arithm_setidentity.cl
+++ b/modules/ocl/src/opencl/arithm_setidentity.cl
@@ -43,11 +43,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/arithm_sum.cl b/modules/ocl/src/opencl/arithm_sum.cl
index 7ada5be4c..514cf2a7f 100644
--- a/modules/ocl/src/opencl/arithm_sum.cl
+++ b/modules/ocl/src/opencl/arithm_sum.cl
@@ -43,11 +43,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/arithm_transpose.cl b/modules/ocl/src/opencl/arithm_transpose.cl
index bd06a5208..8cde6544e 100644
--- a/modules/ocl/src/opencl/arithm_transpose.cl
+++ b/modules/ocl/src/opencl/arithm_transpose.cl
@@ -43,7 +43,7 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
 #ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
 #elif defined (cl_khr_fp64)
diff --git a/modules/ocl/src/opencl/bgfg_mog.cl b/modules/ocl/src/opencl/bgfg_mog.cl
index a13a30e90..06e18c213 100644
--- a/modules/ocl/src/opencl/bgfg_mog.cl
+++ b/modules/ocl/src/opencl/bgfg_mog.cl
@@ -67,11 +67,14 @@ static float clamp1(float var, float learningRate, float diff, float minVar)
 {
     return fmax(var + learningRate * (diff * diff - var), minVar);
 }
+
 #else
+
 #define T_FRAME uchar4
 #define T_MEAN_VAR float4
 #define CONVERT_TYPE convert_uchar4_sat
 #define F_ZERO (0.0f, 0.0f, 0.0f, 0.0f)
+
 inline float4 cvt(const uchar4 val)
 {
     float4 result;
@@ -93,6 +96,14 @@ inline float sum(const float4 val)
     return (val.x + val.y + val.z);
 }
 
+static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step) // exchanges the float4 at (x, y) of slice k with the one at (x, y) of slice k + 1
+{
+    float4 val = ptr[(k * rows + y) * ptr_step + x]; // consecutive slices are rows * ptr_step elements apart
+    ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
+    ptr[((k + 1) * rows + y) * ptr_step + x] = val;
+}
+
+
 static float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
 {
     float4 result;
@@ -102,6 +113,7 @@ static float4 clamp1(const float4 var, float learningRate, const float4 diff, fl
     result.w = 0.0f;
     return result;
 }
+
 #endif
 
 typedef struct
@@ -114,7 +126,7 @@ typedef struct
     float c_varMax;
     float c_tau;
     uchar c_shadowVal;
-}con_srtuct_t;
+} con_srtuct_t;
 
 static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
 {
@@ -123,13 +135,6 @@ static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_ste
     ptr[((k + 1) * rows + y) * ptr_step + x] = val;
 }
 
-static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
-{
-    float4 val = ptr[(k * rows + y) * ptr_step + x];
-    ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
-    ptr[((k + 1) * rows + y) * ptr_step + x] = val;
-}
-
 __kernel void mog_withoutLearning_kernel(__global T_FRAME* frame, __global uchar* fgmask,
     __global float* weight, __global T_MEAN_VAR* mean, __global T_MEAN_VAR* var,
     int frame_row, int frame_col, int frame_step, int fgmask_step,
diff --git a/modules/ocl/src/opencl/blend_linear.cl b/modules/ocl/src/opencl/blend_linear.cl
index 06a51f25c..bc7aa4685 100644
--- a/modules/ocl/src/opencl/blend_linear.cl
+++ b/modules/ocl/src/opencl/blend_linear.cl
@@ -43,7 +43,7 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
 #ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
 #elif defined (cl_khr_fp64)
diff --git a/modules/ocl/src/opencl/brute_force_match.cl b/modules/ocl/src/opencl/brute_force_match.cl
index ce0d86e8a..a005284ee 100644
--- a/modules/ocl/src/opencl/brute_force_match.cl
+++ b/modules/ocl/src/opencl/brute_force_match.cl
@@ -63,14 +63,6 @@
 #define DIST_TYPE 0
 #endif
 
-//http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-static int bit1Count(int v)
-{
-    v = v - ((v >> 1) & 0x55555555);                    // reuse input as temporary
-    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // temp
-    return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // count
-}
-
 // dirty fix for non-template support
 #if   (DIST_TYPE == 0) // L1Dist
 #   ifdef T_FLOAT
@@ -89,6 +81,13 @@ typedef float value_type;
 typedef float result_type;
 #define DIST_RES(x) sqrt(x)
 #elif (DIST_TYPE == 2) // Hamming
+// Returns the number of set bits in v (parallel bit count): http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+static int bit1Count(int v)
+{
+    v = v - ((v >> 1) & 0x55555555);                    // every 2-bit field now holds the count of its own set bits
+    v = (v & 0x33333333) + ((v >> 2) & 0x33333333);     // every 4-bit field now holds the count of its own set bits
+    return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; // the addition is evaluated before the mask; the multiply then sums the per-byte counts into the top byte
+}
 #define DIST(x, y) bit1Count( (x) ^ (y) )
 typedef int value_type;
 typedef int result_type;
diff --git a/modules/ocl/src/opencl/convertC3C4.cl b/modules/ocl/src/opencl/convertC3C4.cl
index b3e699dc4..4c519fdf7 100644
--- a/modules/ocl/src/opencl/convertC3C4.cl
+++ b/modules/ocl/src/opencl/convertC3C4.cl
@@ -33,12 +33,17 @@
 //
 //
 
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
+#endif
 
-__kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows,
-                    int dstStep_in_piexl,int pixel_end)
+__kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst,
+                         int cols, int rows,
+                         int dstStep_in_piexl, int pixel_end)
 {
     int id = get_global_id(0);
     int3 pixelid = (int3)(mul24(id,3),mad24(id,3,1),mad24(id,3,2));
@@ -88,13 +93,12 @@ __kernel void convertC3C4(__global const GENTYPE4 * restrict src, __global GENTY
         dst[addr.y] = outpix1;
     }
     else if(outx.x<cols && outy.x<rows)
-    {
         dst[addr.x] = outpix0;
-    }
 }
 
-__kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst, int cols, int rows,
-                    int srcStep_in_pixel,int pixel_end)
+__kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTYPE4 *dst,
+                          int cols, int rows,
+                          int srcStep_in_pixel, int pixel_end)
 {
     int id = get_global_id(0)<<2;
     int y = id / cols;
@@ -145,7 +149,5 @@ __kernel void convertC4C3(__global const GENTYPE4 * restrict src, __global GENTY
         dst[outaddr.y] = outpixel1;
     }
     else if(outaddr.x <= pixel_end)
-    {
         dst[outaddr.x] = pixel0;
-    }
 }
diff --git a/modules/ocl/src/opencl/filtering_boxFilter.cl b/modules/ocl/src/opencl/filtering_boxFilter.cl
index 7f7fd018d..96091ce6e 100644
--- a/modules/ocl/src/opencl/filtering_boxFilter.cl
+++ b/modules/ocl/src/opencl/filtering_boxFilter.cl
@@ -146,7 +146,11 @@
 #endif
 
 #if USE_DOUBLE
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
 #define FPTYPE double
 #define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
 #else
diff --git a/modules/ocl/src/opencl/filtering_filter2D.cl b/modules/ocl/src/opencl/filtering_filter2D.cl
index f96676689..fb7dca509 100644
--- a/modules/ocl/src/opencl/filtering_filter2D.cl
+++ b/modules/ocl/src/opencl/filtering_filter2D.cl
@@ -143,7 +143,11 @@
 #endif
 
 #if USE_DOUBLE
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
 #define FPTYPE double
 #define CONVERT_TO_FPTYPE CAT(convert_double, VEC_SIZE)
 #else
diff --git a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
index 72b94038c..a2feb82f4 100644
--- a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+++ b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
@@ -45,8 +45,6 @@
 //
 //M*/
 
-// Enter your kernel in this window
-//#pragma OPENCL EXTENSION cl_amd_printf:enable
 #define CV_HAAR_FEATURE_MAX           3
 typedef int   sumtype;
 typedef float sqsumtype;
@@ -288,8 +286,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
     int counter = get_global_id(0);
     int tr_x[3], tr_y[3], tr_h[3], tr_w[3], i = 0;
     GpuHidHaarTreeNode t1 = *(orinode + counter);
-#pragma unroll
 
+    #pragma unroll
     for (i = 0; i < 3; i++)
     {
         tr_x[i] = (int)(t1.p[i][0] * scale + 0.5f);
@@ -300,8 +298,8 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
 
     t1.weight[0] = -(t1.weight[1] * tr_h[1] * tr_w[1] + t1.weight[2] * tr_h[2] * tr_w[2]) / (tr_h[0] * tr_w[0]);
     counter += nodenum;
-#pragma unroll
 
+    #pragma unroll
     for (i = 0; i < 3; i++)
     {
         newnode[counter].p[i][0] = tr_x[i];
diff --git a/modules/ocl/src/opencl/imgproc_convolve.cl b/modules/ocl/src/opencl/imgproc_convolve.cl
index fb9596e5d..b8f974219 100644
--- a/modules/ocl/src/opencl/imgproc_convolve.cl
+++ b/modules/ocl/src/opencl/imgproc_convolve.cl
@@ -43,11 +43,13 @@
 //
 //M*/
 
-#if defined (__ATI__)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
-#elif defined (__NVIDIA__)
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
+#endif
 
 /************************************** convolve **************************************/
 
diff --git a/modules/ocl/src/opencl/imgproc_copymakeboder.cl b/modules/ocl/src/opencl/imgproc_copymakeboder.cl
index d97f66068..ac149a46b 100644
--- a/modules/ocl/src/opencl/imgproc_copymakeboder.cl
+++ b/modules/ocl/src/opencl/imgproc_copymakeboder.cl
@@ -34,7 +34,7 @@
 //
 //
 
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
 #ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
 #elif defined (cl_khr_fp64)
diff --git a/modules/ocl/src/opencl/imgproc_integral.cl b/modules/ocl/src/opencl/imgproc_integral.cl
index 05e76f964..a8102e54a 100644
--- a/modules/ocl/src/opencl/imgproc_integral.cl
+++ b/modules/ocl/src/opencl/imgproc_integral.cl
@@ -43,13 +43,14 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
+
 #define LSIZE 256
 #define LSIZE_1 255
 #define LSIZE_2 254
diff --git a/modules/ocl/src/opencl/imgproc_integral_sum.cl b/modules/ocl/src/opencl/imgproc_integral_sum.cl
index a6f73c748..662406140 100644
--- a/modules/ocl/src/opencl/imgproc_integral_sum.cl
+++ b/modules/ocl/src/opencl/imgproc_integral_sum.cl
@@ -43,11 +43,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/imgproc_remap.cl b/modules/ocl/src/opencl/imgproc_remap.cl
index 340e741cc..e1e3ca8a0 100644
--- a/modules/ocl/src/opencl/imgproc_remap.cl
+++ b/modules/ocl/src/opencl/imgproc_remap.cl
@@ -43,11 +43,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/imgproc_resize.cl b/modules/ocl/src/opencl/imgproc_resize.cl
index 2bb75b90c..0d4cbedf6 100644
--- a/modules/ocl/src/opencl/imgproc_resize.cl
+++ b/modules/ocl/src/opencl/imgproc_resize.cl
@@ -48,8 +48,12 @@
 // Currently, CV_8UC1  CV_8UC4  CV_32FC1 and CV_32FC4are supported.
 // We shall support other types later if necessary.
 
-#if defined DOUBLE_SUPPORT
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
 #define F double
 #else
 #define F float
diff --git a/modules/ocl/src/opencl/imgproc_threshold.cl b/modules/ocl/src/opencl/imgproc_threshold.cl
index 6f97c0451..63e410297 100644
--- a/modules/ocl/src/opencl/imgproc_threshold.cl
+++ b/modules/ocl/src/opencl/imgproc_threshold.cl
@@ -43,7 +43,7 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
 #ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
 #elif defined (cl_khr_fp64)
diff --git a/modules/ocl/src/opencl/imgproc_warpAffine.cl b/modules/ocl/src/opencl/imgproc_warpAffine.cl
index a5050bbf0..27f99e005 100644
--- a/modules/ocl/src/opencl/imgproc_warpAffine.cl
+++ b/modules/ocl/src/opencl/imgproc_warpAffine.cl
@@ -47,11 +47,11 @@
 //warpAffine kernel
 //support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic.
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 typedef double F;
 typedef double4 F4;
diff --git a/modules/ocl/src/opencl/imgproc_warpPerspective.cl b/modules/ocl/src/opencl/imgproc_warpPerspective.cl
index eee1c8175..97f86640b 100644
--- a/modules/ocl/src/opencl/imgproc_warpPerspective.cl
+++ b/modules/ocl/src/opencl/imgproc_warpPerspective.cl
@@ -47,11 +47,11 @@
 //wrapPerspective kernel
 //support data types: CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, and three interpolation methods: NN, Linear, Cubic.
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 typedef double F;
 typedef double4 F4;
diff --git a/modules/ocl/src/opencl/kernel_stablesort_by_key.cl b/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
index 2d38fbf2f..f8cc69300 100644
--- a/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
+++ b/modules/ocl/src/opencl/kernel_stablesort_by_key.cl
@@ -61,35 +61,6 @@
 #define my_comp(x,y) ((x) < (y))
 #endif
 
-///////////// parallel merge sort ///////////////
-// ported from https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/stablesort_by_key_kernels.cl
-static uint lowerBoundLinear( global K_T* data, uint left, uint right, K_T searchVal)
-{
-    //  The values firstIndex and lastIndex get modified within the loop, narrowing down the potential sequence
-    uint firstIndex = left;
-    uint lastIndex = right;
-
-    //  This loops through [firstIndex, lastIndex)
-    //  Since firstIndex and lastIndex will be different for every thread depending on the nested branch,
-    //  this while loop will be divergent within a wavefront
-    while( firstIndex < lastIndex )
-    {
-        K_T dataVal = data[ firstIndex ];
-
-        //  This branch will create divergent wavefronts
-        if( my_comp( dataVal, searchVal ) )
-        {
-            firstIndex = firstIndex+1;
-        }
-        else
-        {
-            break;
-        }
-    }
-
-    return firstIndex;
-}
-
 //  This implements a binary search routine to look for an 'insertion point' in a sequence, denoted
 //  by a base pointer and left and right index for a particular candidate value.  The comparison operator is
 //  passed as a functor parameter my_comp
diff --git a/modules/ocl/src/opencl/knearest.cl b/modules/ocl/src/opencl/knearest.cl
index bc0ae89a8..85e24517d 100644
--- a/modules/ocl/src/opencl/knearest.cl
+++ b/modules/ocl/src/opencl/knearest.cl
@@ -42,8 +42,13 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#if defined (DOUBLE_SUPPORT)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
 #define TYPE double
 #else
 #define TYPE float
diff --git a/modules/ocl/src/opencl/match_template.cl b/modules/ocl/src/opencl/match_template.cl
index 8b63c3bd2..4d46d0084 100644
--- a/modules/ocl/src/opencl/match_template.cl
+++ b/modules/ocl/src/opencl/match_template.cl
@@ -43,14 +43,12 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
-
 #define TYPE_IMAGE_SQSUM double
 #else
 #define TYPE_IMAGE_SQSUM float
diff --git a/modules/ocl/src/opencl/merge_mat.cl b/modules/ocl/src/opencl/merge_mat.cl
index 8b445c682..aea05aeb8 100644
--- a/modules/ocl/src/opencl/merge_mat.cl
+++ b/modules/ocl/src/opencl/merge_mat.cl
@@ -43,15 +43,19 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
-
+#endif
 
 ///////////////////////////////////////////////////////////////////////////////////////////////
 //////////////////////////////////optimized code using vector roi//////////////////////////
 ////////////vector fuction name format: merge_vector_C(channels number)D_(data type depth)//////
 ////////////////////////////////////////////////////////////////////////////////////////////////
+
 __kernel void merge_vector_C2_D0(__global uchar *mat_dst,  int dst_step,  int dst_offset,
                                  __global uchar *mat_src0, int src0_step, int src0_offset,
                                  __global uchar *mat_src1, int src1_step, int src1_offset,
diff --git a/modules/ocl/src/opencl/moments.cl b/modules/ocl/src/opencl/moments.cl
index 31c4c85ec..09c79c4b5 100644
--- a/modules/ocl/src/opencl/moments.cl
+++ b/modules/ocl/src/opencl/moments.cl
@@ -44,11 +44,11 @@
 //
 //M*/
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 typedef double T;
 #else
diff --git a/modules/ocl/src/opencl/operator_convertTo.cl b/modules/ocl/src/opencl/operator_convertTo.cl
index 85b562d65..ca38bd550 100644
--- a/modules/ocl/src/opencl/operator_convertTo.cl
+++ b/modules/ocl/src/opencl/operator_convertTo.cl
@@ -35,8 +35,12 @@
 //
 
 #ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
+#endif
 
 __kernel void convert_to(
         __global const srcT* restrict srcMat,
diff --git a/modules/ocl/src/opencl/operator_copyToM.cl b/modules/ocl/src/opencl/operator_copyToM.cl
index dcf5af975..69e1798ad 100644
--- a/modules/ocl/src/opencl/operator_copyToM.cl
+++ b/modules/ocl/src/opencl/operator_copyToM.cl
@@ -34,11 +34,11 @@
 //
 //
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/operator_setTo.cl b/modules/ocl/src/opencl/operator_setTo.cl
index 8ac480347..20c5cf211 100644
--- a/modules/ocl/src/opencl/operator_setTo.cl
+++ b/modules/ocl/src/opencl/operator_setTo.cl
@@ -34,11 +34,11 @@
 //
 //
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/operator_setToM.cl b/modules/ocl/src/opencl/operator_setToM.cl
index 8a489da9d..afaa2e61f 100644
--- a/modules/ocl/src/opencl/operator_setToM.cl
+++ b/modules/ocl/src/opencl/operator_setToM.cl
@@ -34,11 +34,11 @@
 //
 //
 
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #endif
 
diff --git a/modules/ocl/src/opencl/pyrlk.cl b/modules/ocl/src/opencl/pyrlk.cl
index a7fc27838..303d26892 100644
--- a/modules/ocl/src/opencl/pyrlk.cl
+++ b/modules/ocl/src/opencl/pyrlk.cl
@@ -45,8 +45,6 @@
 //
 //M*/
 
-//#pragma OPENCL EXTENSION cl_amd_printf : enable
-
 #define	BUFFER	64
 #define	BUFFER2	BUFFER>>1
 #ifndef WAVE_SIZE
diff --git a/modules/ocl/src/opencl/split_mat.cl b/modules/ocl/src/opencl/split_mat.cl
index b9aa048b0..b52b3c206 100644
--- a/modules/ocl/src/opencl/split_mat.cl
+++ b/modules/ocl/src/opencl/split_mat.cl
@@ -38,9 +38,14 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //M*/
-#if defined (DOUBLE_SUPPORT)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
+#endif
 
 #if DATA_DEPTH == 0
 #define BASE_TYPE uchar
diff --git a/modules/ocl/src/opencl/stereobm.cl b/modules/ocl/src/opencl/stereobm.cl
index 207bf0047..0edccdb1c 100644
--- a/modules/ocl/src/opencl/stereobm.cl
+++ b/modules/ocl/src/opencl/stereobm.cl
@@ -260,7 +260,6 @@ static float CalcSums(__local float *cols, __local float *cols_cache, int winsz)
 {
     unsigned int cache = cols[0];
 
-#pragma unroll
     for(int i = 1; i <= winsz; i++)
         cache += cols[i];
 
diff --git a/modules/ocl/src/opencl/stereobp.cl b/modules/ocl/src/opencl/stereobp.cl
index ec02f827a..4b5864f4c 100644
--- a/modules/ocl/src/opencl/stereobp.cl
+++ b/modules/ocl/src/opencl/stereobp.cl
@@ -45,13 +45,11 @@
 //M*/
 
 #if defined (DOUBLE_SUPPORT)
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
-
 #endif
 
 #ifdef T_FLOAT
diff --git a/modules/ocl/src/opencl/stereocsbp.cl b/modules/ocl/src/opencl/stereocsbp.cl
index 13a201cc1..72c17073d 100644
--- a/modules/ocl/src/opencl/stereocsbp.cl
+++ b/modules/ocl/src/opencl/stereocsbp.cl
@@ -44,19 +44,10 @@
 //
 //M*/
 
-
-#ifndef FLT_MAX
-#define FLT_MAX  CL_FLT_MAX
-#endif
-
-#ifndef SHRT_MAX
-#define SHRT_MAX  CL_SHORT_MAX
-#endif
-
-
 ///////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////get_first_k_initial_global//////////////////////////////
 //////////////////////////////////////////////////////////////////////////////////////////////
+
 __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_, __global short *selected_disp_pyr,
     __global short *ctemp, int h, int w, int nr_plane,
     int cmsg_step1, int cdisp_step1, int cndisp)
@@ -91,6 +82,7 @@ __kernel void get_first_k_initial_global_0(__global short *data_cost_selected_,
         }
     }
 }
+
 __kernel void get_first_k_initial_global_1(__global  float *data_cost_selected_, __global float *selected_disp_pyr,
     __global  float *ctemp, int h, int w, int nr_plane,
     int cmsg_step1, int cdisp_step1, int cndisp)
@@ -129,6 +121,7 @@ __kernel void get_first_k_initial_global_1(__global  float *data_cost_selected_,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
+
 __kernel void get_first_k_initial_local_0(__global  short *data_cost_selected_, __global short *selected_disp_pyr,
     __global  short *ctemp,int h, int w, int nr_plane,
     int cmsg_step1, int cdisp_step1, int cndisp)
@@ -248,6 +241,7 @@ __kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, _
 ///////////////////////////////////////////////////////////////
 /////////////////////// init data cost ////////////////////////
 ///////////////////////////////////////////////////////////////
+
 inline float compute_3(__global uchar* left, __global uchar* right,
     float cdata_weight,  float cmax_data_term)
 {
@@ -257,6 +251,7 @@ inline float compute_3(__global uchar* left, __global uchar* right,
 
     return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
 }
+
 inline float compute_1(__global uchar* left, __global uchar* right,
     float cdata_weight,  float cmax_data_term)
 {
@@ -316,6 +311,7 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g
         }
     }
 }
+
 __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
     int h, int w, int level, int channels,
     int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
@@ -360,9 +356,11 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g
         }
     }
 }
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
 //////////////////////////////////init_data_cost_reduce//////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////////////////////////////////
+
 __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
     __local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
     int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
@@ -630,6 +628,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
 ///////////////////////////////////////////////////////////////
 ////////////////////// compute data cost //////////////////////
 ///////////////////////////////////////////////////////////////
+
 __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
     __global uchar *cleft, __global uchar *cright,
     int h, int w, int level, int nr_plane, int channels,
@@ -680,6 +679,7 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
         }
     }
 }
+
 __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
     __global uchar *cleft, __global uchar *cright,
     int h, int w, int level, int nr_plane, int channels,
@@ -729,9 +729,11 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
         }
     }
 }
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
 /////////////////////////////////////////////////////////////////////////////////////////////////////////
+
 __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
     __global uchar *cleft, __global uchar *cright,__local float *smem,
     int level, int rows, int cols, int h, int nr_plane,
@@ -1033,41 +1035,6 @@ static void get_first_k_element_increase_0(__global short* u_new, __global short
     }
 }
 
-static void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new,
-    __global float *r_new, __global const float *u_cur, __global const float *d_cur,
-    __global const float *l_cur, __global const float *r_cur,
-    __global float *data_cost_selected, __global float *disparity_selected_new,
-    __global float *data_cost_new, __global const float *data_cost_cur,
-    __global const float *disparity_selected_cur,
-    int nr_plane, int nr_plane2,
-    int cdisp_step1, int cdisp_step2)
-{
-    for(int i = 0; i < nr_plane; i++)
-    {
-        float minimum = FLT_MAX;
-        int id = 0;
-
-        for(int j = 0; j < nr_plane2; j++)
-        {
-            float cur = data_cost_new[j * cdisp_step1];
-            if(cur < minimum)
-            {
-                minimum = cur;
-                id = j;
-            }
-        }
-
-        data_cost_selected[i * cdisp_step1] = data_cost_cur[id * cdisp_step1];
-        disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step2];
-
-        u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
-        d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
-        l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
-        r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
-        data_cost_new[id * cdisp_step1] = FLT_MAX;
-
-    }
-}
 __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __global short *l_new_,
     __global short *r_new_, __global  short *u_cur_, __global const short *d_cur_,
     __global const short *l_cur_, __global const short *r_cur_, __global short *ctemp,
@@ -1118,6 +1085,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
             cdisp_step1, cdisp_step2);
     }
 }
+
 __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __global float *l_new_,
     __global float *r_new_, __global const float *u_cur_, __global const float *d_cur_,
     __global const float *l_cur_, __global const float *r_cur_, __global float *ctemp,
diff --git a/modules/ocl/src/opencl/svm.cl b/modules/ocl/src/opencl/svm.cl
index 36ae38ed2..32b8194c0 100644
--- a/modules/ocl/src/opencl/svm.cl
+++ b/modules/ocl/src/opencl/svm.cl
@@ -33,11 +33,12 @@
 // the use of this software, even if advised of the possibility of such damage.
 //
 //
-#if defined (DOUBLE_SUPPORT)
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64:enable
-#elif defined (cl_amd_fp64)
+
+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
 #pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
 #define TYPE double
 #else
@@ -53,7 +54,6 @@
 #else
 #define POW(X,Y) X
 #endif
-#define FLT_MAX   3.402823466e+38F
 #define MAX_VAL   (FLT_MAX*1e-3)
 
 __kernel void svm_linear(__global float* src, int src_step, __global float* src2, int src2_step, __global TYPE* dst, int dst_step, int src_rows, int src2_cols,
@@ -206,4 +206,4 @@ __kernel void svm_rbf(__global float* src, int src_step, __global float* src2, i
             dst[row * dst_step + col] = temp1;
         }
     }
-}
\ No newline at end of file
+}
diff --git a/modules/ocl/src/opencl/tvl1flow.cl b/modules/ocl/src/opencl/tvl1flow.cl
index 2787f00dc..6111a4a38 100644
--- a/modules/ocl/src/opencl/tvl1flow.cl
+++ b/modules/ocl/src/opencl/tvl1flow.cl
@@ -44,7 +44,7 @@
 //M*/
 
 __kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step,
-__global float* dx, __global float* dy, int dx_step)
+                                     __global float* dx, __global float* dy, int dx_step)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
@@ -53,13 +53,6 @@ __global float* dx, __global float* dy, int dx_step)
     {
         int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1);
         int src_x2 = (x - 1) > 0 ? (x -1) : 0;
-
-        //if(src[y * src_step + src_x1] == src[y * src_step+ src_x2])
-        //{
-        //    printf("y = %d\n", y);
-        //    printf("src_x1 = %d\n", src_x1);
-        //    printf("src_x2 = %d\n", src_x2);
-        //}
         dx[y * dx_step+ x] = 0.5f * (src[y * src_step + src_x1] - src[y * src_step+ src_x2]);
 
         int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1);
@@ -97,24 +90,24 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
     int u2_offset_x,
     int u2_offset_y)
 {
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
+    int x = get_global_id(0);
+    int y = get_global_id(1);
 
     if(x < I0_col&&y < I0_row)
     {
-        //const float u1Val = u1(y, x);
-        const float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
-        //const float u2Val = u2(y, x);
-        const float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+        //float u1Val = u1(y, x);
+        float u1Val = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+        //float u2Val = u2(y, x);
+        float u2Val = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
 
-        const float wx = x + u1Val;
-        const float wy = y + u2Val;
+        float wx = x + u1Val;
+        float wy = y + u2Val;
 
-        const int xmin = ceil(wx - 2.0f);
-        const int xmax = floor(wx + 2.0f);
+        int xmin = ceil(wx - 2.0f);
+        int xmax = floor(wx + 2.0f);
 
-        const int ymin = ceil(wy - 2.0f);
-        const int ymax = floor(wy + 2.0f);
+        int ymin = ceil(wy - 2.0f);
+        int ymax = floor(wy + 2.0f);
 
         float sum  = 0.0f;
         float sumx = 0.0f;
@@ -126,7 +119,7 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
         {
             for (int cx = xmin; cx <= xmax; ++cx)
             {
-                const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
+                float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
 
                 //sum  += w * tex2D(tex_I1 , cx, cy);
                 int2 cood = (int2)(cx, cy);
@@ -140,30 +133,30 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
             }
         }
 
-        const float coeff = 1.0f / wsum;
+        float coeff = 1.0f / wsum;
 
-        const float I1wVal  = sum  * coeff;
-        const float I1wxVal = sumx * coeff;
-        const float I1wyVal = sumy * coeff;
+        float I1wVal  = sum  * coeff;
+        float I1wxVal = sumx * coeff;
+        float I1wyVal = sumy * coeff;
 
         I1w[y * I1w_step + x]  = I1wVal;
         I1wx[y * I1w_step + x] = I1wxVal;
         I1wy[y * I1w_step + x] = I1wyVal;
 
-        const float Ix2 = I1wxVal * I1wxVal;
-        const float Iy2 = I1wyVal * I1wyVal;
+        float Ix2 = I1wxVal * I1wxVal;
+        float Iy2 = I1wyVal * I1wyVal;
 
         // store the |Grad(I1)|^2
         grad[y * I1w_step + x] = Ix2 + Iy2;
 
         // compute the constant part of the rho function
-        const float I0Val = I0[y * I0_step + x];
+        float I0Val = I0[y * I0_step + x];
         rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
     }
 
 }
 
-static float readImage(__global const float *image,  const int x,  const int y,  const int rows,  const int cols, const int elemCntPerRow)
+static float readImage(__global float *image,  int x,  int y,  int rows,  int cols, int elemCntPerRow)
 {
     int i0 = clamp(x, 0, cols - 1);
     int j0 = clamp(y, 0, rows - 1);
@@ -185,24 +178,24 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
     int I1_step,
     int I1x_step)
 {
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
+    int x = get_global_id(0);
+    int y = get_global_id(1);
 
     if(x < I0_col&&y < I0_row)
     {
-        //const float u1Val = u1(y, x);
-        const float u1Val = u1[y * u1_step + x];
-        //const float u2Val = u2(y, x);
-        const float u2Val = u2[y * u2_step + x];
+        //float u1Val = u1(y, x);
+        float u1Val = u1[y * u1_step + x];
+        //float u2Val = u2(y, x);
+        float u2Val = u2[y * u2_step + x];
 
-        const float wx = x + u1Val;
-        const float wy = y + u2Val;
+        float wx = x + u1Val;
+        float wy = y + u2Val;
 
-        const int xmin = ceil(wx - 2.0f);
-        const int xmax = floor(wx + 2.0f);
+        int xmin = ceil(wx - 2.0f);
+        int xmax = floor(wx + 2.0f);
 
-        const int ymin = ceil(wy - 2.0f);
-        const int ymax = floor(wy + 2.0f);
+        int ymin = ceil(wy - 2.0f);
+        int ymax = floor(wy + 2.0f);
 
         float sum  = 0.0f;
         float sumx = 0.0f;
@@ -213,7 +206,7 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
         {
             for (int cx = xmin; cx <= xmax; ++cx)
             {
-                const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
+                float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
 
                 int2 cood = (int2)(cx, cy);
                 sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step);
@@ -223,24 +216,24 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
             }
         }
 
-        const float coeff = 1.0f / wsum;
+        float coeff = 1.0f / wsum;
 
-        const float I1wVal  = sum  * coeff;
-        const float I1wxVal = sumx * coeff;
-        const float I1wyVal = sumy * coeff;
+        float I1wVal  = sum  * coeff;
+        float I1wxVal = sumx * coeff;
+        float I1wyVal = sumy * coeff;
 
         I1w[y * I1w_step + x]  = I1wVal;
         I1wx[y * I1w_step + x] = I1wxVal;
         I1wy[y * I1w_step + x] = I1wyVal;
 
-        const float Ix2 = I1wxVal * I1wxVal;
-        const float Iy2 = I1wyVal * I1wyVal;
+        float Ix2 = I1wxVal * I1wxVal;
+        float Iy2 = I1wyVal * I1wyVal;
 
         // store the |Grad(I1)|^2
         grad[y * I1w_step + x] = Ix2 + Iy2;
 
         // compute the constant part of the rho function
-        const float I0Val = I0[y * I0_step + x];
+        float I0Val = I0[y * I0_step + x];
         rho[y * I1w_step + x] = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
     }
 
@@ -253,38 +246,35 @@ __kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col,
     __global float* p12,
     __global float* p21,
     __global float* p22,
-    const float taut,
+    float taut,
     int u2_step,
     int u1_offset_x,
     int u1_offset_y,
     int u2_offset_x,
     int u2_offset_y)
 {
-
-    //const int x = blockIdx.x * blockDim.x + threadIdx.x;
-    //const int y = blockIdx.y * blockDim.y + threadIdx.y;
-    const int x = get_global_id(0);
-    const int y = get_global_id(1);
+    int x = get_global_id(0);
+    int y = get_global_id(1);
 
     if(x < u1_col && y < u1_row)
     {
         int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
-        const float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+        float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
 
         int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
-        const float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+        float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
 
         int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
-        const float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+        float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
 
         int src_y2 = (y + 1) <  (u1_row - 1) ? (y + 1) : (u1_row - 1);
-        const float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+        float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
 
-        const float g1 = hypot(u1x, u1y);
-        const float g2 = hypot(u2x, u2y);
+        float g1 = hypot(u1x, u1y);
+        float g2 = hypot(u2x, u2y);
 
-        const float ng1 = 1.0f + taut * g1;
-        const float ng2 = 1.0f + taut * g2;
+        float ng1 = 1.0f + taut * g1;
+        float ng2 = 1.0f + taut * g2;
 
         p11[y * p11_step + x] = (p11[y * p11_step + x] + taut * u1x) / ng1;
         p12[y * p11_step + x] = (p12[y * p11_step + x] + taut * u1y) / ng1;
@@ -299,8 +289,8 @@ static float divergence(__global const float* v1, __global const float* v2, int
 
     if (x > 0 && y > 0)
     {
-        const float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1];
-        const float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x];
+        float v1x = v1[y * v1_step + x] - v1[y * v1_step + x - 1];
+        float v2y = v2[y * v2_step + x] - v2[(y - 1) * v2_step + x];
         return v1x + v2y;
     }
     else
@@ -328,30 +318,25 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
     __global const float* p22, /*int p22_step,*/
     __global float* u1, int u1_step,
     __global float* u2,
-    __global float* error, const float l_t, const float theta, int u2_step,
+    __global float* error, float l_t, float theta, int u2_step,
     int u1_offset_x,
     int u1_offset_y,
     int u2_offset_x,
     int u2_offset_y,
     char calc_error)
 {
-
-    //const int x = blockIdx.x * blockDim.x + threadIdx.x;
-    //const int y = blockIdx.y * blockDim.y + threadIdx.y;
-
     int x = get_global_id(0);
     int y = get_global_id(1);
 
-
     if(x < I1wx_col && y < I1wx_row)
     {
-        const float I1wxVal = I1wx[y * I1wx_step + x];
-        const float I1wyVal = I1wy[y * I1wx_step + x];
-        const float gradVal = grad[y * I1wx_step + x];
-        const float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
-        const float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
+        float I1wxVal = I1wx[y * I1wx_step + x];
+        float I1wyVal = I1wy[y * I1wx_step + x];
+        float gradVal = grad[y * I1wx_step + x];
+        float u1OldVal = u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
+        float u2OldVal = u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];
 
-        const float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);
+        float rho = rho_c[y * I1wx_step + x] + (I1wxVal * u1OldVal + I1wyVal * u2OldVal);
 
         // estimate the values of the variable (v1, v2) (thresholding operator TH)
 
@@ -370,31 +355,31 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx
         }
         else if (gradVal > 1.192092896e-07f)
         {
-            const float fi = -rho / gradVal;
+            float fi = -rho / gradVal;
             d1 = fi * I1wxVal;
             d2 = fi * I1wyVal;
         }
 
-        const float v1 = u1OldVal + d1;
-        const float v2 = u2OldVal + d2;
+        float v1 = u1OldVal + d1;
+        float v2 = u2OldVal + d2;
 
         // compute the divergence of the dual variable (p1, p2)
 
-        const float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step);
-        const float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step);
+        float div_p1 = divergence(p11, p12, y, x, I1wx_step, I1wx_step);
+        float div_p2 = divergence(p21, p22, y, x, I1wx_step, I1wx_step);
 
         // estimate the values of the optical flow (u1, u2)
 
-        const float u1NewVal = v1 + theta * div_p1;
-        const float u2NewVal = v2 + theta * div_p2;
+        float u1NewVal = v1 + theta * div_p1;
+        float u2NewVal = v2 + theta * div_p2;
 
         u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal;
         u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal;
 
         if(calc_error)
         {
-            const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
-            const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
+            float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
+            float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
             error[y * I1wx_step + x] = n1 + n2;
         }
     }

From b8e3d3f79163b7758d23ae51263d64371a69b638 Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Tue, 12 Nov 2013 16:15:41 +0400
Subject: [PATCH 23/45] In the image sequence capture, only search for the
 ordinal in the file name.

Searching in directory names can yield confusing results; e.g. if
the input is "jpeg2000/image1.jp2", it will infer the pattern
"jpeg%04d/image1.jp2", which is likely not what the user intended.

If the user really wants the variable part to be in the directory name,
they can always use an explicit pattern.
---
 modules/highgui/src/cap_images.cpp | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/modules/highgui/src/cap_images.cpp b/modules/highgui/src/cap_images.cpp
index e2feb0d27..e1a8b8b1c 100644
--- a/modules/highgui/src/cap_images.cpp
+++ b/modules/highgui/src/cap_images.cpp
@@ -200,8 +200,18 @@ static char* icvExtractPattern(const char *filename, unsigned *offset)
     }
     else // no pattern filename was given - extract the pattern
     {
-        for(at = name; *at && !isdigit(*at); at++)
-            ;
+        at = name;
+
+        // ignore directory names
+        char *slash = strrchr(at, '/');
+        if (slash) at = slash + 1;
+
+#ifdef _WIN32
+        slash = strrchr(at, '\\');
+        if (slash) at = slash + 1;
+#endif
+
+        while (*at && !isdigit(*at)) at++;
 
         if(!*at)
             return 0;

From 0bf73506159a74903633580a6b4ff50ca6d8cd63 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Tue, 12 Nov 2013 19:14:40 +0400
Subject: [PATCH 24/45] fixed compilation of ocl::minMaxLoc for Intel device

---
 modules/ocl/src/opencl/arithm_minMaxLoc.cl    | 11 ++--
 .../ocl/src/opencl/arithm_minMaxLoc_mask.cl   | 53 +++++++++++--------
 2 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/modules/ocl/src/opencl/arithm_minMaxLoc.cl b/modules/ocl/src/opencl/arithm_minMaxLoc.cl
index 076fb0600..75b035800 100644
--- a/modules/ocl/src/opencl/arithm_minMaxLoc.cl
+++ b/modules/ocl/src/opencl/arithm_minMaxLoc.cl
@@ -222,8 +222,9 @@ __kernel void arithm_op_minMaxLoc(int cols, int invalid_cols, int offset, int el
     {
         localmem_min[lid] = min(minval,localmem_min[lid]);
         localmem_max[lid] = max(maxval,localmem_max[lid]);
-        localmem_minloc[lid] = CONDITION_FUNC(localmem_min[lid] == minval, minloc, localmem_minloc[lid]);
-        localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == maxval, maxloc, localmem_maxloc[lid]);
+        VEC_TYPE minVal = localmem_min[lid], maxVal = localmem_max[lid];
+        localmem_minloc[lid] = CONDITION_FUNC(minVal == minval, minloc, localmem_minloc[lid]);
+        localmem_maxloc[lid] = CONDITION_FUNC(maxVal == maxval, maxloc, localmem_maxloc[lid]);
     }
     barrier(CLK_LOCAL_MEM_FENCE);
 
@@ -234,8 +235,10 @@ __kernel void arithm_op_minMaxLoc(int cols, int invalid_cols, int offset, int el
             int lid2 = lsize + lid;
             localmem_min[lid] = min(localmem_min[lid], localmem_min[lid2]);
             localmem_max[lid] = max(localmem_max[lid], localmem_max[lid2]);
-            localmem_minloc[lid] = CONDITION_FUNC(localmem_min[lid] == localmem_min[lid2], localmem_minloc[lid2], localmem_minloc[lid]);
-            localmem_maxloc[lid] = CONDITION_FUNC(localmem_max[lid] == localmem_max[lid2], localmem_maxloc[lid2], localmem_maxloc[lid]);
+            VEC_TYPE min1 = localmem_min[lid], min2 = localmem_min[lid2];
+            localmem_minloc[lid] = CONDITION_FUNC(min1 == min2, localmem_minloc[lid2], localmem_minloc[lid]);
+            VEC_TYPE max1 = localmem_max[lid], max2 = localmem_max[lid2];
+            localmem_maxloc[lid] = CONDITION_FUNC(max1 == max2, localmem_maxloc[lid2], localmem_maxloc[lid]);
        }
        barrier(CLK_LOCAL_MEM_FENCE);
     }
diff --git a/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl b/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
index 4d73be954..53710f19d 100644
--- a/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
+++ b/modules/ocl/src/opencl/arithm_minMaxLoc_mask.cl
@@ -152,24 +152,26 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
     int  id = get_global_id(0);
     int idx = id + (id / cols) * invalid_cols;
     int midx = id + (id / cols) * minvalid_cols;
+
     __local VEC_TYPE lm_max[128],lm_min[128];
-    VEC_TYPE minval,maxval,temp,m_temp;
-    __local VEC_TYPE_LOC lm_maxloc[128],lm_minloc[128];
-    VEC_TYPE_LOC minloc,maxloc,temploc,negative = -1,one = 1,zero = 0;
+    VEC_TYPE minval, maxval, temp, m_temp, zeroVal = (VEC_TYPE)(0);
+    __local VEC_TYPE_LOC lm_maxloc[128], lm_minloc[128];
+    VEC_TYPE_LOC minloc, maxloc, temploc, negative = -1, one = 1, zero = 0;
+
     if(id < elemnum)
     {
         temp = vload4(idx, &src[offset]);
         m_temp = CONVERT_TYPE(vload4(midx,&mask[moffset]));
         int idx_c = (idx << 2) + offset;
         temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3);
-        if(id % cols == cols - 1)
+        if (id % cols == cols - 1)
         {
             repeat_me(m_temp);
             repeat_e(temploc);
         }
-        minval = m_temp != (VEC_TYPE)0 ? temp : (VEC_TYPE)MAX_VAL;
-        maxval = m_temp != (VEC_TYPE)0 ? temp : (VEC_TYPE)MIN_VAL;
-        minloc = CONDITION_FUNC(m_temp != (VEC_TYPE)0, temploc , negative);
+        minval = m_temp != zeroVal ? temp : (VEC_TYPE)MAX_VAL;
+        maxval = m_temp != zeroVal ? temp : (VEC_TYPE)MIN_VAL;
+        minloc = CONDITION_FUNC(m_temp != zeroVal, temploc , negative);
         maxloc = minloc;
     }
     else
@@ -179,6 +181,7 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
         minloc = negative;
         maxloc = negative;
     }
+
     for(id=id + (groupnum << 8); id < elemnum;id = id + (groupnum << 8))
     {
         idx = id + (id / cols) * invalid_cols;
@@ -187,17 +190,18 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
         m_temp = CONVERT_TYPE(vload4(midx,&mask[moffset]));
         int idx_c = (idx << 2) + offset;
         temploc = (VEC_TYPE_LOC)(idx_c,idx_c+1,idx_c+2,idx_c+3);
-        if(id % cols == cols - 1)
+        if (id % cols == cols - 1)
         {
             repeat_me(m_temp);
             repeat_e(temploc);
         }
-        minval = min(minval,m_temp != (VEC_TYPE)0 ? temp : minval);
-        maxval = max(maxval,m_temp != (VEC_TYPE)0 ? temp : maxval);
+        minval = min(minval, m_temp != zeroVal ? temp : minval);
+        maxval = max(maxval, m_temp != zeroVal ? temp : maxval);
 
-        minloc = CONDITION_FUNC((minval == temp) && (m_temp != (VEC_TYPE)0), temploc , minloc);
-        maxloc = CONDITION_FUNC((maxval == temp) && (m_temp != (VEC_TYPE)0), temploc , maxloc);
+        minloc = CONDITION_FUNC(minval == temp && m_temp != zeroVal, temploc , minloc);
+        maxloc = CONDITION_FUNC(maxval == temp && m_temp != zeroVal, temploc , maxloc);
     }
+
     if(lid > 127)
     {
         lm_min[lid - 128] = minval;
@@ -206,32 +210,37 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
         lm_maxloc[lid - 128] = maxloc;
     }
     barrier(CLK_LOCAL_MEM_FENCE);
+
     if(lid < 128)
     {
-        lm_min[lid] = min(minval,lm_min[lid]);
-        lm_max[lid] = max(maxval,lm_max[lid]);
+        lm_min[lid] = min(minval, lm_min[lid]);
+        lm_max[lid] = max(maxval, lm_max[lid]);
         VEC_TYPE con_min = CONVERT_TYPE(minloc != negative ? one : zero);
         VEC_TYPE con_max = CONVERT_TYPE(maxloc != negative ? one : zero);
-        lm_minloc[lid] = CONDITION_FUNC((lm_min[lid] == minval) && (con_min != (VEC_TYPE)0), minloc , lm_minloc[lid]);
-        lm_maxloc[lid] = CONDITION_FUNC((lm_max[lid] == maxval) && (con_max != (VEC_TYPE)0), maxloc , lm_maxloc[lid]);
+        VEC_TYPE lmMinVal = lm_min[lid], lmMaxVal = lm_max[lid];
+        lm_minloc[lid] = CONDITION_FUNC(lmMinVal == minval && con_min != zeroVal, minloc , lm_minloc[lid]);
+        lm_maxloc[lid] = CONDITION_FUNC(lmMaxVal == maxval && con_max != zeroVal, maxloc , lm_maxloc[lid]);
     }
     barrier(CLK_LOCAL_MEM_FENCE);
+
     for(int lsize = 64; lsize > 0; lsize >>= 1)
     {
         if(lid < lsize)
         {
             int lid2 = lsize + lid;
-            lm_min[lid] = min(lm_min[lid] , lm_min[lid2]);
-            lm_max[lid] = max(lm_max[lid] , lm_max[lid2]);
+            lm_min[lid] = min(lm_min[lid], lm_min[lid2]);
+            lm_max[lid] = max(lm_max[lid], lm_max[lid2]);
             VEC_TYPE con_min = CONVERT_TYPE(lm_minloc[lid2] != negative ? one : zero);
             VEC_TYPE con_max = CONVERT_TYPE(lm_maxloc[lid2] != negative ? one : zero);
-            lm_minloc[lid] =
-                CONDITION_FUNC((lm_min[lid] == lm_min[lid2]) && (con_min != (VEC_TYPE)0), lm_minloc[lid2] , lm_minloc[lid]);
-            lm_maxloc[lid] =
-                CONDITION_FUNC((lm_max[lid] == lm_max[lid2]) && (con_max != (VEC_TYPE)0), lm_maxloc[lid2] , lm_maxloc[lid]);
+
+            VEC_TYPE lmMinVal1 = lm_min[lid], lmMinVal2 = lm_min[lid2];
+            VEC_TYPE lmMaxVal1 = lm_max[lid], lmMaxVal2 = lm_max[lid2];
+            lm_minloc[lid] = CONDITION_FUNC(lmMinVal1 == lmMinVal2 && con_min != zeroVal, lm_minloc[lid2] , lm_minloc[lid]);
+            lm_maxloc[lid] = CONDITION_FUNC(lmMaxVal1 == lmMaxVal2 && con_max != zeroVal, lm_maxloc[lid2] , lm_maxloc[lid]);
         }
         barrier(CLK_LOCAL_MEM_FENCE);
     }
+
     if( lid == 0)
     {
         dst[gid] = CONVERT_RES_TYPE(lm_min[0]);

From 1e2fb6de3065547d7107cb6eec628c9f7f0802da Mon Sep 17 00:00:00 2001
From: perping <erping@multicorewareinc.com>
Date: Wed, 13 Nov 2013 10:09:39 +0800
Subject: [PATCH 25/45] fix match_template and haar

---
 modules/ocl/src/haar.cpp           |  6 +++---
 modules/ocl/src/match_template.cpp | 20 ++++++++++++++++----
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp
index bd5a8d1e8..ac8a64e73 100644
--- a/modules/ocl/src/haar.cpp
+++ b/modules/ocl/src/haar.cpp
@@ -800,7 +800,7 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
 
             indexy += sz.height;
         }
-        if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
+        if(gsqsum_t.depth() == CV_64F)
             gsqsum_t.convertTo(gsqsum, CV_32FC1);
         else
             gsqsum = gsqsum_t;
@@ -1294,7 +1294,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
             cv::ocl::integral(resizeroi, gimgroi, gimgroisq);
             indexy += sz.height;
         }
-        if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
+        if(gsqsum_t.depth() == CV_64F)
             gsqsum_t.convertTo(gsqsum, CV_32FC1);
         else
             gsqsum = gsqsum_t;
@@ -1360,7 +1360,7 @@ void cv::ocl::OclCascadeClassifierBuf::detectMultiScale(oclMat &gimg, CV_OUT std
     else
     {
         cv::ocl::integral(gimg, gsum, gsqsum_t);
-        if(Context::getContext()->supportsFeature(FEATURE_CL_DOUBLE))
+        if(gsqsum_t.depth() == CV_64F)
             gsqsum_t.convertTo(gsqsum, CV_32FC1);
         else
             gsqsum = gsqsum_t;
diff --git a/modules/ocl/src/match_template.cpp b/modules/ocl/src/match_template.cpp
index d18bacc6f..28397b608 100644
--- a/modules/ocl/src/match_template.cpp
+++ b/modules/ocl/src/match_template.cpp
@@ -250,7 +250,10 @@ namespace cv
             buf.image_sums.resize(1);
             buf.image_sqsums.resize(1);
             integral(image.reshape(1), buf.image_sums[0], temp);
-            temp.convertTo(buf.image_sqsums[0], CV_32FC1);
+            if(temp.depth() == CV_64F)
+                temp.convertTo(buf.image_sqsums[0], CV_32FC1);
+            else
+                buf.image_sqsums[0] = temp;
             unsigned long long templ_sqsum = (unsigned long long)sqrSum(templ.reshape(1))[0];
 
             Context *clCxt = image.clCxt;
@@ -416,7 +419,12 @@ namespace cv
             {
                 buf.image_sums.resize(1);
                 buf.image_sqsums.resize(1);
-                integral(image, buf.image_sums[0], buf.image_sqsums[0]);
+                cv::ocl::oclMat temp;
+                integral(image, buf.image_sums[0], temp);
+                if(temp.depth() == CV_64F)
+                    temp.convertTo(buf.image_sqsums[0], CV_32FC1);
+                else
+                    buf.image_sqsums[0] = temp;
 
                 templ_sum[0]   = (float)sum(templ)[0];
 
@@ -452,10 +460,14 @@ namespace cv
                 templ_sum   *= scale;
                 buf.image_sums.resize(buf.images.size());
                 buf.image_sqsums.resize(buf.images.size());
-
+                cv::ocl::oclMat temp;
                 for(int i = 0; i < image.oclchannels(); i ++)
                 {
-                    integral(buf.images[i], buf.image_sums[i], buf.image_sqsums[i]);
+                    integral(buf.images[i], buf.image_sums[i], temp);
+                    if(temp.depth() == CV_64F)
+                        temp.convertTo(buf.image_sqsums[i], CV_32FC1);
+                    else
+                        buf.image_sqsums[i] = temp;
                 }
 
                 switch(image.oclchannels())

From 185059959bf43251638f7e8cdb14c5bd7753d450 Mon Sep 17 00:00:00 2001
From: perping <erping@multicorewareinc.com>
Date: Wed, 13 Nov 2013 10:19:09 +0800
Subject: [PATCH 26/45] fix haar

---
 modules/ocl/src/haar.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/modules/ocl/src/haar.cpp b/modules/ocl/src/haar.cpp
index ac8a64e73..deff8671e 100644
--- a/modules/ocl/src/haar.cpp
+++ b/modules/ocl/src/haar.cpp
@@ -1013,7 +1013,10 @@ CvSeq *cv::ocl::OclCascadeClassifier::oclHaarDetectObjects( oclMat &gimg, CvMemS
         oclMat gsqsum;
         oclMat gsqsum_t;
         cv::ocl::integral(gimg, gsum, gsqsum_t);
-        gsqsum_t.convertTo(gsqsum, CV_32FC1);
+        if(gsqsum_t.depth() == CV_64F)
+            gsqsum_t.convertTo(gsqsum, CV_32FC1);
+        else
+            gsqsum = gsqsum_t;
         CvSize sz;
         vector<CvSize> sizev;
         vector<float> scalev;

From c38e4f9639cfeb834c8e94012ab6f968e2c0e07b Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Wed, 13 Nov 2013 13:04:02 +0400
Subject: [PATCH 27/45] added eps to ocl::buildWarpPerspectiveMaps perf test

---
 modules/ocl/perf/perf_imgwarp.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/modules/ocl/perf/perf_imgwarp.cpp b/modules/ocl/perf/perf_imgwarp.cpp
index 0aff45e9a..c69e11518 100644
--- a/modules/ocl/perf/perf_imgwarp.cpp
+++ b/modules/ocl/perf/perf_imgwarp.cpp
@@ -292,6 +292,7 @@ PERF_TEST_P(buildWarpPerspectiveMapsFixture, Inverse, OCL_TYPICAL_MAT_SIZES)
     };
     Mat M(3, 3, CV_64F, (void *)coeffs);
     const Size dsize = GetParam();
+    const double eps = 5e-4;
 
     Mat xmap(dsize, CV_32FC1), ymap(dsize, CV_32FC1);
     declare.in(M).out(xmap, ymap);
@@ -305,15 +306,15 @@ PERF_TEST_P(buildWarpPerspectiveMapsFixture, Inverse, OCL_TYPICAL_MAT_SIZES)
         oclXMap.download(xmap);
         oclYMap.download(ymap);
 
-        SANITY_CHECK(xmap);
-        SANITY_CHECK(ymap);
+        SANITY_CHECK(xmap, eps);
+        SANITY_CHECK(ymap, eps);
     }
     else if (RUN_PLAIN_IMPL)
     {
         TEST_CYCLE() buildWarpPerspectiveMaps(M, true, dsize, xmap, ymap);
 
-        SANITY_CHECK(xmap);
-        SANITY_CHECK(ymap);
+        SANITY_CHECK(xmap, eps);
+        SANITY_CHECK(ymap, eps);
     }
     else
         OCL_PERF_ELSE

From 6770c040739da10137cea34b60950e94eab985dc Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Tue, 12 Nov 2013 16:50:42 +0400
Subject: [PATCH 28/45] refactored some functions from ocl arithm

---
 modules/ocl/src/arithm.cpp                   | 122 ++++++++++---------
 modules/ocl/src/opencl/arithm_cartToPolar.cl |  44 ++++---
 modules/ocl/src/opencl/arithm_magnitude.cl   |  48 ++------
 modules/ocl/src/opencl/arithm_polarToCart.cl |  92 ++++++++------
 modules/ocl/src/opencl/arithm_pow.cl         |  42 ++-----
 5 files changed, 163 insertions(+), 185 deletions(-)

diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp
index f8a069082..997b2010f 100644
--- a/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
@@ -867,30 +867,32 @@ void cv::ocl::log(const oclMat &src, oclMat &dst)
 
 static void arithmetic_magnitude_phase_run(const oclMat &src1, const oclMat &src2, oclMat &dst, string kernelName)
 {
-    int channels = dst.oclchannels();
     int depth = dst.depth();
 
-    size_t vector_length = 1;
-    int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
-    int cols = divUp(dst.cols * channels + offset_cols, vector_length);
-
     size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { cols, dst.rows, 1 };
+    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
+
+    int src1_step = src1.step / src1.elemSize(), src1_offset = src1.offset / src1.elemSize();
+    int src2_step = src2.step / src2.elemSize(), src2_offset = src2.offset / src2.elemSize();
+    int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
 
     vector<pair<size_t , const void *> > args;
     args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset ));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset ));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));
 
-    openCLExecuteKernel(src1.clCxt, &arithm_magnitude, kernelName, globalThreads, localThreads, args, -1, depth);
+    const char * const channelMap[] = { "", "", "2", "4", "4" };
+    std::string buildOptions = format("-D T=%s%s", depth == CV_32F ? "float" : "double", channelMap[dst.channels()]);
+
+    openCLExecuteKernel(src1.clCxt, &arithm_magnitude, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
 }
 
 void cv::ocl::magnitude(const oclMat &src1, const oclMat &src2, oclMat &dst)
@@ -964,25 +966,29 @@ static void arithmetic_cartToPolar_run(const oclMat &src1, const oclMat &src2, o
     size_t localThreads[3]  = { 64, 4, 1 };
     size_t globalThreads[3] = { cols, src1.rows, 1 };
 
-    int tmp = angleInDegrees ? 1 : 0;
+    int src1_step = src1.step / src1.elemSize1(), src1_offset = src1.offset / src1.elemSize1();
+    int src2_step = src2.step / src2.elemSize1(), src2_offset = src2.offset / src2.elemSize1();
+    int dst_mag_step = dst_mag.step / dst_mag.elemSize1(), dst_mag_offset = dst_mag.offset / dst_mag.elemSize1();
+    int dst_cart_step = dst_cart.step / dst_cart.elemSize1(), dst_cart_offset = dst_cart.offset / dst_cart.elemSize1();
+
     vector<pair<size_t , const void *> > args;
     args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset ));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset ));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&dst_mag.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_mag.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_mag.offset ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_mag_step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_mag_offset ));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&dst_cart.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_cart.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_cart.offset ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_cart_step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_cart_offset ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&src1.rows ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&tmp ));
 
-    openCLExecuteKernel(src1.clCxt, &arithm_cartToPolar, kernelName, globalThreads, localThreads, args, -1, depth);
+    openCLExecuteKernel(src1.clCxt, &arithm_cartToPolar, kernelName, globalThreads, localThreads, args,
+                        -1, depth, angleInDegrees ? "-D DEGREE" : "-D RADIAN");
 }
 
 void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat &angle, bool angleInDegrees)
@@ -1008,37 +1014,38 @@ void cv::ocl::cartToPolar(const oclMat &x, const oclMat &y, oclMat &mag, oclMat
 static void arithmetic_ptc_run(const oclMat &src1, const oclMat &src2, oclMat &dst1, oclMat &dst2, bool angleInDegrees,
                         string kernelName)
 {
-    int channels = src2.oclchannels();
-    int depth = src2.depth();
-
-    int cols = src2.cols * channels;
-    int rows = src2.rows;
+    int channels = src2.oclchannels(), depth = src2.depth();
+    int cols = src2.cols * channels, rows = src2.rows; // one work-item per scalar component
 
     size_t localThreads[3]  = { 64, 4, 1 };
     size_t globalThreads[3] = { cols, rows, 1 };
 
-    int tmp = angleInDegrees ? 1 : 0;
+    int src1_step = src1.step / src1.elemSize1(), src1_offset = src1.offset / src1.elemSize1(); // steps/offsets are passed in elements, not bytes
+    int src2_step = src2.step / src2.elemSize1(), src2_offset = src2.offset / src2.elemSize1();
+    int dst1_step = dst1.step / dst1.elemSize1(), dst1_offset = dst1.offset / dst1.elemSize1();
+    int dst2_step = dst2.step / dst2.elemSize1(), dst2_offset = dst2.offset / dst2.elemSize1();
+
     vector<pair<size_t , const void *> > args;
     if (src1.data)
     {
         args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
-        args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
-        args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
+        args.push_back( make_pair( sizeof(cl_int), (void *)&src1_step ));
+        args.push_back( make_pair( sizeof(cl_int), (void *)&src1_offset ));
     }
     args.push_back( make_pair( sizeof(cl_mem), (void *)&src2.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src2.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src2.offset ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src2_step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src2_offset ));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&dst1.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst1.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst1.offset ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst1_step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst1_offset ));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&dst2.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst2.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst2.offset ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst2_step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst2_offset ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&rows ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&tmp ));
 
-    openCLExecuteKernel(src1.clCxt, &arithm_polarToCart, kernelName, globalThreads, localThreads, args, -1, depth);
+    openCLExecuteKernel(src2.clCxt, &arithm_polarToCart, kernelName, globalThreads, localThreads, // src1 may be empty (see the guard above), so take the context from src2
+                        args, -1, depth, angleInDegrees ? "-D DEGREE" : "-D RADIAN"); // the angle unit is a build option, not a kernel argument
 }
 
 void cv::ocl::polarToCart(const oclMat &magnitude, const oclMat &angle, oclMat &x, oclMat &y, bool angleInDegrees)
@@ -1623,38 +1630,37 @@ void cv::ocl::addWeighted(const oclMat &src1, double alpha, const oclMat &src2,
 /////////////////////////////////// Pow //////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////////
 
-static void arithmetic_pow_run(const oclMat &src1, double p, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
+static void arithmetic_pow_run(const oclMat &src, double p, oclMat &dst, string kernelName, const cv::ocl::ProgramEntry* source)
 {
     int channels = dst.oclchannels();
     int depth = dst.depth();
 
-    size_t vector_length = 1;
-    int offset_cols = ((dst.offset % dst.step) / dst.elemSize1()) & (vector_length - 1);
-    int cols = divUp(dst.cols * channels + offset_cols, vector_length);
-    int rows = dst.rows;
-
     size_t localThreads[3]  = { 64, 4, 1 };
-    size_t globalThreads[3] = { cols, rows, 1 };
+    size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
+
+    const char * const channelMap[] = { "", "", "2", "4", "4" };
+    std::string buildOptions = format("-D T=%s%s", depth == CV_32F ? "float" : "double", channelMap[channels]);
+
+    int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
+    int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
 
-    int dst_step1 = dst.cols * dst.elemSize();
     vector<pair<size_t , const void *> > args;
-    args.push_back( make_pair( sizeof(cl_mem), (void *)&src1.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&src1.offset ));
+    args.push_back( make_pair( sizeof(cl_mem), (void *)&src.data ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src_step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&src_offset ));
     args.push_back( make_pair( sizeof(cl_mem), (void *)&dst.data ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.step ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.offset ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_offset ));
     args.push_back( make_pair( sizeof(cl_int), (void *)&dst.rows ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&cols ));
-    args.push_back( make_pair( sizeof(cl_int), (void *)&dst_step1 ));
+    args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));
 
     float pf = static_cast<float>(p);
-    if (!src1.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
+    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
         args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
     else
         args.push_back( make_pair( sizeof(cl_double), (void *)&p ));
 
-    openCLExecuteKernel(src1.clCxt, source, kernelName, globalThreads, localThreads, args, -1, depth);
+    openCLExecuteKernel(src.clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1, buildOptions.c_str());
 }
 
 void cv::ocl::pow(const oclMat &x, double p, oclMat &y)
diff --git a/modules/ocl/src/opencl/arithm_cartToPolar.cl b/modules/ocl/src/opencl/arithm_cartToPolar.cl
index f634f2d42..c65f899b7 100644
--- a/modules/ocl/src/opencl/arithm_cartToPolar.cl
+++ b/modules/ocl/src/opencl/arithm_cartToPolar.cl
@@ -58,21 +58,21 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
                                      __global float *src2, int src2_step, int src2_offset,
                                      __global float *dst1, int dst1_step, int dst1_offset, // magnitude
                                      __global float *dst2, int dst2_step, int dst2_offset, // cartToPolar
-                                     int rows, int cols, int angInDegree)
+                                     int rows, int cols)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
 
     if (x < cols && y < rows)
     {
-        int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
-        int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
+        int src1_index = mad24(y, src1_step, x + src1_offset);
+        int src2_index = mad24(y, src2_step, x + src2_offset);
 
-        int dst1_index = mad24(y, dst1_step, (x << 2) + dst1_offset);
-        int dst2_index = mad24(y, dst2_step, (x << 2) + dst2_offset);
+        int dst1_index = mad24(y, dst1_step, x + dst1_offset);
+        int dst2_index = mad24(y, dst2_step, x + dst2_offset);
 
-        float x = *((__global float *)((__global char *)src1 + src1_index));
-        float y = *((__global float *)((__global char *)src2 + src2_index));
+        float x = src1[src1_index];
+        float y = src2[src2_index];
 
         float x2 = x * x;
         float y2 = y * y;
@@ -86,10 +86,12 @@ __kernel void arithm_cartToPolar_D5 (__global float *src1, int src1_step, int sr
         float cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + FLT_EPSILON) + tmp :
                                  tmp1 - x*y/(y2 + 0.28f*x2 + FLT_EPSILON);
 
-        cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (180/CV_PI);
+#ifdef DEGREE
+        cartToPolar *= (180/CV_PI);
+#endif
 
-        *((__global float *)((__global char *)dst1 + dst1_index)) = magnitude;
-        *((__global float *)((__global char *)dst2 + dst2_index)) = cartToPolar;
+        dst1[dst1_index] = magnitude;
+        dst2[dst2_index] = cartToPolar;
     }
 }
 
@@ -99,21 +101,21 @@ __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int s
                                      __global double *src2, int src2_step, int src2_offset,
                                      __global double *dst1, int dst1_step, int dst1_offset,
                                      __global double *dst2, int dst2_step, int dst2_offset,
-                                     int rows, int cols, int angInDegree)
+                                     int rows, int cols)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
 
     if (x < cols && y < rows)
     {
-        int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
-        int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
+        int src1_index = mad24(y, src1_step, x + src1_offset);
+        int src2_index = mad24(y, src2_step, x + src2_offset);
 
-        int dst1_index = mad24(y, dst1_step, (x << 3) + dst1_offset);
-        int dst2_index = mad24(y, dst2_step, (x << 3) + dst2_offset);
+        int dst1_index = mad24(y, dst1_step, x + dst1_offset);
+        int dst2_index = mad24(y, dst2_step, x + dst2_offset);
 
-        double x = *((__global double *)((__global char *)src1 + src1_index));
-        double y = *((__global double *)((__global char *)src2 + src2_index));
+        double x = src1[src1_index];
+        double y = src2[src2_index];
 
         double x2 = x * x;
         double y2 = y * y;
@@ -127,10 +129,12 @@ __kernel void arithm_cartToPolar_D6 (__global double *src1, int src1_step, int s
         double cartToPolar = y2 <= x2 ? x*y/(x2 + 0.28f*y2 + DBL_EPSILON)  + tmp :
                                  tmp1 - x*y/(y2 + 0.28f*x2 + DBL_EPSILON);
 
-        cartToPolar = angInDegree == 0 ? cartToPolar : cartToPolar * (180/CV_PI);
+#ifdef DEGREE
+        cartToPolar *= (180/CV_PI);
+#endif
 
-        *((__global double *)((__global char *)dst1 + dst1_index)) = magnitude;
-        *((__global double *)((__global char *)dst2 + dst2_index)) = cartToPolar;
+        dst1[dst1_index] = magnitude;
+        dst2[dst2_index] = cartToPolar;
     }
 }
 
diff --git a/modules/ocl/src/opencl/arithm_magnitude.cl b/modules/ocl/src/opencl/arithm_magnitude.cl
index 6fd2ac383..1053efd00 100644
--- a/modules/ocl/src/opencl/arithm_magnitude.cl
+++ b/modules/ocl/src/opencl/arithm_magnitude.cl
@@ -51,50 +51,24 @@
 #endif
 #endif
 
-__kernel void arithm_magnitude_D5 (__global float *src1, int src1_step, int src1_offset,
-                                   __global float *src2, int src2_step, int src2_offset,
-                                   __global float *dst,  int dst_step,  int dst_offset,
-                                  int rows, int cols)
+__kernel void arithm_magnitude(__global T *src1, int src1_step, int src1_offset,
+                               __global T *src2, int src2_step, int src2_offset,
+                               __global T *dst,  int dst_step,  int dst_offset,
+                               int rows, int cols)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
 
     if (x < cols && y < rows)
     {
-        int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
-        int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
-        int dst_index  = mad24(y, dst_step,  (x << 2) + dst_offset);
+        int src1_index = mad24(y, src1_step, x + src1_offset);
+        int src2_index = mad24(y, src2_step, x + src2_offset);
+        int dst_index  = mad24(y, dst_step,  x + dst_offset);
 
-        float data1 = *((__global float *)((__global char *)src1 + src1_index));
-        float data2 = *((__global float *)((__global char *)src2 + src2_index));
+        T data1 = src1[src1_index];
+        T data2 = src2[src2_index];
 
-        float tmp = sqrt(data1 * data1 + data2 * data2);
-
-        *((__global float *)((__global char *)dst + dst_index)) = tmp;
+        T tmp = hypot(data1, data2);
+        dst[dst_index] = tmp;
     }
 }
-
-#if defined (DOUBLE_SUPPORT)
-__kernel void arithm_magnitude_D6 (__global double *src1, int src1_step, int src1_offset,
-                                   __global double *src2, int src2_step, int src2_offset,
-                                   __global double *dst,  int dst_step,  int dst_offset,
-                                  int rows, int cols)
-{
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if (x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
-        int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
-        int dst_index  = mad24(y, dst_step,  (x << 3) + dst_offset);
-
-        double data1 = *((__global double *)((__global char *)src1 + src1_index));
-        double data2 = *((__global double *)((__global char *)src2 + src2_index));
-
-        double tmp = sqrt(data1 * data1 + data2 * data2);
-
-        *((__global double *)((__global char *)dst + dst_index)) = tmp;
-    }
-}
-#endif
diff --git a/modules/ocl/src/opencl/arithm_polarToCart.cl b/modules/ocl/src/opencl/arithm_polarToCart.cl
index f3ec3117d..024f1f0ee 100644
--- a/modules/ocl/src/opencl/arithm_polarToCart.cl
+++ b/modules/ocl/src/opencl/arithm_polarToCart.cl
@@ -57,33 +57,38 @@
 /////////////////////////////////////////////////////////////////////////////////////////////////////
 /////////////////////////////////////////polarToCart with magnitude//////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////////////////////////
+
 __kernel void arithm_polarToCart_mag_D5 (__global float *src1, int src1_step, int src1_offset,//magnitue
                                          __global float *src2, int src2_step, int src2_offset,//angle
                                          __global float *dst1, int dst1_step, int dst1_offset,
                                          __global float *dst2, int dst2_step, int dst2_offset,
-                                         int rows, int cols, int angInDegree)
+                                         int rows, int cols)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
 
     if (x < cols && y < rows)
     {
-        int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
-        int src2_index = mad24(y, src2_step, (x << 2) + src2_offset);
+        int src1_index = mad24(y, src1_step, x + src1_offset);
+        int src2_index = mad24(y, src2_step, x + src2_offset);
 
-        int dst1_index = mad24(y, dst1_step, (x << 2) + dst1_offset);
-        int dst2_index = mad24(y, dst2_step, (x << 2) + dst2_offset);
+        int dst1_index = mad24(y, dst1_step, x + dst1_offset);
+        int dst2_index = mad24(y, dst2_step, x + dst2_offset);
 
-        float x = *((__global float *)((__global char *)src1 + src1_index));
-        float y = *((__global float *)((__global char *)src2 + src2_index));
+        float x = src1[src1_index];
+        float y = src2[src2_index];
 
+#ifdef DEGREE
         float ascale = CV_PI/180.0f;
-        float alpha  = angInDegree == 1 ? y * ascale : y;
+        float alpha = y * ascale;
+#else
+        float alpha = y;
+#endif
         float a = cos(alpha) * x;
         float b = sin(alpha) * x;
 
-        *((__global float *)((__global char *)dst1 + dst1_index)) = a;
-        *((__global float *)((__global char *)dst2 + dst2_index)) = b;
+        dst1[dst1_index] = a;
+        dst2[dst2_index] = b;
     }
 }
 
@@ -92,29 +97,33 @@ __kernel void arithm_polarToCart_mag_D6 (__global double *src1, int src1_step, i
                                          __global double *src2, int src2_step, int src2_offset,//angle
                                          __global double *dst1, int dst1_step, int dst1_offset,
                                          __global double *dst2, int dst2_step, int dst2_offset,
-                                         int rows, int cols, int angInDegree)
+                                         int rows, int cols)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
 
     if (x < cols && y < rows)
     {
-        int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
-        int src2_index = mad24(y, src2_step, (x << 3) + src2_offset);
+        int src1_index = mad24(y, src1_step, x + src1_offset); // steps and offsets arrive in double elements
+        int src2_index = mad24(y, src2_step, x + src2_offset);
 
-        int dst1_index = mad24(y, dst1_step, (x << 3) + dst1_offset);
-        int dst2_index = mad24(y, dst2_step, (x << 3) + dst2_offset);
+        int dst1_index = mad24(y, dst1_step, x + dst1_offset);
+        int dst2_index = mad24(y, dst2_step, x + dst2_offset);
 
-        double x = *((__global double *)((__global char *)src1 + src1_index));
-        double y = *((__global double *)((__global char *)src2 + src2_index));
+        double x = src1[src1_index]; // magnitude
+        double y = src2[src2_index]; // angle
 
-        float ascale = CV_PI/180.0;
-        double alpha  = angInDegree == 1 ? y * ascale : y;
+#ifdef DEGREE
+        double ascale = CV_PI/180.0; // degrees -> radians, kept in double for the CV_64F kernel
+        double alpha = y * ascale;
+#else
+        double alpha = y; // do not narrow the angle to float before cos/sin
+#endif
         double a = cos(alpha) * x;
         double b = sin(alpha) * x;
 
-        *((__global double *)((__global char *)dst1 + dst1_index)) = a;
-        *((__global double *)((__global char *)dst2 + dst2_index)) = b;
+        dst1[dst1_index] = a;
+        dst2[dst2_index] = b;
     }
 }
 #endif
@@ -122,30 +131,35 @@ __kernel void arithm_polarToCart_mag_D6 (__global double *src1, int src1_step, i
 /////////////////////////////////////////////////////////////////////////////////////////////////////
 /////////////////////////////////////////polarToCart without magnitude//////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////////////////////////
+
 __kernel void arithm_polarToCart_D5 (__global float *src,  int src_step,  int src_offset,//angle
                                      __global float *dst1, int dst1_step, int dst1_offset,
                                      __global float *dst2, int dst2_step, int dst2_offset,
-                                     int rows, int cols, int angInDegree)
+                                     int rows, int cols)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
 
     if (x < cols && y < rows)
     {
-        int src_index  = mad24(y, src_step,  (x << 2) + src_offset);
+        int src_index  = mad24(y, src_step,  x + src_offset);
 
-        int dst1_index = mad24(y, dst1_step, (x << 2) + dst1_offset);
-        int dst2_index = mad24(y, dst2_step, (x << 2) + dst2_offset);
+        int dst1_index = mad24(y, dst1_step, x + dst1_offset);
+        int dst2_index = mad24(y, dst2_step, x + dst2_offset);
 
-        float y = *((__global float *)((__global char *)src + src_index));
+        float y = src[src_index];
 
+#ifdef DEGREE
         float ascale = CV_PI/180.0f;
-        float alpha  = angInDegree == 1 ? y * ascale : y;
+        float alpha = y * ascale;
+#else
+        float alpha = y;
+#endif
         float a = cos(alpha);
         float b = sin(alpha);
 
-        *((__global float *)((__global char *)dst1 + dst1_index)) = a;
-        *((__global float *)((__global char *)dst2 + dst2_index)) = b;
+        dst1[dst1_index] = a;
+        dst2[dst2_index] = b;
     }
 }
 
@@ -153,27 +167,31 @@ __kernel void arithm_polarToCart_D5 (__global float *src,  int src_step,  int sr
-__kernel void arithm_polarToCart_D6 (__global float *src,  int src_step,  int src_offset,//angle
-                                     __global float *dst1, int dst1_step, int dst1_offset,
-                                     __global float *dst2, int dst2_step, int dst2_offset,
-                                     int rows, int cols, int angInDegree)
+__kernel void arithm_polarToCart_D6 (__global double *src,  int src_step,  int src_offset,//angle
+                                     __global double *dst1, int dst1_step, int dst1_offset,
+                                     __global double *dst2, int dst2_step, int dst2_offset,
+                                     int rows, int cols)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
 
     if (x < cols && y < rows)
     {
-        int src_index  = mad24(y, src_step,  (x << 3) + src_offset);
+        int src_index  = mad24(y, src_step,  x + src_offset); // element index: the pointers must be double for this to address correctly
 
-        int dst1_index = mad24(y, dst1_step, (x << 3) + dst1_offset);
-        int dst2_index = mad24(y, dst2_step, (x << 3) + dst2_offset);
+        int dst1_index = mad24(y, dst1_step, x + dst1_offset);
+        int dst2_index = mad24(y, dst2_step, x + dst2_offset);
 
-        double y = *((__global double *)((__global char *)src + src_index));
+        double y = src[src_index]; // angle
 
-        float ascale = CV_PI/180.0;
-        double alpha  = angInDegree == 1 ? y * ascale : y;
+#ifdef DEGREE
+        double ascale = CV_PI/180.0; // degrees -> radians, kept in double for the CV_64F kernel
+        double alpha = y * ascale;
+#else
+        double alpha = y; // do not narrow the angle to float before cos/sin
+#endif
         double a = cos(alpha);
         double b = sin(alpha);
 
-        *((__global double *)((__global char *)dst1 + dst1_index)) = a;
-        *((__global double *)((__global char *)dst2 + dst2_index)) = b;
+        dst1[dst1_index] = a;
+        dst2[dst2_index] = b;
     }
 }
 #endif
diff --git a/modules/ocl/src/opencl/arithm_pow.cl b/modules/ocl/src/opencl/arithm_pow.cl
index 36a22b628..bb0673d4a 100644
--- a/modules/ocl/src/opencl/arithm_pow.cl
+++ b/modules/ocl/src/opencl/arithm_pow.cl
@@ -56,45 +56,21 @@
 
 /************************************** pow **************************************/
 
-__kernel void arithm_pow_D5 (__global float *src1, int src1_step, int src1_offset,
-                             __global float *dst,  int dst_step,  int dst_offset,
-                             int rows, int cols, int dst_step1, F p)
+__kernel void arithm_pow(__global T * src, int src_step, int src_offset,
+                         __global T * dst, int dst_step, int dst_offset,
+                         int rows, int cols, F p)
 {
-
     int x = get_global_id(0);
     int y = get_global_id(1);
 
-    if(x < cols && y < rows)
+    if (x < cols && y < rows)
     {
-        int src1_index = mad24(y, src1_step, (x << 2) + src1_offset);
-        int dst_index  = mad24(y, dst_step,  (x << 2) + dst_offset);
+        int src_index = mad24(y, src_step, x + src_offset);
+        int dst_index = mad24(y, dst_step, x + dst_offset);
 
-        float src1_data = *((__global float *)((__global char *)src1 + src1_index));
-        float tmp = src1_data > 0 ? exp(p * log(src1_data)) : (src1_data == 0 ? 0 : exp(p * log(fabs(src1_data))));
+        T src_data = src[src_index];
+        T tmp = src_data > 0 ? exp(p * log(src_data)) : (src_data == 0 ? 0 : exp(p * log(fabs(src_data))));
 
-        *((__global float *)((__global char *)dst + dst_index)) = tmp;
+        dst[dst_index] = tmp;
     }
 }
-
-#if defined (DOUBLE_SUPPORT)
-
-__kernel void arithm_pow_D6 (__global double *src1, int src1_step, int src1_offset,
-                             __global double *dst,  int dst_step,  int dst_offset,
-                             int rows, int cols, int dst_step1, F p)
-{
-
-    int x = get_global_id(0);
-    int y = get_global_id(1);
-
-    if(x < cols && y < rows)
-    {
-        int src1_index = mad24(y, src1_step, (x << 3) + src1_offset);
-        int dst_index  = mad24(y, dst_step,  (x << 3) + dst_offset);
-
-        double src1_data = *((__global double *)((__global char *)src1 + src1_index));
-        double tmp = src1_data > 0 ? exp(p * log(src1_data)) : (src1_data == 0 ? 0 : exp(p * log(fabs(src1_data))));
-        *((__global double *)((__global char *)dst + dst_index)) = tmp;
-    }
-}
-
-#endif

From f1873bbca19400110f6a0fa4333453165ebdc963 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.alekhin@itseez.com>
Date: Tue, 12 Nov 2013 18:15:50 +0400
Subject: [PATCH 29/45] perf tests: allow skipping performance tests

---
 modules/ts/include/opencv2/ts/ts_perf.hpp |  3 ++
 modules/ts/src/ts_perf.cpp                | 47 ++++++++++++++++-------
 2 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/modules/ts/include/opencv2/ts/ts_perf.hpp b/modules/ts/include/opencv2/ts/ts_perf.hpp
index 6a0e9215b..9238b3e34 100644
--- a/modules/ts/include/opencv2/ts/ts_perf.hpp
+++ b/modules/ts/include/opencv2/ts/ts_perf.hpp
@@ -243,6 +243,7 @@ typedef struct CV_EXPORTS performance_metrics
         TERM_TIME = 1,
         TERM_INTERRUPT = 2,
         TERM_EXCEPTION = 3,
+        TERM_SKIP_TEST = 4, // the test could not run because of some limitation and was skipped
         TERM_UNKNOWN = -1
     };
 
@@ -279,6 +280,8 @@ public:
     static enum PERF_STRATEGY getPerformanceStrategy();
     static enum PERF_STRATEGY setPerformanceStrategy(enum PERF_STRATEGY strategy);
 
+    class PerfSkipTestException: public cv::Exception {}; // thrown out of PerfTestBody() to mark the test as skipped
+
 protected:
     virtual void PerfTestBody() = 0;
 
diff --git a/modules/ts/src/ts_perf.cpp b/modules/ts/src/ts_perf.cpp
index 319076ca8..3bcb5c11a 100644
--- a/modules/ts/src/ts_perf.cpp
+++ b/modules/ts/src/ts_perf.cpp
@@ -1175,7 +1175,14 @@ void TestBase::reportMetrics(bool toJUnitXML)
 {
     performance_metrics& m = calcMetrics();
 
-    if (toJUnitXML)
+    if (m.terminationReason == performance_metrics::TERM_SKIP_TEST) // skipped test: record the status only
+    {
+        if (toJUnitXML)
+        {
+            RecordProperty("custom_status", "skipped"); // the only property written for a skipped test
+        }
+    }
+    else if (toJUnitXML)
     {
         RecordProperty("bytesIn", (int)m.bytesIn);
         RecordProperty("bytesOut", (int)m.bytesOut);
@@ -1267,21 +1274,30 @@ void TestBase::SetUp()
 
 void TestBase::TearDown()
 {
-    if (!HasFailure() && !verified)
-        ADD_FAILURE() << "The test has no sanity checks. There should be at least one check at the end of performance test.";
-
-    validateMetrics();
-    if (HasFailure())
-        reportMetrics(false);
+    if (metrics.terminationReason == performance_metrics::TERM_SKIP_TEST) // skipped tests bypass the sanity-check requirement and metric validation
+    {
+        LOGI("\tTest was skipped");
+        GTEST_SUCCEED() << "Test was skipped";
+    }
     else
     {
-        const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info();
-        const char* type_param = test_info->type_param();
-        const char* value_param = test_info->value_param();
-        if (value_param) printf("[ VALUE    ] \t%s\n", value_param), fflush(stdout);
-        if (type_param)  printf("[ TYPE     ] \t%s\n", type_param), fflush(stdout);
-        reportMetrics(true);
+        if (!HasFailure() && !verified)
+            ADD_FAILURE() << "The test has no sanity checks. There should be at least one check at the end of performance test.";
+
+        validateMetrics();
+        if (HasFailure())
+        {
+            reportMetrics(false); // failed tests are reported with toJUnitXML == false
+            return; // and do not reach the parameter printout below
+        }
     }
+
+    const ::testing::TestInfo* const test_info = ::testing::UnitTest::GetInstance()->current_test_info(); // reached by skipped tests and by tests without failures
+    const char* type_param = test_info->type_param();
+    const char* value_param = test_info->value_param();
+    if (value_param) printf("[ VALUE    ] \t%s\n", value_param), fflush(stdout);
+    if (type_param)  printf("[ TYPE     ] \t%s\n", type_param), fflush(stdout);
+    reportMetrics(true);
 }
 
 std::string TestBase::getDataPath(const std::string& relativePath)
@@ -1331,6 +1347,11 @@ void TestBase::RunPerfTestBody()
     {
         this->PerfTestBody();
     }
+    catch(PerfSkipTestException&) // the test body asked to be skipped
+    {
+        metrics.terminationReason = performance_metrics::TERM_SKIP_TEST; // TearDown() and reportMetrics() branch on this value
+        return;
+    }
     catch(PerfEarlyExitException&)
     {
         metrics.terminationReason = performance_metrics::TERM_INTERRUPT;

From fb326a58fc0b9c1b8bb64e115ab85b2354616729 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.alekhin@itseez.com>
Date: Tue, 12 Nov 2013 18:59:43 +0400
Subject: [PATCH 30/45] ocl: perf tests: skip tests if we fail check for
 maxMemoryAllocSize

---
 modules/ocl/perf/perf_arithm.cpp           |  3 +++
 modules/ocl/perf/perf_filters.cpp          | 18 ++++++++++++++
 modules/ocl/perf/perf_imgwarp.cpp          |  5 +++-
 modules/ocl/perf/perf_matrix_operation.cpp |  4 ++++
 modules/ocl/perf/perf_precomp.hpp          | 28 +++++++++++++++++++---
 modules/ocl/perf/perf_pyramid.cpp          | 10 ++++++--
 modules/ocl/perf/perf_split_merge.cpp      |  9 +++++--
 7 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/modules/ocl/perf/perf_arithm.cpp b/modules/ocl/perf/perf_arithm.cpp
index 24eab3b91..2699b44a7 100644
--- a/modules/ocl/perf/perf_arithm.cpp
+++ b/modules/ocl/perf/perf_arithm.cpp
@@ -1066,6 +1066,9 @@ PERF_TEST_P(RepeatFixture, Repeat,
     const int nx = 3, ny = 2;
     const Size dstSize(srcSize.width * nx, srcSize.height * ny);
 
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+    checkDeviceMaxMemoryAllocSize(dstSize, type);
+
     Mat src(srcSize, type), dst(dstSize, type);
     declare.in(src, WARMUP_RNG).out(dst);
 
diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp
index ef0ea7b45..7e5389df6 100644
--- a/modules/ocl/perf/perf_filters.cpp
+++ b/modules/ocl/perf/perf_filters.cpp
@@ -61,6 +61,8 @@ PERF_TEST_P(BlurFixture, Blur,
     const Size srcSize = get<0>(params), ksize(3, 3);
     const int type = get<1>(params), bordertype = BORDER_CONSTANT;
 
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+
     Mat src(srcSize, type), dst(srcSize, type);
     declare.in(src, WARMUP_RNG).out(dst);
 
@@ -99,6 +101,8 @@ PERF_TEST_P(LaplacianFixture, Laplacian,
     const Size srcSize = get<0>(params);
     const int type = get<1>(params), ksize = 3;
 
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+
     Mat src(srcSize, type), dst(srcSize, type);
     declare.in(src, WARMUP_RNG).out(dst);
 
@@ -138,6 +142,8 @@ PERF_TEST_P(ErodeFixture, Erode,
     const int type = get<1>(params), ksize = 3;
     const Mat ker = getStructuringElement(MORPH_RECT, Size(ksize, ksize));
 
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+
     Mat src(srcSize, type), dst(srcSize, type);
     declare.in(src, WARMUP_RNG).out(dst).in(ker);
 
@@ -176,6 +182,8 @@ PERF_TEST_P(SobelFixture, Sobel,
     const Size srcSize = get<0>(params);
     const int type = get<1>(params), dx = 1, dy = 1;
 
+    checkDeviceMaxMemoryAllocSize(srcSize, type, sizeof(float) * 2);
+
     Mat src(srcSize, type), dst(srcSize, type);
     declare.in(src, WARMUP_RNG).out(dst);
 
@@ -217,6 +225,8 @@ PERF_TEST_P(ScharrFixture, Scharr,
     const Size srcSize = get<0>(params);
     const int type = get<1>(params), dx = 1, dy = 0;
 
+    checkDeviceMaxMemoryAllocSize(srcSize, type, sizeof(float) * 2);
+
     Mat src(srcSize, type), dst(srcSize, type);
     declare.in(src, WARMUP_RNG).out(dst);
 
@@ -258,6 +268,8 @@ PERF_TEST_P(GaussianBlurFixture, GaussianBlur,
     const Size srcSize = get<0>(params);
     const int type = get<1>(params), ksize = 7;
 
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+
     Mat src(srcSize, type), dst(srcSize, type);
     declare.in(src, WARMUP_RNG).out(dst);
 
@@ -295,6 +307,8 @@ PERF_TEST_P(filter2DFixture, filter2D,
     const Size srcSize = get<0>(params);
     const int type = get<1>(params), ksize = 3;
 
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+
     Mat src(srcSize, type), dst(srcSize, type), kernel(ksize, ksize, CV_32SC1);
     declare.in(src, WARMUP_RNG).in(kernel).out(dst);
     randu(kernel, -3.0, 3.0);
@@ -335,6 +349,8 @@ PERF_TEST_P(BilateralFixture, Bilateral,
     const int type = get<1>(params), d = 7;
     const double sigmacolor = 50.0, sigmaspace = 50.0;
 
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+
     Mat src(srcSize, type), dst(srcSize, type);
     declare.in(src, WARMUP_RNG).out(dst);
 
@@ -374,6 +390,8 @@ PERF_TEST_P(adaptiveBilateralFixture, adaptiveBilateral,
     const double sigmaspace = 10.0;
     Size ksize(9, 9);
 
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+
     Mat src(srcSize, type), dst(srcSize, type);
     declare.in(src, WARMUP_RNG).out(dst);
 
diff --git a/modules/ocl/perf/perf_imgwarp.cpp b/modules/ocl/perf/perf_imgwarp.cpp
index 0aff45e9a..01d4fd275 100644
--- a/modules/ocl/perf/perf_imgwarp.cpp
+++ b/modules/ocl/perf/perf_imgwarp.cpp
@@ -154,9 +154,12 @@ PERF_TEST_P(resizeFixture, resize,
     const Size srcSize = get<0>(params);
     const int type = get<1>(params), interType = get<2>(params);
     double scale = get<3>(params);
+    const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));
+
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+    checkDeviceMaxMemoryAllocSize(dstSize, type);
 
     Mat src(srcSize, type), dst;
-    const Size dstSize(cvRound(srcSize.width * scale), cvRound(srcSize.height * scale));
     dst.create(dstSize, type);
     declare.in(src, WARMUP_RNG).out(dst);
     if (interType == INTER_LINEAR && type == CV_8UC4 && OCL_SIZE_4000 == srcSize)
diff --git a/modules/ocl/perf/perf_matrix_operation.cpp b/modules/ocl/perf/perf_matrix_operation.cpp
index f2baa7ffc..5ca322e22 100644
--- a/modules/ocl/perf/perf_matrix_operation.cpp
+++ b/modules/ocl/perf/perf_matrix_operation.cpp
@@ -63,6 +63,10 @@ PERF_TEST_P(ConvertToFixture, ConvertTo,
 
     Mat src(srcSize, type), dst;
     const int dstType = CV_MAKE_TYPE(CV_32F, src.channels());
+
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+    checkDeviceMaxMemoryAllocSize(srcSize, dstType);
+
     dst.create(srcSize, dstType);
     declare.in(src, WARMUP_RNG).out(dst);
 
diff --git a/modules/ocl/perf/perf_precomp.hpp b/modules/ocl/perf/perf_precomp.hpp
index 2d9639a85..a8663df99 100644
--- a/modules/ocl/perf/perf_precomp.hpp
+++ b/modules/ocl/perf/perf_precomp.hpp
@@ -103,8 +103,30 @@ using namespace cv;
             CV_TEST_FAIL_NO_IMPL();
 #endif
 
-#define OCL_TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); ocl::finish(), stopTimer())
-#define OCL_TEST_CYCLE() for(; startTimer(), next(); ocl::finish(), stopTimer())
-#define OCL_TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); startTimer(), next(); stopTimer()) for(int r = 0; r < runsNum; ocl::finish(), ++r)
+#define OCL_TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); cv::ocl::finish(), stopTimer())
+#define OCL_TEST_CYCLE() for(; startTimer(), next(); cv::ocl::finish(), stopTimer())
+#define OCL_TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); startTimer(), next(); stopTimer()) for(int r = 0; r < runsNum; cv::ocl::finish(), ++r)
+
+namespace cvtest {
+namespace ocl {
+inline void checkDeviceMaxMemoryAllocSize(const Size& size, int type, int factor = 1) // throws PerfSkipTestException when a size x type buffer, scaled by factor, reaches the device allocation limit
+{
+    assert(factor > 0);
+    if (!(IMPL_OCL == perf::TestBase::getSelectedImpl()))
+        return; // OpenCL devices are not used
+    int cn = CV_MAT_CN(type);
+    int cn_ocl = cn == 3 ? 4 : cn; // 3-channel data is budgeted as 4 channels (presumably matches the device-side layout; confirm against oclMat)
+    int type_ocl = CV_MAKE_TYPE(CV_MAT_DEPTH(type), cn_ocl);
+    size_t memSize = (size_t)size.width * size.height * CV_ELEM_SIZE(type_ocl); // widen before multiplying: the int product can overflow for large images
+    const cv::ocl::DeviceInfo& devInfo = cv::ocl::Context::getContext()->getDeviceInfo();
+    if (memSize * factor >= devInfo.maxMemAllocSize) // factor lets callers scale the estimate (e.g. for larger intermediate buffers)
+    {
+        throw perf::TestBase::PerfSkipTestException(); // handled in TestBase::RunPerfTestBody, which records TERM_SKIP_TEST
+    }
+}
+} // namespace cvtest::ocl
+} // namespace cvtest
+
+using namespace cvtest::ocl;
 
 #endif
diff --git a/modules/ocl/perf/perf_pyramid.cpp b/modules/ocl/perf/perf_pyramid.cpp
index c799853db..820dd6062 100644
--- a/modules/ocl/perf/perf_pyramid.cpp
+++ b/modules/ocl/perf/perf_pyramid.cpp
@@ -60,9 +60,12 @@ PERF_TEST_P(pyrDownFixture, pyrDown,
     const Size_MatType_t params = GetParam();
     const Size srcSize = get<0>(params);
     const int type = get<1>(params);
+    Size dstSize((srcSize.height + 1) >> 1, (srcSize.width + 1) >> 1);
+
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+    checkDeviceMaxMemoryAllocSize(dstSize, type);
 
     Mat src(srcSize, type), dst;
-    Size dstSize((srcSize.height + 1) >> 1, (srcSize.width + 1) >> 1);
     dst.create(dstSize, type);
     declare.in(src, WARMUP_RNG).out(dst);
 
@@ -97,9 +100,12 @@ PERF_TEST_P(pyrUpFixture, pyrUp,
     const Size_MatType_t params = GetParam();
     const Size srcSize = get<0>(params);
     const int type = get<1>(params);
+    Size dstSize(srcSize.height << 1, srcSize.width << 1);
+
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+    checkDeviceMaxMemoryAllocSize(dstSize, type);
 
     Mat src(srcSize, type), dst;
-    Size dstSize(srcSize.height << 1, srcSize.width << 1);
     dst.create(dstSize, type);
     declare.in(src, WARMUP_RNG).out(dst);
 
diff --git a/modules/ocl/perf/perf_split_merge.cpp b/modules/ocl/perf/perf_split_merge.cpp
index f2f7c4115..ecfc49e33 100644
--- a/modules/ocl/perf/perf_split_merge.cpp
+++ b/modules/ocl/perf/perf_split_merge.cpp
@@ -60,8 +60,10 @@ PERF_TEST_P(MergeFixture, Merge,
     const Size_MatType_t params = GetParam();
     const Size srcSize = get<0>(params);
     const int depth = get<1>(params), channels = 3;
-
     const int dstType = CV_MAKE_TYPE(depth, channels);
+
+    checkDeviceMaxMemoryAllocSize(srcSize, dstType);
+
     Mat dst(srcSize, dstType);
     vector<Mat> src(channels);
     for (vector<Mat>::iterator i = src.begin(), end = src.end(); i != end; ++i)
@@ -105,8 +107,11 @@ PERF_TEST_P(SplitFixture, Split,
     const Size_MatType_t params = GetParam();
     const Size srcSize = get<0>(params);
     const int depth = get<1>(params), channels = 3;
+    const int type = CV_MAKE_TYPE(depth, channels);
 
-    Mat src(srcSize, CV_MAKE_TYPE(depth, channels));
+    checkDeviceMaxMemoryAllocSize(srcSize, type);
+
+    Mat src(srcSize, type);
     declare.in(src, WARMUP_RNG);
 
     if (RUN_OCL_IMPL)

From 9007a02302809aefe5da9bd7f1bd2fd5a4640cf6 Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@aldebaran-robotics.com>
Date: Wed, 13 Nov 2013 12:25:44 +0100
Subject: [PATCH 31/45] fixes http://code.opencv.org/issues/3367

---
 cmake/templates/OpenCVConfig.cmake.in | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/cmake/templates/OpenCVConfig.cmake.in b/cmake/templates/OpenCVConfig.cmake.in
index ee1eb5996..094cbd4d0 100644
--- a/cmake/templates/OpenCVConfig.cmake.in
+++ b/cmake/templates/OpenCVConfig.cmake.in
@@ -16,6 +16,7 @@
 #    If the module is found then OPENCV_<MODULE>_FOUND is set to TRUE.
 #
 #    This file will define the following variables:
+#      - OpenCV_FOUND                    : Set to TRUE if OpenCV was find_packaged before
 #      - OpenCV_LIBS                     : The list of all imported targets for OpenCV modules.
 #      - OpenCV_INCLUDE_DIRS             : The OpenCV include directories.
 #      - OpenCV_COMPUTE_CAPABILITIES     : The version of compute capability
@@ -41,7 +42,10 @@ if(ANDROID)
   string(REPLACE - _ modules_file_suffix "_${ANDROID_NDK_ABI_NAME}")
 endif()
 
-include(${CMAKE_CURRENT_LIST_DIR}/OpenCVModules${modules_file_suffix}.cmake)
+if(NOT OpenCV_FOUND)
+  include(${CMAKE_CURRENT_LIST_DIR}/OpenCVModules${modules_file_suffix}.cmake)
+  set(OpenCV_FOUND TRUE)
+endif()
 
 # TODO All things below should be reviewed. What is about of moving this code into related modules (special vars/hooks/files)
 

From af7c6144384f4b26a94575eac7ac97f70d32aa50 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Wed, 13 Nov 2013 17:09:05 +0400
Subject: [PATCH 32/45] added RGB[A] -> HSV[FULL] conversion

---
 modules/ocl/src/color.cpp           |  70 ++++++++++++++--
 modules/ocl/src/opencl/cvt_color.cl | 124 ++++++++++++++++++++++++++++
 modules/ocl/test/test_color.cpp     |  17 ++++
 3 files changed, 202 insertions(+), 9 deletions(-)

diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index b01dcef1f..c3f58c92c 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -51,12 +51,15 @@ using namespace cv;
 using namespace cv::ocl;
 
 static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
-                           const oclMat & data = oclMat())
+                           const std::string & additionalOptions = std::string(),
+                           const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
 {
     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
 
     std::string build_options = format("-D DEPTH_%d", src.depth());
+    if (!additionalOptions.empty())
+        build_options += additionalOptions;
 
     vector<pair<size_t , const void *> > args;
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
@@ -69,8 +72,10 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
     args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
     args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
 
-    if (!data.empty())
-        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data ));
+    if (!data1.empty())
+        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data1.data ));
+    if (!data2.empty())
+        args.push_back( make_pair( sizeof(cl_mem) , (void *)&data2.data ));
 
     size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
     openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
@@ -297,10 +302,6 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         toRGB_caller(src, dst, bidx, "YCrCb2RGB");
         break;
     }
-    /*
-    case CV_BGR5652GRAY: case CV_BGR5552GRAY:
-    case CV_GRAY2BGR565: case CV_GRAY2BGR555:
-    */
     case CV_BGR2XYZ:
     case CV_RGB2XYZ:
     {
@@ -343,7 +344,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         }
         oclMat oclCoeffs(1, 9, depth == CV_32F ? CV_32FC1 : CV_32SC1, pdata);
 
-        fromRGB_caller(src, dst, bidx, "RGB2XYZ", oclCoeffs);
+        fromRGB_caller(src, dst, bidx, "RGB2XYZ", "", oclCoeffs);
         break;
     }
     case CV_XYZ2BGR:
@@ -393,9 +394,60 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         toRGB_caller(src, dst, bidx, "XYZ2RGB", oclCoeffs);
         break;
     }
-    /*
     case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL:
     case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL:
+    {
+        CV_Assert((scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F));
+        bidx = code == CV_BGR2HSV || code == CV_BGR2HLS ||
+            code == CV_BGR2HSV_FULL || code == CV_BGR2HLS_FULL ? 0 : 2;
+        int hrange = depth == CV_32F ? 360 : code == CV_BGR2HSV || code == CV_RGB2HSV ||
+            code == CV_BGR2HLS || code == CV_RGB2HLS ? 180 : 256;
+        bool is_hsv = code == CV_BGR2HSV || code == CV_RGB2HSV || code == CV_BGR2HSV_FULL || code == CV_RGB2HSV_FULL;
+        dst.create(sz, CV_MAKETYPE(depth, 3));
+        std::string kernelName = std::string("RGB2") + (is_hsv ? "HSV" : "HLS");
+
+        if (is_hsv && depth == CV_8U)
+        {
+            static oclMat sdiv_data;
+            static oclMat hdiv_data180;
+            static oclMat hdiv_data256;
+            static int sdiv_table[256];
+            static int hdiv_table180[256];
+            static int hdiv_table256[256];
+            static volatile bool initialized180 = false, initialized256 = false;
+            volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;
+
+            if (!initialized)
+            {
+                int * const hdiv_table = hrange == 180 ? hdiv_table180 : hdiv_table256, hsv_shift = 12;
+                oclMat & hdiv_data = hrange == 180 ? hdiv_data180 : hdiv_data256;
+
+                sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
+
+                int v = 255 << hsv_shift;
+                if (!initialized180 && !initialized256)
+                {
+                    for(int i = 1; i < 256; i++ )
+                        sdiv_table[i] = saturate_cast<int>(v/(1.*i));
+                    sdiv_data.upload(Mat(1, 256, CV_32SC1, sdiv_table));
+                }
+
+                v = hrange << hsv_shift;
+                for (int i = 1; i < 256; i++ )
+                    hdiv_table[i] = saturate_cast<int>(v/(6.*i));
+
+                hdiv_data.upload(Mat(1, 256, CV_32SC1, hdiv_table));
+                initialized = true;
+            }
+
+            fromRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
+            return;
+        }
+
+        fromRGB_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
+        break;
+    }
+    /*
     case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL:
     case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL:
     */
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
index 210d1b766..697526157 100644
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ b/modules/ocl/src/opencl/cvt_color.cl
@@ -76,6 +76,7 @@ enum
 {
     yuv_shift  = 14,
     xyz_shift  = 12,
+    hsv_shift = 12,
     R2Y        = 4899,
     G2Y        = 9617,
     B2Y        = 1868,
@@ -544,3 +545,126 @@ __kernel void Gray2BGR5x5(int cols, int rows, int src_step, int dst_step, int bi
 #endif
     }
 }
+
+///////////////////////////////////// RGB <-> HSV //////////////////////////////////////
+
+#ifdef DEPTH_0
+
+__kernel void RGB2HSV(int cols, int rows, int src_step, int dst_step, int bidx, // 8-bit RGB[A] -> HSV; each work-item converts the pixel at (x, y)
+                      __global const uchar * src, __global uchar * dst,
+                      int src_offset, int dst_offset,
+                      __constant int * sdiv_table, __constant int * hdiv_table) // reciprocal tables: sdiv_table is indexed by v, hdiv_table by diff
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < rows && x < cols) // work-items outside the image do nothing
+    {
+        x <<= 2; // pixel column -> element offset; src and dst are both addressed with 4 elements per pixel
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
+
+        int b = src[src_idx + bidx], g = src[src_idx + 1], r = src[src_idx + (bidx^2)]; // bidx selects the blue slot; bidx^2 is the opposite end of the triple
+        int h, s, v = b;
+        int vmin = b, diff;
+        int vr, vg;
+
+        v = max( v, g ); // v ends up as max(b, g, r)
+        v = max( v, r );
+        vmin = min( vmin, g ); // vmin ends up as min(b, g, r)
+        vmin = min( vmin, r );
+
+        diff = v - vmin;
+        vr = v == r ? -1 : 0; // all-ones / zero masks used below to pick the hue formula without branching
+        vg = v == g ? -1 : 0;
+
+        s = (diff * sdiv_table[v] + (1 << (hsv_shift-1))) >> hsv_shift; // fixed-point multiply; adding half before the shift rounds to nearest
+        h = (vr & (g - b)) + // max is r: g - b; otherwise max is g: b - r + 2*diff; otherwise: r - g + 4*diff
+            (~vr & ((vg & (b - r + 2 * diff)) + ((~vg) & (r - g + 4 * diff))));
+        h = (h * hdiv_table[diff] + (1 << (hsv_shift-1))) >> hsv_shift;
+        h += h < 0 ? hrange : 0; // wrap a negative hue by hrange (a build-time define)
+
+        dst[dst_idx] = convert_uchar_sat_rte(h);
+        dst[dst_idx + 1] = (uchar)s;
+        dst[dst_idx + 2] = (uchar)v;
+    }
+}
+
+#elif defined DEPTH_5
+
+__kernel void RGB2HSV(int cols, int rows, int src_step, int dst_step, int bidx, // float RGB[A] -> HSV; each work-item converts the pixel at (x, y)
+                      __global const float * src, __global float * dst,
+                      int src_offset, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < rows && x < cols) // work-items outside the image do nothing
+    {
+        x <<= 2; // pixel column -> element offset; src and dst are both addressed with 4 elements per pixel
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
+
+        float b = src[src_idx + bidx], g = src[src_idx + 1], r = src[src_idx + (bidx^2)]; // bidx selects the blue slot; bidx^2 is the opposite end of the triple
+        float h, s, v;
+
+        float vmin, diff;
+
+        v = vmin = r; // v becomes max(r, g, b) and vmin becomes min(r, g, b)
+        if( v < g ) v = g;
+        if( v < b ) v = b;
+        if( vmin > g ) vmin = g;
+        if( vmin > b ) vmin = b;
+
+        diff = v - vmin;
+        s = diff/(float)(fabs(v) + FLT_EPSILON); // FLT_EPSILON keeps the division defined when v is 0
+        diff = (float)(60./(diff + FLT_EPSILON)); // diff now holds 60/(max - min); NOTE(review): `60.` is a double literal - confirm this is intended on devices without fp64
+        if( v == r )
+            h = (g - b)*diff;
+        else if( v == g )
+            h = (b - r)*diff + 120.f;
+        else
+            h = (r - g)*diff + 240.f;
+
+        if( h < 0 ) h += 360.f; // wrap negative hue into [0, 360)
+
+        dst[dst_idx] = h*hscale; // hscale is a build-time define that rescales the hue in degrees
+        dst[dst_idx + 1] = s;
+        dst[dst_idx + 2] = v;
+    }
+}
+
+#endif
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
index 732a0f8e4..83a648530 100644
--- a/modules/ocl/test/test_color.cpp
+++ b/modules/ocl/test/test_color.cpp
@@ -181,6 +181,20 @@ OCL_TEST_P(CvtColor, XYZ2BGR) { doTest(3, 3, CVTCODE(XYZ2BGR)); }
 OCL_TEST_P(CvtColor, XYZ2RGBA) { doTest(3, 4, CVTCODE(XYZ2RGB)); }
 OCL_TEST_P(CvtColor, XYZ2BGRA) { doTest(3, 4, CVTCODE(XYZ2BGR)); }
 
+// RGB <-> HSV
+
+typedef CvtColor CvtColor8u32f; // alias of the CvtColor fixture used for the HSV test cases below
+
+OCL_TEST_P(CvtColor8u32f, RGB2HSV) { doTest(3, 3, CVTCODE(RGB2HSV)); } // doTest arguments: input channels, output channels, conversion code
+OCL_TEST_P(CvtColor8u32f, BGR2HSV) { doTest(3, 3, CVTCODE(BGR2HSV)); }
+OCL_TEST_P(CvtColor8u32f, RGBA2HSV) { doTest(4, 3, CVTCODE(RGB2HSV)); } // 4-channel input reuses the 3-channel conversion code
+OCL_TEST_P(CvtColor8u32f, BGRA2HSV) { doTest(4, 3, CVTCODE(BGR2HSV)); }
+
+OCL_TEST_P(CvtColor8u32f, RGB2HSV_FULL) { doTest(3, 3, CVTCODE(RGB2HSV_FULL)); }
+OCL_TEST_P(CvtColor8u32f, BGR2HSV_FULL) { doTest(3, 3, CVTCODE(BGR2HSV_FULL)); }
+OCL_TEST_P(CvtColor8u32f, RGBA2HSV_FULL) { doTest(4, 3, CVTCODE(RGB2HSV_FULL)); }
+OCL_TEST_P(CvtColor8u32f, BGRA2HSV_FULL) { doTest(4, 3, CVTCODE(BGR2HSV_FULL)); }
+
 // RGB5x5 <-> RGB
 
 typedef CvtColor CvtColor8u;
@@ -246,6 +260,9 @@ OCL_TEST_P(CvtColor_YUV420, YUV2BGR_NV12) { doTest(1, 3, CV_YUV2BGR_NV12); }
 INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor8u,
                             testing::Combine(testing::Values(MatDepth(CV_8U)), Bool()));
 
+INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor8u32f,
+                            testing::Combine(testing::Values(MatDepth(CV_8U), MatDepth(CV_32F)), Bool()));
+
 INSTANTIATE_TEST_CASE_P(OCL_ImgProc, CvtColor,
                             testing::Combine(
                                 testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_32F)),

From ee7c74ceeb2b28fdde0229b9f51d33071110a85d Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@aldebaran-robotics.com>
Date: Wed, 13 Nov 2013 14:20:09 +0100
Subject: [PATCH 33/45] return when OpenCV is found

---
 cmake/templates/OpenCVConfig.cmake.in | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/cmake/templates/OpenCVConfig.cmake.in b/cmake/templates/OpenCVConfig.cmake.in
index 094cbd4d0..5d5635119 100644
--- a/cmake/templates/OpenCVConfig.cmake.in
+++ b/cmake/templates/OpenCVConfig.cmake.in
@@ -37,15 +37,17 @@
 #
 # ===================================================================================
 
+if(OpenCV_FOUND)
+  return()
+endif()
+set(OpenCV_FOUND TRUE)
+
 set(modules_file_suffix "")
 if(ANDROID)
   string(REPLACE - _ modules_file_suffix "_${ANDROID_NDK_ABI_NAME}")
 endif()
 
-if(NOT OpenCV_FOUND)
-  include(${CMAKE_CURRENT_LIST_DIR}/OpenCVModules${modules_file_suffix}.cmake)
-  set(OpenCV_FOUND TRUE)
-endif()
+include(${CMAKE_CURRENT_LIST_DIR}/OpenCVModules${modules_file_suffix}.cmake)
 
 # TODO All things below should be reviewed. What is about of moving this code into related modules (special vars/hooks/files)
 

From 3bdd9626f3ad0e8ee9719e19e125292b22ad3594 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Wed, 13 Nov 2013 17:41:40 +0400
Subject: [PATCH 34/45] added RGB[A] -> HLS[FULL] conversion

---
 modules/ocl/src/color.cpp           |  4 +-
 modules/ocl/src/opencl/cvt_color.cl | 91 +++++++++++++++++++++++++----
 modules/ocl/test/test_color.cpp     | 18 +++++-
 3 files changed, 97 insertions(+), 16 deletions(-)

diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index c3f58c92c..17dcaa31f 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -440,11 +440,11 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
                 initialized = true;
             }
 
-            fromRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
+            fromRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=0", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
             return;
         }
 
-        fromRGB_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
+        fromRGB_caller(src, dst, bidx, kernelName, format(" -D hscale=%f -D hrange=0", hrange*(1.f/360.f)));
         break;
     }
     /*
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
index 697526157..928f46559 100644
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ b/modules/ocl/src/opencl/cvt_color.cl
@@ -636,35 +636,104 @@ __kernel void RGB2HSV(int cols, int rows, int src_step, int dst_step, int bidx,
 
 #endif
 
+///////////////////////////////////// RGB <-> HLS //////////////////////////////////////
 
+#ifdef DEPTH_0
 
+__kernel void RGB2HLS(int cols, int rows, int src_step, int dst_step, int bidx, // 8-bit RGB[A] -> HLS computed in float; each work-item converts the pixel at (x, y)
+                      __global const uchar * src, __global uchar * dst,
+                      int src_offset, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
 
+    if (y < rows && x < cols) // work-items outside the image do nothing
+    {
+        x <<= 2; // pixel column -> element offset; src and dst are both addressed with 4 elements per pixel
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
 
+        float b = src[src_idx + bidx]*(1/255.f), g = src[src_idx + 1]*(1/255.f), r = src[src_idx + (bidx^2)]*(1/255.f); // scale the 8-bit channels by 1/255 before the float math
+        float h = 0.f, s = 0.f, l; // h and s stay 0 when diff is not above FLT_EPSILON
+        float vmin, vmax, diff;
 
+        vmax = vmin = r; // vmax becomes max(r, g, b) and vmin becomes min(r, g, b)
+        if (vmax < g) vmax = g;
+        if (vmax < b) vmax = b;
+        if (vmin > g) vmin = g;
+        if (vmin > b) vmin = b;
 
+        diff = vmax - vmin;
+        l = (vmax + vmin)*0.5f;
 
+        if (diff > FLT_EPSILON)
+        {
+            s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
+            diff = 60.f/diff; // diff now holds 60/(max - min), the per-unit hue step in degrees
 
+            if( vmax == r )
+                h = (g - b)*diff;
+            else if( vmax == g )
+                h = (b - r)*diff + 120.f;
+            else
+                h = (r - g)*diff + 240.f;
 
+            if( h < 0.f ) h += 360.f; // wrap negative hue into [0, 360)
+        }
 
+        dst[dst_idx] = convert_uchar_sat_rte(h*hscale); // hscale is a build-time define that rescales the hue in degrees
+        dst[dst_idx + 1] = convert_uchar_sat_rte(l*255.f); // l and s are scaled by 255 and stored with saturation
+        dst[dst_idx + 2] = convert_uchar_sat_rte(s*255.f);
+    }
+}
 
+#elif defined DEPTH_5
 
+__kernel void RGB2HLS(int cols, int rows, int src_step, int dst_step, int bidx, // float RGB[A] -> HLS; each work-item converts the pixel at (x, y)
+                      __global const float * src, __global float * dst,
+                      int src_offset, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
 
+    if (y < rows && x < cols) // work-items outside the image do nothing
+    {
+        x <<= 2; // pixel column -> element offset; src and dst are both addressed with 4 elements per pixel
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
 
+        float b = src[src_idx + bidx], g = src[src_idx + 1], r = src[src_idx + (bidx^2)]; // bidx selects the blue slot; bidx^2 is the opposite end of the triple
+        float h = 0.f, s = 0.f, l; // h and s stay 0 when diff is not above FLT_EPSILON
+        float vmin, vmax, diff;
 
+        vmax = vmin = r; // vmax becomes max(r, g, b) and vmin becomes min(r, g, b)
+        if (vmax < g) vmax = g;
+        if (vmax < b) vmax = b;
+        if (vmin > g) vmin = g;
+        if (vmin > b) vmin = b;
 
+        diff = vmax - vmin;
+        l = (vmax + vmin)*0.5f;
 
+        if (diff > FLT_EPSILON)
+        {
+            s = l < 0.5f ? diff/(vmax + vmin) : diff/(2 - vmax - vmin);
+            diff = 60.f/diff; // diff now holds 60/(max - min), the per-unit hue step in degrees
 
+            if( vmax == r )
+                h = (g - b)*diff;
+            else if( vmax == g )
+                h = (b - r)*diff + 120.f;
+            else
+                h = (r - g)*diff + 240.f;
 
+            if( h < 0.f ) h += 360.f; // wrap negative hue into [0, 360)
+        }
 
+        dst[dst_idx] = h*hscale; // hscale is a build-time define that rescales the hue in degrees
+        dst[dst_idx + 1] = l;
+        dst[dst_idx + 2] = s;
+    }
+}
 
-
-
-
-
-
-
-
-
-
-
-
+#endif
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
index 83a648530..b1128c79c 100644
--- a/modules/ocl/test/test_color.cpp
+++ b/modules/ocl/test/test_color.cpp
@@ -95,7 +95,7 @@ PARAM_TEST_CASE(CvtColor, MatDepth, bool)
         generateOclMat(gdst_whole, gdst_roi, dst, roiSize, dstBorder);
     }
 
-    void Near(double threshold = 1e-3)
+    void Near(double threshold)
     {
         Mat whole, roi;
         gdst_whole.download(whole);
@@ -105,7 +105,7 @@ PARAM_TEST_CASE(CvtColor, MatDepth, bool)
         EXPECT_MAT_NEAR(dst_roi, roi, threshold);
     }
 
-    void doTest(int channelsIn, int channelsOut, int code)
+    void doTest(int channelsIn, int channelsOut, int code, double threshold = 1e-3)
     {
         for (int j = 0; j < LOOP_TIMES; j++)
         {
@@ -114,7 +114,7 @@ PARAM_TEST_CASE(CvtColor, MatDepth, bool)
             cvtColor(src_roi, dst_roi, code, channelsOut);
             ocl::cvtColor(gsrc_roi, gdst_roi, code, channelsOut);
 
-            Near();
+            Near(threshold);
         }
     }
 };
@@ -195,6 +195,18 @@ OCL_TEST_P(CvtColor8u32f, BGR2HSV_FULL) { doTest(3, 3, CVTCODE(BGR2HSV_FULL)); }
 OCL_TEST_P(CvtColor8u32f, RGBA2HSV_FULL) { doTest(4, 3, CVTCODE(RGB2HSV_FULL)); }
 OCL_TEST_P(CvtColor8u32f, BGRA2HSV_FULL) { doTest(4, 3, CVTCODE(BGR2HSV_FULL)); }
 
+// RGB <-> HLS
+
+OCL_TEST_P(CvtColor8u32f, RGB2HLS) { doTest(3, 3, CVTCODE(RGB2HLS), 1); } // the trailing 1 is the comparison threshold that doTest forwards to Near()
+OCL_TEST_P(CvtColor8u32f, BGR2HLS) { doTest(3, 3, CVTCODE(BGR2HLS), 1); }
+OCL_TEST_P(CvtColor8u32f, RGBA2HLS) { doTest(4, 3, CVTCODE(RGB2HLS), 1); } // 4-channel input reuses the 3-channel conversion code
+OCL_TEST_P(CvtColor8u32f, BGRA2HLS) { doTest(4, 3, CVTCODE(BGR2HLS), 1); }
+
+OCL_TEST_P(CvtColor8u32f, RGB2HLS_FULL) { doTest(3, 3, CVTCODE(RGB2HLS_FULL), 1); }
+OCL_TEST_P(CvtColor8u32f, BGR2HLS_FULL) { doTest(3, 3, CVTCODE(BGR2HLS_FULL), 1); }
+OCL_TEST_P(CvtColor8u32f, RGBA2HLS_FULL) { doTest(4, 3, CVTCODE(RGB2HLS_FULL), 1); }
+OCL_TEST_P(CvtColor8u32f, BGRA2HLS_FULL) { doTest(4, 3, CVTCODE(BGR2HLS_FULL), 1); }
+
 // RGB5x5 <-> RGB
 
 typedef CvtColor CvtColor8u;

From 98915e06bc7d1bb93f29e0c99f03264606fba6a0 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Wed, 13 Nov 2013 19:08:37 +0400
Subject: [PATCH 35/45] added HSV -> RGB[A][FULL] conversion

---
 modules/ocl/src/color.cpp           |  30 +++++--
 modules/ocl/src/opencl/cvt_color.cl | 118 ++++++++++++++++++++++++++++
 modules/ocl/test/test_color.cpp     |  26 ++++--
 3 files changed, 160 insertions(+), 14 deletions(-)

diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index 17dcaa31f..82ed8044f 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -82,9 +82,12 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
 }
 
 static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
-                         const oclMat & data = oclMat())
+                         const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
 {
     std::string build_options = format("-D DEPTH_%d -D dcn=%d", src.depth(), dst.channels());
+    if (!additionalOptions.empty())
+        build_options += additionalOptions;
+
     int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
     int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
 
@@ -391,7 +394,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         }
         oclMat oclCoeffs(1, 9, depth == CV_32F ? CV_32FC1 : CV_32SC1, pdata);
 
-        toRGB_caller(src, dst, bidx, "XYZ2RGB", oclCoeffs);
+        toRGB_caller(src, dst, bidx, "XYZ2RGB", "", oclCoeffs);
         break;
     }
     case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL:
@@ -440,17 +443,32 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
                 initialized = true;
             }
 
-            fromRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=0", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
+            fromRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
             return;
         }
 
-        fromRGB_caller(src, dst, bidx, kernelName, format(" -D hscale=%f -D hrange=0", hrange*(1.f/360.f)));
+        fromRGB_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
         break;
     }
-    /*
     case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL:
     case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL:
-    */
+    {
+        if (dcn <= 0)
+            dcn = 3;
+        CV_Assert(scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F));
+        bidx = code == CV_HSV2BGR || code == CV_HLS2BGR ||
+            code == CV_HSV2BGR_FULL || code == CV_HLS2BGR_FULL ? 0 : 2;
+        int hrange = depth == CV_32F ? 360 : code == CV_HSV2BGR || code == CV_HSV2RGB ||
+            code == CV_HLS2BGR || code == CV_HLS2RGB ? 180 : 255;
+        bool is_hsv = code == CV_HSV2BGR || code == CV_HSV2RGB ||
+                code == CV_HSV2BGR_FULL || code == CV_HSV2RGB_FULL;
+
+        dst.create(sz, CV_MAKETYPE(depth, dcn));
+
+        std::string kernelName = std::string(is_hsv ? "HSV" : "HLS") + "2RGB";
+        toRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
+        break;
+    }
     default:
         CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
     }
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
index 928f46559..ae271be1d 100644
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ b/modules/ocl/src/opencl/cvt_color.cl
@@ -46,6 +46,14 @@
 
 /**************************************PUBLICFUNC*************************************/
 
+#ifndef hscale
+#define hscale 0
+#endif
+
+#ifndef hrange
+#define hrange 0
+#endif
+
 #ifdef DEPTH_0
 #define DATA_TYPE uchar
 #define COEFF_TYPE int
@@ -548,6 +556,8 @@ __kernel void Gray2BGR5x5(int cols, int rows, int src_step, int dst_step, int bi
 
 ///////////////////////////////////// RGB <-> HSV //////////////////////////////////////
 
+__constant int sector_data[][3] = { {1, 3, 0}, { 1, 0, 2 }, { 3, 0, 1 }, { 0, 2, 1 }, { 0, 1, 3 }, { 2, 1, 0 } };
+
 #ifdef DEPTH_0
 
 __kernel void RGB2HSV(int cols, int rows, int src_step, int dst_step, int bidx,
@@ -590,6 +600,60 @@ __kernel void RGB2HSV(int cols, int rows, int src_step, int dst_step, int bidx,
     }
 }
 
+__kernel void HSV2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
+                      __global const uchar * src, __global uchar * dst,
+                      int src_offset, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < rows && x < cols)
+    {
+        x <<= 2;
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
+
+        float h = src[src_idx], s = src[src_idx + 1]*(1/255.f), v = src[src_idx + 2]*(1/255.f);
+        float b, g, r;
+
+        if (s != 0)
+        {
+            float tab[4];
+            int sector;
+            h *= hscale;
+            if( h < 0 )
+                do h += 6; while( h < 0 );
+            else if( h >= 6 )
+                do h -= 6; while( h >= 6 );
+            sector = convert_int_sat_rtn(h);
+            h -= sector;
+            if( (unsigned)sector >= 6u )
+            {
+                sector = 0;
+                h = 0.f;
+            }
+
+            tab[0] = v;
+            tab[1] = v*(1.f - s);
+            tab[2] = v*(1.f - s*h);
+            tab[3] = v*(1.f - s*(1.f - h));
+
+            b = tab[sector_data[sector][0]];
+            g = tab[sector_data[sector][1]];
+            r = tab[sector_data[sector][2]];
+        }
+        else
+            b = g = r = v;
+
+        dst[dst_idx + bidx] = convert_uchar_sat_rte(b*255.f);
+        dst[dst_idx + 1] = convert_uchar_sat_rte(g*255.f);
+        dst[dst_idx + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
+#if dcn == 4
+        dst[dst_idx + 3] = MAX_NUM;
+#endif
+    }
+}
+
 #elif defined DEPTH_5
 
 __kernel void RGB2HSV(int cols, int rows, int src_step, int dst_step, int bidx,
@@ -634,6 +698,60 @@ __kernel void RGB2HSV(int cols, int rows, int src_step, int dst_step, int bidx,
     }
 }
 
+__kernel void HSV2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
+                      __global const float * src, __global float * dst,
+                      int src_offset, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < rows && x < cols)
+    {
+        x <<= 2;
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
+
+        float h = src[src_idx], s = src[src_idx + 1], v = src[src_idx + 2];
+        float b, g, r;
+
+        if (s != 0)
+        {
+            float tab[4];
+            int sector;
+            h *= hscale;
+            if(h < 0)
+                do h += 6; while (h < 0);
+            else if (h >= 6)
+                do h -= 6; while (h >= 6);
+            sector = convert_int_sat_rtn(h);
+            h -= sector;
+            if ((unsigned)sector >= 6u)
+            {
+                sector = 0;
+                h = 0.f;
+            }
+
+            tab[0] = v;
+            tab[1] = v*(1.f - s);
+            tab[2] = v*(1.f - s*h);
+            tab[3] = v*(1.f - s*(1.f - h));
+
+            b = tab[sector_data[sector][0]];
+            g = tab[sector_data[sector][1]];
+            r = tab[sector_data[sector][2]];
+        }
+        else
+            b = g = r = v;
+
+        dst[dst_idx + bidx] = b;
+        dst[dst_idx + 1] = g;
+        dst[dst_idx + (bidx^2)] = r;
+#if dcn == 4
+        dst[dst_idx + 3] = MAX_NUM;
+#endif
+    }
+}
+
 #endif
 
 ///////////////////////////////////// RGB <-> HLS //////////////////////////////////////
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
index b1128c79c..2b798d5c5 100644
--- a/modules/ocl/test/test_color.cpp
+++ b/modules/ocl/test/test_color.cpp
@@ -195,17 +195,27 @@ OCL_TEST_P(CvtColor8u32f, BGR2HSV_FULL) { doTest(3, 3, CVTCODE(BGR2HSV_FULL)); }
 OCL_TEST_P(CvtColor8u32f, RGBA2HSV_FULL) { doTest(4, 3, CVTCODE(RGB2HSV_FULL)); }
 OCL_TEST_P(CvtColor8u32f, BGRA2HSV_FULL) { doTest(4, 3, CVTCODE(BGR2HSV_FULL)); }
 
+OCL_TEST_P(CvtColor8u32f, HSV2RGB) { doTest(3, 3, CVTCODE(HSV2RGB), depth == CV_8U ? 1 : 4e-1); }
+OCL_TEST_P(CvtColor8u32f, HSV2BGR) { doTest(3, 3, CVTCODE(HSV2BGR), depth == CV_8U ? 1 : 4e-1); }
+OCL_TEST_P(CvtColor8u32f, HSV2RGBA) { doTest(3, 4, CVTCODE(HSV2RGB), depth == CV_8U ? 1 : 4e-1); }
+OCL_TEST_P(CvtColor8u32f, HSV2BGRA) { doTest(3, 4, CVTCODE(HSV2BGR), depth == CV_8U ? 1 : 4e-1); }
+
+OCL_TEST_P(CvtColor8u32f, HSV2RGB_FULL) { doTest(3, 3, CVTCODE(HSV2RGB_FULL), depth == CV_8U ? 1 : 4e-1); }
+OCL_TEST_P(CvtColor8u32f, HSV2BGR_FULL) { doTest(3, 3, CVTCODE(HSV2BGR_FULL), depth == CV_8U ? 1 : 4e-1); }
+OCL_TEST_P(CvtColor8u32f, HSV2RGBA_FULL) { doTest(3, 4, CVTCODE(HSV2RGB_FULL), depth == CV_8U ? 1 : 4e-1); }
+OCL_TEST_P(CvtColor8u32f, HSV2BGRA_FULL) { doTest(3, 4, CVTCODE(HSV2BGR_FULL), depth == CV_8U ? 1 : 4e-1); }
+
 // RGB <-> HLS
 
-OCL_TEST_P(CvtColor8u32f, RGB2HLS) { doTest(3, 3, CVTCODE(RGB2HLS), 1); }
-OCL_TEST_P(CvtColor8u32f, BGR2HLS) { doTest(3, 3, CVTCODE(BGR2HLS), 1); }
-OCL_TEST_P(CvtColor8u32f, RGBA2HLS) { doTest(4, 3, CVTCODE(RGB2HLS), 1); }
-OCL_TEST_P(CvtColor8u32f, BGRA2HLS) { doTest(4, 3, CVTCODE(BGR2HLS), 1); }
+OCL_TEST_P(CvtColor8u32f, RGB2HLS) { doTest(3, 3, CVTCODE(RGB2HLS), depth == CV_8U ? 1 : 1e-3); }
+OCL_TEST_P(CvtColor8u32f, BGR2HLS) { doTest(3, 3, CVTCODE(BGR2HLS), depth == CV_8U ? 1 : 1e-3); }
+OCL_TEST_P(CvtColor8u32f, RGBA2HLS) { doTest(4, 3, CVTCODE(RGB2HLS), depth == CV_8U ? 1 : 1e-3); }
+OCL_TEST_P(CvtColor8u32f, BGRA2HLS) { doTest(4, 3, CVTCODE(BGR2HLS), depth == CV_8U ? 1 : 1e-3); }
 
-OCL_TEST_P(CvtColor8u32f, RGB2HLS_FULL) { doTest(3, 3, CVTCODE(RGB2HLS_FULL), 1); }
-OCL_TEST_P(CvtColor8u32f, BGR2HLS_FULL) { doTest(3, 3, CVTCODE(BGR2HLS_FULL), 1); }
-OCL_TEST_P(CvtColor8u32f, RGBA2HLS_FULL) { doTest(4, 3, CVTCODE(RGB2HLS_FULL), 1); }
-OCL_TEST_P(CvtColor8u32f, BGRA2HLS_FULL) { doTest(4, 3, CVTCODE(BGR2HLS_FULL), 1); }
+OCL_TEST_P(CvtColor8u32f, RGB2HLS_FULL) { doTest(3, 3, CVTCODE(RGB2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
+OCL_TEST_P(CvtColor8u32f, BGR2HLS_FULL) { doTest(3, 3, CVTCODE(BGR2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
+OCL_TEST_P(CvtColor8u32f, RGBA2HLS_FULL) { doTest(4, 3, CVTCODE(RGB2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
+OCL_TEST_P(CvtColor8u32f, BGRA2HLS_FULL) { doTest(4, 3, CVTCODE(BGR2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
 
 // RGB5x5 <-> RGB
 

From 1b7c5b201d2d9718068da9676bd2e066dbc0584d Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Wed, 13 Nov 2013 19:35:24 +0400
Subject: [PATCH 36/45] added HLS -> RGB[A][FULL] conversion

---
 modules/ocl/src/color.cpp           |  24 +++----
 modules/ocl/src/opencl/cvt_color.cl | 107 ++++++++++++++++++++++++++++
 modules/ocl/test/test_color.cpp     |  22 ++++--
 3 files changed, 131 insertions(+), 22 deletions(-)

diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index 82ed8044f..b807afd41 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -234,8 +234,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         toRGB5x5_caller(src, dst, -1, greenbits, "Gray2BGR5x5");
         break;
     }
-    case CV_RGB2GRAY: case CV_BGR2GRAY:
-    case CV_RGBA2GRAY: case CV_BGRA2GRAY:
+    case CV_RGB2GRAY: case CV_BGR2GRAY: case CV_RGBA2GRAY: case CV_BGRA2GRAY:
     {
         CV_Assert(scn == 3 || scn == 4);
         bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
@@ -243,8 +242,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         fromRGB_caller(src, dst, bidx, "RGB2Gray");
         break;
     }
-    case CV_GRAY2BGR:
-    case CV_GRAY2BGRA:
+    case CV_GRAY2BGR: case CV_GRAY2BGRA:
     {
         CV_Assert(scn == 1);
         dcn  = code == CV_GRAY2BGRA ? 4 : 3;
@@ -252,8 +250,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         toRGB_caller(src, dst, 0, "Gray2RGB");
         break;
     }
-    case CV_BGR2YUV:
-    case CV_RGB2YUV:
+    case CV_BGR2YUV: case CV_RGB2YUV:
     {
         CV_Assert(scn == 3 || scn == 4);
         bidx = code == CV_BGR2YUV ? 0 : 2;
@@ -261,8 +258,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         fromRGB_caller(src, dst, bidx, "RGB2YUV");
         break;
     }
-    case CV_YUV2BGR:
-    case CV_YUV2RGB:
+    case CV_YUV2BGR: case CV_YUV2RGB:
     {
         if( dcn <= 0 )
             dcn = 3;
@@ -285,8 +281,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         toRGB_caller(src, dst, bidx, "YUV2RGBA_NV12");
         break;
     }
-    case CV_BGR2YCrCb:
-    case CV_RGB2YCrCb:
+    case CV_BGR2YCrCb: case CV_RGB2YCrCb:
     {
         CV_Assert(scn == 3 || scn == 4);
         bidx = code == CV_BGR2YCrCb ? 0 : 2;
@@ -294,8 +289,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         fromRGB_caller(src, dst, bidx, "RGB2YCrCb");
         break;
     }
-    case CV_YCrCb2BGR:
-    case CV_YCrCb2RGB:
+    case CV_YCrCb2BGR: case CV_YCrCb2RGB:
     {
         if( dcn <= 0 )
             dcn = 3;
@@ -305,8 +299,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         toRGB_caller(src, dst, bidx, "YCrCb2RGB");
         break;
     }
-    case CV_BGR2XYZ:
-    case CV_RGB2XYZ:
+    case CV_BGR2XYZ: case CV_RGB2XYZ:
     {
         CV_Assert(scn == 3 || scn == 4);
         bidx = code == CV_BGR2XYZ ? 0 : 2;
@@ -350,8 +343,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         fromRGB_caller(src, dst, bidx, "RGB2XYZ", "", oclCoeffs);
         break;
     }
-    case CV_XYZ2BGR:
-    case CV_XYZ2RGB:
+    case CV_XYZ2BGR: case CV_XYZ2RGB:
     {
         if (dcn <= 0)
             dcn = 3;
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
index ae271be1d..70922c3ff 100644
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ b/modules/ocl/src/opencl/cvt_color.cl
@@ -805,6 +805,59 @@ __kernel void RGB2HLS(int cols, int rows, int src_step, int dst_step, int bidx,
     }
 }
 
+__kernel void HLS2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
+                      __global const uchar * src, __global uchar * dst,
+                      int src_offset, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < rows && x < cols)
+    {
+        x <<= 2;
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
+
+        float h = src[src_idx], l = src[src_idx + 1]*(1.f/255.f), s = src[src_idx + 2]*(1.f/255.f);
+        float b, g, r;
+
+        if (s != 0)
+        {
+            float tab[4];
+
+            float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
+            float p1 = 2*l - p2;
+
+            h *= hscale;
+            if( h < 0 )
+                do h += 6; while( h < 0 );
+            else if( h >= 6 )
+                do h -= 6; while( h >= 6 );
+
+            int sector = convert_int_sat_rtn(h);
+            h -= sector;
+
+            tab[0] = p2;
+            tab[1] = p1;
+            tab[2] = p1 + (p2 - p1)*(1-h);
+            tab[3] = p1 + (p2 - p1)*h;
+
+            b = tab[sector_data[sector][0]];
+            g = tab[sector_data[sector][1]];
+            r = tab[sector_data[sector][2]];
+        }
+        else
+            b = g = r = l;
+
+        dst[dst_idx + bidx] = convert_uchar_sat_rte(b*255.f);
+        dst[dst_idx + 1] = convert_uchar_sat_rte(g*255.f);
+        dst[dst_idx + (bidx^2)] = convert_uchar_sat_rte(r*255.f);
+#if dcn == 4
+        dst[dst_idx + 3] = MAX_NUM;
+#endif
+    }
+}
+
 #elif defined DEPTH_5
 
 __kernel void RGB2HLS(int cols, int rows, int src_step, int dst_step, int bidx,
@@ -854,4 +907,58 @@ __kernel void RGB2HLS(int cols, int rows, int src_step, int dst_step, int bidx,
     }
 }
 
+__kernel void HLS2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
+                      __global const float * src, __global float * dst,
+                      int src_offset, int dst_offset)
+{
+    // HLS -> RGB[A] for float data; each work-item converts the pixel at (x, y).
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+    if (y < rows && x < cols)
+    {
+        x <<= 2; // pixels are addressed with a stride of four elements
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
+
+        float h = src[src_idx], l = src[src_idx + 1], s = src[src_idx + 2];
+        float b, g, r;
+
+        if (s != 0)
+        {
+            float tab[4];
+            int sector;
+
+            float p2 = l <= 0.5f ? l*(1 + s) : l + s - l*s;
+            float p1 = 2*l - p2;
+            // scale the hue by hscale, then wrap it into the six sectors [0, 6)
+            h *= hscale;
+            if( h < 0 )
+                do h += 6; while( h < 0 );
+            else if( h >= 6 )
+                do h -= 6; while( h >= 6 );
+            // a tiny negative h rounds to exactly 6 after "+= 6": wrap it to sector 0 (as HSV2RGB does)
+            sector = convert_int_sat_rtn(h);
+            h -= sector;
+            if ((unsigned)sector >= 6u) { sector = 0; h = 0.f; } // keeps the sector_data lookup in bounds
+            tab[0] = p2;
+            tab[1] = p1;
+            tab[2] = p1 + (p2 - p1)*(1-h);
+            tab[3] = p1 + (p2 - p1)*h;
+
+            b = tab[sector_data[sector][0]];
+            g = tab[sector_data[sector][1]];
+            r = tab[sector_data[sector][2]];
+        }
+        else
+            b = g = r = l;
+
+        dst[dst_idx + bidx] = b;
+        dst[dst_idx + 1] = g;
+        dst[dst_idx + (bidx^2)] = r;
+#if dcn == 4
+        dst[dst_idx + 3] = MAX_NUM;
+#endif
+    }
+}
+
 #endif
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
index 2b798d5c5..935e97462 100644
--- a/modules/ocl/test/test_color.cpp
+++ b/modules/ocl/test/test_color.cpp
@@ -124,17 +124,17 @@ PARAM_TEST_CASE(CvtColor, MatDepth, bool)
 // RGB[A] <-> BGR[A]
 
 OCL_TEST_P(CvtColor, BGR2BGRA) { doTest(3, 4, CVTCODE(BGR2BGRA)); }
-OCL_TEST_P(CvtColor, RGB2RGBA) { doTest(3, 4, CVTCODE(BGR2BGRA)); }
+OCL_TEST_P(CvtColor, RGB2RGBA) { doTest(3, 4, CVTCODE(RGB2RGBA)); }
 OCL_TEST_P(CvtColor, BGRA2BGR) { doTest(4, 3, CVTCODE(BGRA2BGR)); }
-OCL_TEST_P(CvtColor, RGBA2RGB) { doTest(4, 3, CVTCODE(BGRA2BGR)); }
+OCL_TEST_P(CvtColor, RGBA2RGB) { doTest(4, 3, CVTCODE(RGBA2RGB)); }
 OCL_TEST_P(CvtColor, BGR2RGBA) { doTest(3, 4, CVTCODE(BGR2RGBA)); }
-OCL_TEST_P(CvtColor, RGB2BGRA) { doTest(3, 4, CVTCODE(BGR2RGBA)); }
+OCL_TEST_P(CvtColor, RGB2BGRA) { doTest(3, 4, CVTCODE(RGB2BGRA)); }
 OCL_TEST_P(CvtColor, RGBA2BGR) { doTest(4, 3, CVTCODE(RGBA2BGR)); }
-OCL_TEST_P(CvtColor, BGRA2RGB) { doTest(4, 3, CVTCODE(RGBA2BGR)); }
+OCL_TEST_P(CvtColor, BGRA2RGB) { doTest(4, 3, CVTCODE(BGRA2RGB)); }
 OCL_TEST_P(CvtColor, BGR2RGB) { doTest(3, 3, CVTCODE(BGR2RGB)); }
-OCL_TEST_P(CvtColor, RGB2BGR) { doTest(3, 3, CVTCODE(BGR2RGB)); }
+OCL_TEST_P(CvtColor, RGB2BGR) { doTest(3, 3, CVTCODE(RGB2BGR)); }
 OCL_TEST_P(CvtColor, BGRA2RGBA) { doTest(4, 4, CVTCODE(BGRA2RGBA)); }
-OCL_TEST_P(CvtColor, RGBA2BGRA) { doTest(4, 4, CVTCODE(BGRA2RGBA)); }
+OCL_TEST_P(CvtColor, RGBA2BGRA) { doTest(4, 4, CVTCODE(RGBA2BGRA)); }
 
 // RGB <-> Gray
 
@@ -217,6 +217,16 @@ OCL_TEST_P(CvtColor8u32f, BGR2HLS_FULL) { doTest(3, 3, CVTCODE(BGR2HLS_FULL), de
 OCL_TEST_P(CvtColor8u32f, RGBA2HLS_FULL) { doTest(4, 3, CVTCODE(RGB2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
 OCL_TEST_P(CvtColor8u32f, BGRA2HLS_FULL) { doTest(4, 3, CVTCODE(BGR2HLS_FULL), depth == CV_8U ? 1 : 1e-3); }
 
+OCL_TEST_P(CvtColor8u32f, HLS2RGB) { doTest(3, 3, CVTCODE(HLS2RGB), 1); }
+OCL_TEST_P(CvtColor8u32f, HLS2BGR) { doTest(3, 3, CVTCODE(HLS2BGR), 1); }
+OCL_TEST_P(CvtColor8u32f, HLS2RGBA) { doTest(3, 4, CVTCODE(HLS2RGB), 1); }
+OCL_TEST_P(CvtColor8u32f, HLS2BGRA) { doTest(3, 4, CVTCODE(HLS2BGR), 1); }
+
+OCL_TEST_P(CvtColor8u32f, HLS2RGB_FULL) { doTest(3, 3, CVTCODE(HLS2RGB_FULL), 1); }
+OCL_TEST_P(CvtColor8u32f, HLS2BGR_FULL) { doTest(3, 3, CVTCODE(HLS2BGR_FULL), 1); }
+OCL_TEST_P(CvtColor8u32f, HLS2RGBA_FULL) { doTest(3, 4, CVTCODE(HLS2RGB_FULL), 1); }
+OCL_TEST_P(CvtColor8u32f, HLS2BGRA_FULL) { doTest(3, 4, CVTCODE(HLS2BGR_FULL), 1); }
+
 // RGB5x5 <-> RGB
 
 typedef CvtColor CvtColor8u;

From fe76b2116a76b6bc1dab0fac4dec35366d45da1e Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Thu, 14 Nov 2013 12:02:32 +0400
Subject: [PATCH 37/45] added RGBA <-> mRGBA

---
 modules/ocl/src/color.cpp           |  9 +++++
 modules/ocl/src/opencl/cvt_color.cl | 53 +++++++++++++++++++++++++++++
 modules/ocl/test/test_color.cpp     |  5 +++
 3 files changed, 67 insertions(+)

diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index b807afd41..f41d29782 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -461,6 +461,15 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         toRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
         break;
     }
+    case CV_RGBA2mRGBA: case CV_mRGBA2RGBA:
+        {
+            CV_Assert(scn == 4 && depth == CV_8U);
+            dst.create(sz, CV_MAKETYPE(depth, 4));
+            std::string kernelName = code == CV_RGBA2mRGBA ? "RGBA2mRGBA" : "mRGBA2RGBA";
+
+            fromRGB_caller(src, dst, 0, kernelName);
+            break;
+        }
     default:
         CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
     }
diff --git a/modules/ocl/src/opencl/cvt_color.cl b/modules/ocl/src/opencl/cvt_color.cl
index 70922c3ff..bf3b6cfa7 100644
--- a/modules/ocl/src/opencl/cvt_color.cl
+++ b/modules/ocl/src/opencl/cvt_color.cl
@@ -962,3 +962,56 @@ __kernel void HLS2RGB(int cols, int rows, int src_step, int dst_step, int bidx,
 }
 
 #endif
+
+/////////////////////////// RGBA <-> mRGBA (alpha premultiplied) //////////////
+
+#ifdef DEPTH_0
+
+__kernel void RGBA2mRGBA(int cols, int rows, int src_step, int dst_step,
+                        int bidx, __global const uchar * src, __global uchar * dst,
+                        int src_offset, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < rows && x < cols)
+    {
+        x <<= 2;
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
+
+        uchar v0 = src[src_idx], v1 = src[src_idx + 1];
+        uchar v2 = src[src_idx + 2], v3 = src[src_idx + 3];
+
+        dst[dst_idx] = (v0 * v3 + HALF_MAX) / MAX_NUM;
+        dst[dst_idx + 1] = (v1 * v3 + HALF_MAX) / MAX_NUM;
+        dst[dst_idx + 2] = (v2 * v3 + HALF_MAX) / MAX_NUM;
+        dst[dst_idx + 3] = v3;
+    }
+}
+
+__kernel void mRGBA2RGBA(int cols, int rows, int src_step, int dst_step, int bidx,
+                        __global const uchar * src, __global uchar * dst,
+                        int src_offset, int dst_offset)
+{
+    int x = get_global_id(0);
+    int y = get_global_id(1);
+
+    if (y < rows && x < cols)
+    {
+        x <<= 2;
+        int src_idx = mad24(y, src_step, src_offset + x);
+        int dst_idx = mad24(y, dst_step, dst_offset + x);
+
+        uchar v0 = src[src_idx], v1 = src[src_idx + 1];
+        uchar v2 = src[src_idx + 2], v3 = src[src_idx + 3];
+        uchar v3_half = v3 / 2;
+
+        dst[dst_idx] = v3 == 0 ? 0 : (v0 * MAX_NUM + v3_half) / v3;
+        dst[dst_idx + 1] = v3 == 0 ? 0 : (v1 * MAX_NUM + v3_half) / v3;
+        dst[dst_idx + 2] = v3 == 0 ? 0 : (v2 * MAX_NUM + v3_half) / v3;
+        dst[dst_idx + 3] = v3;
+    }
+}
+
+#endif
diff --git a/modules/ocl/test/test_color.cpp b/modules/ocl/test/test_color.cpp
index 935e97462..7c4fe3942 100644
--- a/modules/ocl/test/test_color.cpp
+++ b/modules/ocl/test/test_color.cpp
@@ -259,6 +259,11 @@ OCL_TEST_P(CvtColor8u, BGR5552GRAY) { doTest(2, 1, CVTCODE(BGR5552GRAY)); }
 OCL_TEST_P(CvtColor8u, GRAY2BGR565) { doTest(1, 2, CVTCODE(GRAY2BGR565)); }
 OCL_TEST_P(CvtColor8u, GRAY2BGR555) { doTest(1, 2, CVTCODE(GRAY2BGR555)); }
 
+// RGBA <-> mRGBA
+
+OCL_TEST_P(CvtColor8u, RGBA2mRGBA) { doTest(4, 4, CVTCODE(RGBA2mRGBA)); }
+OCL_TEST_P(CvtColor8u, mRGBA2RGBA) { doTest(4, 4, CVTCODE(mRGBA2RGBA)); }
+
 // YUV -> RGBA_NV12
 
 struct CvtColor_YUV420 :

From 8e944cfe70679f2c605899d354606d09bec8ccb0 Mon Sep 17 00:00:00 2001
From: Vincent Rabaud <vrabaud@aldebaran-robotics.com>
Date: Thu, 14 Nov 2013 11:45:42 +0100
Subject: [PATCH 38/45] cleaner fix

---
 cmake/templates/OpenCVConfig.cmake.in | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/cmake/templates/OpenCVConfig.cmake.in b/cmake/templates/OpenCVConfig.cmake.in
index 5d5635119..91791b64a 100644
--- a/cmake/templates/OpenCVConfig.cmake.in
+++ b/cmake/templates/OpenCVConfig.cmake.in
@@ -16,7 +16,6 @@
 #    If the module is found then OPENCV_<MODULE>_FOUND is set to TRUE.
 #
 #    This file will define the following variables:
-#      - OpenCV_FOUND                    : Set to TRUE is OpenCV was find_packaged before
 #      - OpenCV_LIBS                     : The list of all imported targets for OpenCV modules.
 #      - OpenCV_INCLUDE_DIRS             : The OpenCV include directories.
 #      - OpenCV_COMPUTE_CAPABILITIES     : The version of compute capability
@@ -37,17 +36,14 @@
 #
 # ===================================================================================
 
-if(OpenCV_FOUND)
-  return()
-endif()
-set(OpenCV_FOUND TRUE)
-
 set(modules_file_suffix "")
 if(ANDROID)
   string(REPLACE - _ modules_file_suffix "_${ANDROID_NDK_ABI_NAME}")
 endif()
 
-include(${CMAKE_CURRENT_LIST_DIR}/OpenCVModules${modules_file_suffix}.cmake)
+if(NOT TARGET opencv_core)
+  include(${CMAKE_CURRENT_LIST_DIR}/OpenCVModules${modules_file_suffix}.cmake)
+endif()
 
 # TODO All things below should be reviewed. What is about of moving this code into related modules (special vars/hooks/files)
 

From 3b7acf26573b333fb15962a8c771f8544f46d554 Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Thu, 14 Nov 2013 17:25:46 +0400
Subject: [PATCH 39/45] Fix building with FFmpeg 0.8.

avformat_find_stream_info was introduced in Libav avformat 53.3,
but it was only merged to FFmpeg in avformat 53.6.

In Libav avformat 53.3-53.5 av_find_stream_info is not removed
(only deprecated), so this shouldn't break building with that.
---
 modules/highgui/src/cap_ffmpeg_impl.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/highgui/src/cap_ffmpeg_impl.hpp b/modules/highgui/src/cap_ffmpeg_impl.hpp
index 2d8ad2223..151a0cac2 100644
--- a/modules/highgui/src/cap_ffmpeg_impl.hpp
+++ b/modules/highgui/src/cap_ffmpeg_impl.hpp
@@ -546,7 +546,7 @@ bool CvCapture_FFMPEG::open( const char* _filename )
         goto exit_func;
     }
     err =
-#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 3, 0)
+#if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 6, 0)
     avformat_find_stream_info(ic, NULL);
 #else
     av_find_stream_info(ic);
@@ -2114,7 +2114,7 @@ bool InputMediaStream_FFMPEG::open(const char* fileName, int* codec, int* chroma
     if (err < 0)
         return false;
 
-    #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 3, 0)
+    #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 6, 0)
         err = avformat_find_stream_info(ctx_, 0);
     #else
         err = av_find_stream_info(ctx_);

From 428fb726251c1ab06e7056ceca2e55b7146c4ef9 Mon Sep 17 00:00:00 2001
From: Roman Donchenko <roman.donchenko@itseez.com>
Date: Thu, 14 Nov 2013 19:14:38 +0400
Subject: [PATCH 40/45] Made the generated file templates' naming more
 consistent.

The general convention is <output file name> + ".in".
---
 cmake/OpenCVGenHeaders.cmake                                 | 2 +-
 cmake/OpenCVGenPkgconfig.cmake                               | 4 +++-
 cmake/templates/{cvconfig.h.cmake => cvconfig.h.in}          | 0
 cmake/templates/{opencv-XXX.pc.cmake.in => opencv-XXX.pc.in} | 0
 4 files changed, 4 insertions(+), 2 deletions(-)
 rename cmake/templates/{cvconfig.h.cmake => cvconfig.h.in} (100%)
 rename cmake/templates/{opencv-XXX.pc.cmake.in => opencv-XXX.pc.in} (100%)

diff --git a/cmake/OpenCVGenHeaders.cmake b/cmake/OpenCVGenHeaders.cmake
index c7129fefa..35da0fb4b 100644
--- a/cmake/OpenCVGenHeaders.cmake
+++ b/cmake/OpenCVGenHeaders.cmake
@@ -1,5 +1,5 @@
 # platform-specific config file
-configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.cmake" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h")
+configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/cvconfig.h.in" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h")
 
 # ----------------------------------------------------------------------------
 #  opencv_modules.hpp based on actual modules list
diff --git a/cmake/OpenCVGenPkgconfig.cmake b/cmake/OpenCVGenPkgconfig.cmake
index a36b70e94..cd54f11bf 100644
--- a/cmake/OpenCVGenPkgconfig.cmake
+++ b/cmake/OpenCVGenPkgconfig.cmake
@@ -76,7 +76,9 @@ if(INSTALL_TO_MANGLED_PATHS)
 else()
   set(OPENCV_PC_FILE_NAME opencv.pc)
 endif()
-configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/opencv-XXX.pc.cmake.in" "${CMAKE_BINARY_DIR}/unix-install/${OPENCV_PC_FILE_NAME}" @ONLY IMMEDIATE)
+configure_file("${OpenCV_SOURCE_DIR}/cmake/templates/opencv-XXX.pc.in"
+               "${CMAKE_BINARY_DIR}/unix-install/${OPENCV_PC_FILE_NAME}"
+               @ONLY IMMEDIATE)
 
 if(UNIX AND NOT ANDROID)
   install(FILES ${CMAKE_BINARY_DIR}/unix-install/${OPENCV_PC_FILE_NAME} DESTINATION ${OPENCV_LIB_INSTALL_PATH}/pkgconfig)
diff --git a/cmake/templates/cvconfig.h.cmake b/cmake/templates/cvconfig.h.in
similarity index 100%
rename from cmake/templates/cvconfig.h.cmake
rename to cmake/templates/cvconfig.h.in
diff --git a/cmake/templates/opencv-XXX.pc.cmake.in b/cmake/templates/opencv-XXX.pc.in
similarity index 100%
rename from cmake/templates/opencv-XXX.pc.cmake.in
rename to cmake/templates/opencv-XXX.pc.in

From cc237b7aba0e70498ad3ebb7f4c579875e2cf20a Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov <ilya.lavrenov@itseez.com>
Date: Fri, 15 Nov 2013 13:48:14 +0400
Subject: [PATCH 41/45] generalized ocl::resize for all data types
 (INTER_NEAREST mode)

---
 modules/ocl/src/imgproc.cpp              | 113 +++++---------
 modules/ocl/src/opencl/imgproc_resize.cl | 189 ++++++-----------------
 2 files changed, 82 insertions(+), 220 deletions(-)

diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp
index ebe94d992..141325b17 100644
--- a/modules/ocl/src/imgproc.cpp
+++ b/modules/ocl/src/imgproc.cpp
@@ -282,96 +282,63 @@ namespace cv
 
         static void resize_gpu( const oclMat &src, oclMat &dst, double fx, double fy, int interpolation)
         {
-            CV_Assert( (src.channels() == dst.channels()) );
-            Context *clCxt = src.clCxt;
-            float ifx = 1. / fx;
-            float ify = 1. / fy;
-            double ifx_d = 1. / fx;
-            double ify_d = 1. / fy;
-            int srcStep_in_pixel = src.step1() / src.oclchannels();
-            int srcoffset_in_pixel = src.offset / src.elemSize();
-            int dstStep_in_pixel = dst.step1() / dst.oclchannels();
-            int dstoffset_in_pixel = dst.offset / dst.elemSize();
+            float ifx = 1.f / fx, ify = 1.f / fy;
+            int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
+            int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
+            int ocn = interpolation == INTER_LINEAR ? dst.oclchannels() : -1;
+            int depth = interpolation == INTER_LINEAR ? dst.depth() : -1;
 
-            string kernelName;
-            if (interpolation == INTER_LINEAR)
-                kernelName = "resizeLN";
-            else if (interpolation == INTER_NEAREST)
-                kernelName = "resizeNN";
+            const char * const interMap[] = { "NN", "LN", "CUBIC", "AREA", "LAN4" };
+            std::string kernelName = std::string("resize") + interMap[interpolation];
+
+            const char * const typeMap[] = { "uchar", "uchar", "ushort", "ushort", "int", "int", "double" };
+            const char * const channelMap[] = { "" , "", "2", "4", "4" };
+            std::string buildOption = format("-D %s -D T=%s%s", interMap[interpolation], typeMap[dst.depth()], channelMap[dst.oclchannels()]);
 
             //TODO: improve this kernel
             size_t blkSizeX = 16, blkSizeY = 16;
             size_t glbSizeX;
-            if (src.type() == CV_8UC1)
+            if (src.type() == CV_8UC1 && interpolation == INTER_LINEAR)
             {
                 size_t cols = (dst.cols + dst.offset % 4 + 3) / 4;
                 glbSizeX = cols % blkSizeX == 0 && cols != 0 ? cols : (cols / blkSizeX + 1) * blkSizeX;
             }
             else
-                glbSizeX = dst.cols % blkSizeX == 0 && dst.cols != 0 ? dst.cols : (dst.cols / blkSizeX + 1) * blkSizeX;
+                glbSizeX = dst.cols;
 
-            size_t glbSizeY = dst.rows % blkSizeY == 0 && dst.rows != 0 ? dst.rows : (dst.rows / blkSizeY + 1) * blkSizeY;
-            size_t globalThreads[3] = {glbSizeX, glbSizeY, 1};
-            size_t localThreads[3] = {blkSizeX, blkSizeY, 1};
+            size_t globalThreads[3] = { glbSizeX, dst.rows, 1 };
+            size_t localThreads[3] = { blkSizeX, blkSizeY, 1 };
 
-            vector< pair<size_t, const void *> > args;
-            if (interpolation == INTER_NEAREST)
-            {
-                args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
-                args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&dstoffset_in_pixel));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&srcoffset_in_pixel));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&dstStep_in_pixel));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&srcStep_in_pixel));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
-                if (src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
-                {
-                    args.push_back( make_pair(sizeof(cl_double), (void *)&ifx_d));
-                    args.push_back( make_pair(sizeof(cl_double), (void *)&ify_d));
-                }
-                else
-                {
-                    args.push_back( make_pair(sizeof(cl_float), (void *)&ifx));
-                    args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
-                }
-            }
-            else
-            {
-                args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
-                args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&dstoffset_in_pixel));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&srcoffset_in_pixel));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&dstStep_in_pixel));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&srcStep_in_pixel));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
-                args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
-                args.push_back( make_pair(sizeof(cl_float), (void *)&ifx));
-                args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
-            }
+            std::vector< std::pair<size_t, const void *> > args;
+            args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data));
+            args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data));
+            args.push_back( make_pair(sizeof(cl_int), (void *)&dst_offset));
+            args.push_back( make_pair(sizeof(cl_int), (void *)&src_offset));
+            args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step));
+            args.push_back( make_pair(sizeof(cl_int), (void *)&src_step));
+            args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols));
+            args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows));
+            args.push_back( make_pair(sizeof(cl_int), (void *)&dst.cols));
+            args.push_back( make_pair(sizeof(cl_int), (void *)&dst.rows));
+            args.push_back( make_pair(sizeof(cl_float), (void *)&ifx));
+            args.push_back( make_pair(sizeof(cl_float), (void *)&ify));
 
-            openCLExecuteKernel(clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
+            openCLExecuteKernel(src.clCxt, &imgproc_resize, kernelName, globalThreads, localThreads, args,
+                                ocn, depth, buildOption.c_str());
         }
 
-        void resize(const oclMat &src, oclMat &dst, Size dsize,
-                    double fx, double fy, int interpolation)
+        void resize(const oclMat &src, oclMat &dst, Size dsize, double fx, double fy, int interpolation)
         {
             CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3 || src.type() == CV_8UC4
                       || src.type() == CV_32FC1 || src.type() == CV_32FC3 || src.type() == CV_32FC4);
             CV_Assert(interpolation == INTER_LINEAR || interpolation == INTER_NEAREST);
-            CV_Assert( src.size().area() > 0 );
-            CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );
+            CV_Assert(dsize.area() > 0 || (fx > 0 && fy > 0));
 
-            if (!(dsize == Size()) && (fx > 0 && fy > 0))
-                if (dsize.width != (int)(src.cols * fx) || dsize.height != (int)(src.rows * fy))
-                    CV_Error(CV_StsUnmatchedSizes, "invalid dsize and fx, fy!");
-
-            if ( dsize == Size() )
+            if (dsize.area() == 0)
+            {
                 dsize = Size(saturate_cast<int>(src.cols * fx), saturate_cast<int>(src.rows * fy));
+                CV_Assert(dsize.area() > 0);
+            }
             else
             {
                 fx = (double)dsize.width / src.cols;
@@ -380,13 +347,7 @@ namespace cv
 
             dst.create(dsize, src.type());
 
-            if ( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR )
-            {
-                resize_gpu( src, dst, fx, fy, interpolation);
-                return;
-            }
-
-            CV_Error(CV_StsUnsupportedFormat, "Non-supported interpolation method");
+            resize_gpu( src, dst, fx, fy, interpolation);
         }
 
         ////////////////////////////////////////////////////////////////////////
diff --git a/modules/ocl/src/opencl/imgproc_resize.cl b/modules/ocl/src/opencl/imgproc_resize.cl
index 0d4cbedf6..4af900043 100644
--- a/modules/ocl/src/opencl/imgproc_resize.cl
+++ b/modules/ocl/src/opencl/imgproc_resize.cl
@@ -45,7 +45,7 @@
 
 
 // resize kernel
-// Currently, CV_8UC1  CV_8UC4  CV_32FC1 and CV_32FC4are supported.
+// Currently, CV_8UC1, CV_8UC4, CV_32FC1 and CV_32FC4 are supported.
 // We shall support other types later if necessary.
 
 #ifdef DOUBLE_SUPPORT
@@ -54,20 +54,18 @@
 #elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
-#define F double
-#else
-#define F float
 #endif
 
-
 #define INTER_RESIZE_COEF_BITS 11
 #define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
 #define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)
 #define CAST_SCALE (1.0f/(1<<CAST_BITS))
 #define INC(x,l) ((x+1) >= (l) ? (x):((x)+1))
 
+#ifdef LN
+
 __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restrict src,
-                     int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
+                     int dst_offset, int src_offset,int dst_step, int src_step,
                      int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
 {
     int gx = get_global_id(0);
@@ -75,7 +73,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
 
     float4  sx, u, xf;
     int4 x, DX;
-    gx = (gx<<2) - (dstoffset_in_pixel&3);
+    gx = (gx<<2) - (dst_offset&3);
     DX = (int4)(gx, gx+1, gx+2, gx+3);
     sx = (convert_float4(DX) + 0.5f) * ifx - 0.5f;
     xf = floor(sx);
@@ -113,10 +111,10 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
     int4 val1, val2, val;
     int4 sdata1, sdata2, sdata3, sdata4;
 
-    int4 pos1 = mad24((int4)y, (int4)srcstep_in_pixel, x+(int4)srcoffset_in_pixel);
-    int4 pos2 = mad24((int4)y, (int4)srcstep_in_pixel, x_+(int4)srcoffset_in_pixel);
-    int4 pos3 = mad24((int4)y_, (int4)srcstep_in_pixel, x+(int4)srcoffset_in_pixel);
-    int4 pos4 = mad24((int4)y_, (int4)srcstep_in_pixel, x_+(int4)srcoffset_in_pixel);
+    int4 pos1 = mad24((int4)y, (int4)src_step, x+(int4)src_offset);
+    int4 pos2 = mad24((int4)y, (int4)src_step, x_+(int4)src_offset);
+    int4 pos3 = mad24((int4)y_, (int4)src_step, x+(int4)src_offset);
+    int4 pos4 = mad24((int4)y_, (int4)src_step, x_+(int4)src_offset);
 
     sdata1.s0 = src[pos1.s0];
     sdata1.s1 = src[pos1.s1];
@@ -144,12 +142,12 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
 
     val = ((val + (1<<(CAST_BITS-1))) >> CAST_BITS);
 
-    pos4 = mad24(dy, dststep_in_pixel, gx+dstoffset_in_pixel);
+    pos4 = mad24(dy, dst_step, gx+dst_offset);
     pos4.y++;
     pos4.z+=2;
     pos4.w+=3;
     uchar4 uval = convert_uchar4_sat(val);
-        int con = (gx >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dstoffset_in_pixel&3)==0);
+        int con = (gx >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dst_offset&3)==0);
     if(con)
     {
         *(__global uchar4*)(dst + pos4.x)=uval;
@@ -176,7 +174,7 @@ __kernel void resizeLN_C1_D0(__global uchar * dst, __global uchar const * restri
 }
 
 __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
-                     int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
+                     int dst_offset, int src_offset,int dst_step, int src_step,
                      int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
 {
     int dx = get_global_id(0);
@@ -202,24 +200,24 @@ __kernel void resizeLN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
     int y_ = INC(y,src_rows);
     int x_ = INC(x,src_cols);
     int4 srcpos;
-    srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel);
-    srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel);
-    srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel);
-    srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel);
+    srcpos.x = mad24(y, src_step, x+src_offset);
+    srcpos.y = mad24(y, src_step, x_+src_offset);
+    srcpos.z = mad24(y_, src_step, x+src_offset);
+    srcpos.w = mad24(y_, src_step, x_+src_offset);
     int4 data0 = convert_int4(src[srcpos.x]);
     int4 data1 = convert_int4(src[srcpos.y]);
     int4 data2 = convert_int4(src[srcpos.z]);
     int4 data3 = convert_int4(src[srcpos.w]);
     int4 val = mul24((int4)mul24(U1, V1) ,  data0) + mul24((int4)mul24(U, V1) ,  data1)
                +mul24((int4)mul24(U1, V) ,  data2)+mul24((int4)mul24(U, V) ,  data3);
-    int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel);
+    int dstpos = mad24(dy, dst_step, dx+dst_offset);
     uchar4 uval =   convert_uchar4((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
     if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
          dst[dstpos] = uval;
 }
 
 __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
-                     int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
+                     int dst_offset, int src_offset,int dst_step, int src_step,
                      int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
 {
     int dx = get_global_id(0);
@@ -239,10 +237,10 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
     float u1 = 1.f-u;
     float v1 = 1.f-v;
     int4 srcpos;
-    srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel);
-    srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel);
-    srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel);
-    srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel);
+    srcpos.x = mad24(y, src_step, x+src_offset);
+    srcpos.y = mad24(y, src_step, x_+src_offset);
+    srcpos.z = mad24(y_, src_step, x+src_offset);
+    srcpos.w = mad24(y_, src_step, x_+src_offset);
     float data0 = src[srcpos.x];
     float data1 = src[srcpos.y];
     float data2 = src[srcpos.z];
@@ -252,13 +250,13 @@ __kernel void resizeLN_C1_D5(__global float * dst, __global float * src,
     float val2 = u1 *  data2 +
                 u *  data3;
     float val = v1 * val1 + v * val2;
-    int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel);
+    int dstpos = mad24(dy, dst_step, dx+dst_offset);
     if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
          dst[dstpos] = val;
 }
 
 __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
-                     int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
+                     int dst_offset, int src_offset,int dst_step, int src_step,
                      int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify )
 {
     int dx = get_global_id(0);
@@ -278,10 +276,10 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
     float u1 = 1.f-u;
     float v1 = 1.f-v;
     int4 srcpos;
-    srcpos.x = mad24(y, srcstep_in_pixel, x+srcoffset_in_pixel);
-    srcpos.y = mad24(y, srcstep_in_pixel, x_+srcoffset_in_pixel);
-    srcpos.z = mad24(y_, srcstep_in_pixel, x+srcoffset_in_pixel);
-    srcpos.w = mad24(y_, srcstep_in_pixel, x_+srcoffset_in_pixel);
+    srcpos.x = mad24(y, src_step, x+src_offset);
+    srcpos.y = mad24(y, src_step, x_+src_offset);
+    srcpos.z = mad24(y_, src_step, x+src_offset);
+    srcpos.w = mad24(y_, src_step, x_+src_offset);
     float4 s_data1, s_data2, s_data3, s_data4;
     s_data1 = src[srcpos.x];
     s_data2 = src[srcpos.y];
@@ -289,129 +287,32 @@ __kernel void resizeLN_C4_D5(__global float4 * dst, __global float4 * src,
     s_data4 = src[srcpos.w];
     float4 val = u1 * v1 * s_data1 + u * v1 * s_data2
               +u1 * v *s_data3 + u * v *s_data4;
-    int dstpos = mad24(dy, dststep_in_pixel, dx+dstoffset_in_pixel);
+    int dstpos = mad24(dy, dst_step, dx+dst_offset);
 
     if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
          dst[dstpos] = val;
 }
 
-__kernel void resizeNN_C1_D0(__global uchar * dst, __global uchar * src,
-                     int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
-                     int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify )
+#elif defined NN
+
+__kernel void resizeNN(__global T * dst, __global T * src,
+                       int dst_offset, int src_offset,int dst_step, int src_step,
+                       int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify)
 {
-    int gx = get_global_id(0);
+    int dx = get_global_id(0);
     int dy = get_global_id(1);
 
-    gx = (gx<<2) - (dstoffset_in_pixel&3);
-    //int4 GX = (int4)(gx, gx+1, gx+2, gx+3);
-
-    int4 sx;
-    int sy;
-    F ss1 = gx*ifx;
-    F ss2 = (gx+1)*ifx;
-    F ss3 = (gx+2)*ifx;
-    F ss4 = (gx+3)*ifx;
-    F s5 = dy * ify;
-    sx.s0 = min((int)floor(ss1), src_cols-1);
-    sx.s1 = min((int)floor(ss2), src_cols-1);
-    sx.s2 = min((int)floor(ss3), src_cols-1);
-    sx.s3 = min((int)floor(ss4), src_cols-1);
-    sy = min((int)floor(s5), src_rows-1);
-
-    uchar4 val;
-    int4 pos = mad24((int4)sy, (int4)srcstep_in_pixel, sx+(int4)srcoffset_in_pixel);
-    val.s0 = src[pos.s0];
-    val.s1 = src[pos.s1];
-    val.s2 = src[pos.s2];
-    val.s3 = src[pos.s3];
-
-    //__global uchar4* d = (__global uchar4*)(dst + dstoffset_in_pixel + dy * dststep_in_pixel + gx);
-    //uchar4 dVal = *d;
-    pos = mad24(dy, dststep_in_pixel, gx+dstoffset_in_pixel);
-    pos.y++;
-    pos.z+=2;
-    pos.w+=3;
-
-        int con = (gx >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows && (dstoffset_in_pixel&3)==0);
-    if(con)
+    if (dx < dst_cols && dy < dst_rows)
     {
-        *(__global uchar4*)(dst + pos.x)=val;
-    }
-    else
-    {
-        if(gx >= 0 && gx < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            dst[pos.x]=val.x;
-        }
-        if(gx+1 >= 0 && gx+1 < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            dst[pos.y]=val.y;
-        }
-        if(gx+2 >= 0 && gx+2 < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            dst[pos.z]=val.z;
-        }
-        if(gx+3 >= 0 && gx+3 < dst_cols && dy >= 0 && dy < dst_rows)
-        {
-            dst[pos.w]=val.w;
-        }
+        float s1 = dx * ifx, s2 = dy * ify; // destination pixel scaled by ifx/ify into source coordinates
+        int sx = min(convert_int_sat_rtn(s1), src_cols - 1); // round toward -inf (rtn), then clamp to the last source column
+        int sy = min(convert_int_sat_rtn(s2), src_rows - 1); // round toward -inf (rtn), then clamp to the last source row
+
+        int dst_index = mad24(dy, dst_step, dx + dst_offset); // dy * dst_step + (dx + dst_offset)
+        int src_index = mad24(sy, src_step, sx + src_offset); // sy * src_step + (sx + src_offset)
+
+        dst[dst_index] = src[src_index]; // nearest-neighbour: copy one element of type T unchanged
     }
 }
 
-__kernel void resizeNN_C4_D0(__global uchar4 * dst, __global uchar4 * src,
-                     int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
-                     int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    F s1 = dx*ifx;
-    F s2 = dy*ify;
-    int sx = fmin((float)floor(s1), (float)src_cols-1);
-    int sy = fmin((float)floor(s2), (float)src_rows-1);
-    int dpos = mad24(dy, dststep_in_pixel, dx + dstoffset_in_pixel);
-    int spos = mad24(sy, srcstep_in_pixel, sx + srcoffset_in_pixel);
-
-    if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
-        dst[dpos] = src[spos];
-
-}
-
-__kernel void resizeNN_C1_D5(__global float * dst, __global float * src,
-                     int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
-                     int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-
-    F s1 = dx*ifx;
-    F s2 = dy*ify;
-    int sx = fmin((float)floor(s1), (float)src_cols-1);
-    int sy = fmin((float)floor(s2), (float)src_rows-1);
-
-    int dpos = mad24(dy, dststep_in_pixel, dx + dstoffset_in_pixel);
-    int spos = mad24(sy, srcstep_in_pixel, sx + srcoffset_in_pixel);
-    if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
-        dst[dpos] = src[spos];
-
-}
-
-__kernel void resizeNN_C4_D5(__global float4 * dst, __global float4 * src,
-                     int dstoffset_in_pixel, int srcoffset_in_pixel,int dststep_in_pixel, int srcstep_in_pixel,
-                     int src_cols, int src_rows, int dst_cols, int dst_rows, F ifx, F ify )
-{
-    int dx = get_global_id(0);
-    int dy = get_global_id(1);
-    F s1 = dx*ifx;
-    F s2 = dy*ify;
-    int s_col = floor(s1);
-    int s_row = floor(s2);
-    int sx = min(s_col, src_cols-1);
-    int sy = min(s_row, src_rows-1);
-    int dpos = mad24(dy, dststep_in_pixel, dx + dstoffset_in_pixel);
-    int spos = mad24(sy, srcstep_in_pixel, sx + srcoffset_in_pixel);
-
-    if(dx>=0 && dx<dst_cols && dy>=0 && dy<dst_rows)
-        dst[dpos] = src[spos];
-
-}
+#endif

From 0a698bf145befee0995274c7152eeb316fac6413 Mon Sep 17 00:00:00 2001
From: Alexander Alekhin <alexander.alekhin@itseez.com>
Date: Fri, 15 Nov 2013 18:49:11 +0400
Subject: [PATCH 42/45] ocl: fix cvtColor XYZ: variables on stack

---
 modules/ocl/src/color.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/modules/ocl/src/color.cpp b/modules/ocl/src/color.cpp
index b807afd41..f30a4ce78 100644
--- a/modules/ocl/src/color.cpp
+++ b/modules/ocl/src/color.cpp
@@ -305,7 +305,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         bidx = code == CV_BGR2XYZ ? 0 : 2;
         dst.create(sz, CV_MAKE_TYPE(depth, 3));
 
-        void * pdata = NULL;
+        Mat c;
         if (depth == CV_32F)
         {
             float coeffs[] =
@@ -320,7 +320,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
                 std::swap(coeffs[3], coeffs[5]);
                 std::swap(coeffs[6], coeffs[8]);
             }
-            pdata = coeffs;
+            Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
         }
         else
         {
@@ -336,9 +336,9 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
                 std::swap(coeffs[3], coeffs[5]);
                 std::swap(coeffs[6], coeffs[8]);
             }
-            pdata = coeffs;
+            Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
         }
-        oclMat oclCoeffs(1, 9, depth == CV_32F ? CV_32FC1 : CV_32SC1, pdata);
+        oclMat oclCoeffs(c);
 
         fromRGB_caller(src, dst, bidx, "RGB2XYZ", "", oclCoeffs);
         break;
@@ -351,7 +351,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
         bidx = code == CV_XYZ2BGR ? 0 : 2;
         dst.create(sz, CV_MAKE_TYPE(depth, dcn));
 
-        void * pdata = NULL;
+        Mat c;
         if (depth == CV_32F)
         {
             float coeffs[] =
@@ -366,7 +366,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
                 std::swap(coeffs[1], coeffs[7]);
                 std::swap(coeffs[2], coeffs[8]);
             }
-            pdata = coeffs;
+            Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
         }
         else
         {
@@ -382,9 +382,9 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
                 std::swap(coeffs[1], coeffs[7]);
                 std::swap(coeffs[2], coeffs[8]);
             }
-            pdata = coeffs;
+            Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
         }
-        oclMat oclCoeffs(1, 9, depth == CV_32F ? CV_32FC1 : CV_32SC1, pdata);
+        oclMat oclCoeffs(c);
 
         toRGB_caller(src, dst, bidx, "XYZ2RGB", "", oclCoeffs);
         break;

From 2b106db02fcaf3b8cad46148210f788e6fae9202 Mon Sep 17 00:00:00 2001
From: StevenPuttemans <steven.puttemans@kuleuven.be>
Date: Wed, 30 Oct 2013 13:34:27 +0100
Subject: [PATCH 43/45] Applied all fixes related to translating C to C++ code
 Also fixed some typos and code alignment Also adapted tutorial CPP samples
 Fixed some indentation problems

---
 samples/cpp/3calibration.cpp                  |  2 +-
 .../OpenEXRimages_HDR_Retina_toneMapping.cpp  |  6 +-
 ...EXRimages_HDR_Retina_toneMapping_video.cpp |  4 +-
 samples/cpp/Qt_sample/main.cpp                | 55 +++++++++----------
 samples/cpp/bagofwords_classification.cpp     |  4 --
 samples/cpp/bgfg_segm.cpp                     |  8 +--
 samples/cpp/build3dmodel.cpp                  |  2 +-
 samples/cpp/calibration.cpp                   |  2 +-
 samples/cpp/camshiftdemo.cpp                  |  4 +-
 samples/cpp/chamfer.cpp                       |  2 +-
 samples/cpp/descriptor_extractor_matcher.cpp  |  4 +-
 .../cpp/detection_based_tracker_sample.cpp    |  2 +-
 samples/cpp/distrans.cpp                      |  2 +-
 samples/cpp/drawing.cpp                       |  4 +-
 samples/cpp/edge.cpp                          |  2 +-
 samples/cpp/em.cpp                            |  2 +-
 samples/cpp/fback.cpp                         |  6 +-
 samples/cpp/ffilldemo.cpp                     |  6 +-
 samples/cpp/generic_descriptor_match.cpp      |  2 +-
 samples/cpp/grabcut.cpp                       |  8 +--
 samples/cpp/houghcircles.cpp                  |  2 +-
 samples/cpp/houghlines.cpp                    |  2 +-
 samples/cpp/image.cpp                         |  2 +-
 samples/cpp/lkdemo.cpp                        |  2 +-
 samples/cpp/meanshift_segmentation.cpp        |  2 +-
 samples/cpp/minarea.cpp                       |  2 +-
 samples/cpp/morphology2.cpp                   |  2 +-
 samples/cpp/pca.cpp                           |  2 +-
 samples/cpp/phase_corr.cpp                    |  2 +-
 samples/cpp/points_classifier.cpp             | 14 ++---
 samples/cpp/rgbdodometry.cpp                  |  4 +-
 samples/cpp/segment_objects.cpp               |  2 -
 samples/cpp/stereo_calib.cpp                  |  4 +-
 .../HighGUI/BasicLinearTransformsTrackbar.cpp | 17 +++---
 .../Histograms_Matching/EqualizeHist_Demo.cpp |  6 +-
 .../MatchTemplate_Demo.cpp                    |  4 +-
 .../calcBackProject_Demo1.cpp                 |  4 +-
 .../calcBackProject_Demo2.cpp                 |  4 +-
 .../Histograms_Matching/calcHist_Demo.cpp     |  2 +-
 .../Histograms_Matching/compareHist_Demo.cpp  | 44 ++++++++-------
 .../tutorial_code/ImgProc/Morphology_1.cpp    |  6 +-
 .../tutorial_code/ImgProc/Morphology_2.cpp    | 10 ++--
 .../cpp/tutorial_code/ImgProc/Pyramids.cpp    |  2 +-
 .../cpp/tutorial_code/ImgProc/Smoothing.cpp   |  4 +-
 .../cpp/tutorial_code/ImgProc/Threshold.cpp   | 12 ++--
 .../ImgTrans/CannyDetector_Demo.cpp           |  4 +-
 .../ImgTrans/Geometric_Transforms_Demo.cpp    |  6 +-
 .../ImgTrans/HoughCircle_Demo.cpp             |  4 +-
 .../ImgTrans/HoughLines_Demo.cpp              |  6 +-
 .../tutorial_code/ImgTrans/Laplace_Demo.cpp   |  4 +-
 .../cpp/tutorial_code/ImgTrans/Remap_Demo.cpp |  2 +-
 .../cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp |  4 +-
 .../ImgTrans/copyMakeBorder_demo.cpp          |  2 +-
 .../tutorial_code/ImgTrans/filter2D_demo.cpp  |  2 +-
 .../ShapeDescriptors/findContours_demo.cpp    |  6 +-
 .../generalContours_demo1.cpp                 |  6 +-
 .../generalContours_demo2.cpp                 |  6 +-
 .../ShapeDescriptors/hull_demo.cpp            |  8 +--
 .../ShapeDescriptors/moments_demo.cpp         |  6 +-
 .../pointPolygonTest_demo.cpp                 |  4 +-
 .../TrackingMotion/cornerDetector_Demo.cpp    |  6 +-
 .../TrackingMotion/cornerHarris_Demo.cpp      |  6 +-
 .../TrackingMotion/cornerSubPix_Demo.cpp      |  6 +-
 .../goodFeaturesToTrack_Demo.cpp              |  6 +-
 .../camera_calibration/camera_calibration.cpp |  2 +-
 .../calib3d/stereoBM/SBM_Sample.cpp           |  2 +-
 .../tutorial_code/core/Matrix/Drawing_1.cpp   |  4 +-
 .../tutorial_code/core/Matrix/Drawing_2.cpp   |  4 +-
 .../interoperability_with_OpenCV_1.cpp        |  4 +-
 .../mat_mask_operations.cpp                   |  8 +--
 .../features2D/SURF_Homography.cpp            |  4 +-
 .../display_image/display_image.cpp           |  2 +-
 .../windows_visual_studio_Opencv/Test.cpp     | 10 ++--
 .../objectDetection/objectDetection.cpp       | 10 ++--
 .../objectDetection/objectDetection2.cpp      | 10 ++--
 samples/cpp/video_dmtx.cpp                    |  2 +-
 samples/cpp/video_homography.cpp              |  2 +-
 samples/cpp/watershed.cpp                     |  4 +-
 78 files changed, 217 insertions(+), 229 deletions(-)

diff --git a/samples/cpp/3calibration.cpp b/samples/cpp/3calibration.cpp
index 061e97795..08ba39765 100644
--- a/samples/cpp/3calibration.cpp
+++ b/samples/cpp/3calibration.cpp
@@ -271,7 +271,7 @@ int main( int argc, char** argv )
             {
                 vector<Point2f> ptvec;
                 imageSize = view.size();
-                cvtColor(view, viewGray, CV_BGR2GRAY);
+                cvtColor(view, viewGray, COLOR_BGR2GRAY);
                 bool found = findChessboardCorners( view, boardSize, ptvec, CV_CALIB_CB_ADAPTIVE_THRESH );
 
                 drawChessboardCorners( view, boardSize, Mat(ptvec), found );
diff --git a/samples/cpp/OpenEXRimages_HDR_Retina_toneMapping.cpp b/samples/cpp/OpenEXRimages_HDR_Retina_toneMapping.cpp
index e89003d19..be3dcd385 100644
--- a/samples/cpp/OpenEXRimages_HDR_Retina_toneMapping.cpp
+++ b/samples/cpp/OpenEXRimages_HDR_Retina_toneMapping.cpp
@@ -1,10 +1,10 @@
 
 //============================================================================
-// Name        : HighDynamicRange_RetinaCompression.cpp
+// Name        : OpenEXRimages_HDR_Retina_toneMapping.cpp
 // Author      : Alexandre Benoit (benoit.alexandre.vision@gmail.com)
 // Version     : 0.1
 // Copyright   : Alexandre Benoit, LISTIC Lab, july 2011
-// Description : HighDynamicRange compression (tone mapping) with the help of the Gipsa/Listic's retina in C++, Ansi-style
+// Description : HighDynamicRange retina tone mapping with the help of the Gipsa/Listic's retina in C++, Ansi-style
 //============================================================================
 
 #include <iostream>
@@ -69,7 +69,7 @@ static void drawPlot(const cv::Mat curve, const std::string figureTitle, const i
      {
          cv::Mat rgbIntImg;
          outputMat.convertTo(rgbIntImg, CV_8UC3);
-         cvtColor(rgbIntImg, intGrayImage, CV_BGR2GRAY);
+         cvtColor(rgbIntImg, intGrayImage, cv::COLOR_BGR2GRAY);
      }
 
      // get histogram density probability in order to cut values under above edges limits (here 5-95%)... usefull for HDR pixel errors cancellation
diff --git a/samples/cpp/OpenEXRimages_HDR_Retina_toneMapping_video.cpp b/samples/cpp/OpenEXRimages_HDR_Retina_toneMapping_video.cpp
index fc3ac8367..509f3e877 100644
--- a/samples/cpp/OpenEXRimages_HDR_Retina_toneMapping_video.cpp
+++ b/samples/cpp/OpenEXRimages_HDR_Retina_toneMapping_video.cpp
@@ -4,7 +4,7 @@
 // Author      : Alexandre Benoit (benoit.alexandre.vision@gmail.com)
 // Version     : 0.2
 // Copyright   : Alexandre Benoit, LISTIC Lab, december 2011
-// Description : HighDynamicRange compression (tone mapping) for image sequences with the help of the Gipsa/Listic's retina in C++, Ansi-style
+// Description : HighDynamicRange retina tone mapping for image sequences with the help of the Gipsa/Listic's retina in C++, Ansi-style
 // Known issues: the input OpenEXR sequences can have bad computed pixels that should be removed
 //               => a simple method consists of cutting histogram edges (a slider for this on the UI is provided)
 //               => however, in image sequences, this histogramm cut must be done in an elegant way from frame to frame... still not done...
@@ -92,7 +92,7 @@ static void rescaleGrayLevelMat(const cv::Mat &inputMat, cv::Mat &outputMat, con
      {
          cv::Mat rgbIntImg;
          normalisedImage.convertTo(rgbIntImg, CV_8UC3);
-         cvtColor(rgbIntImg, intGrayImage, CV_BGR2GRAY);
+         cvtColor(rgbIntImg, intGrayImage, cv::COLOR_BGR2GRAY);
      }
 
      // get histogram density probability in order to cut values under above edges limits (here 5-95%)... usefull for HDR pixel errors cancellation
diff --git a/samples/cpp/Qt_sample/main.cpp b/samples/cpp/Qt_sample/main.cpp
index 105b973eb..879453940 100644
--- a/samples/cpp/Qt_sample/main.cpp
+++ b/samples/cpp/Qt_sample/main.cpp
@@ -4,7 +4,7 @@
 
 #include <iostream>
 #include <vector>
-#include <opencv/highgui.h>
+#include <opencv2/highgui/highgui.hpp>
 
 #if defined WIN32 || defined _WIN32 || defined WINCE
     #include <windows.h>
@@ -20,8 +20,7 @@
     #include <GL/gl.h>
 #endif
 
-#include <opencv/cxcore.h>
-#include <opencv/cv.h>
+#include <opencv2/core/core.hpp>
 
 using namespace std;
 using namespace cv;
@@ -111,12 +110,12 @@ static void initPOSIT(std::vector<CvPoint3D32f> *modelPoints)
     modelPoints->push_back(cvPoint3D32f(0.0f, CUBE_SIZE, 0.0f));
 }
 
-static void foundCorners(vector<CvPoint2D32f> *srcImagePoints,IplImage* source, IplImage* grayImage)
+static void foundCorners(vector<CvPoint2D32f> *srcImagePoints, const Mat& source, Mat& grayImage)
 {
-    cvCvtColor(source,grayImage,CV_RGB2GRAY);
-    cvSmooth( grayImage, grayImage,CV_GAUSSIAN,11);
-    cvNormalize(grayImage, grayImage, 0, 255, CV_MINMAX);
-    cvThreshold( grayImage, grayImage, 26, 255, CV_THRESH_BINARY_INV);//25
+    cvtColor(source, grayImage, COLOR_RGB2GRAY);
+    GaussianBlur(grayImage, grayImage, Size(11,11), 0, 0);
+    normalize(grayImage, grayImage, 0, 255, NORM_MINMAX);
+    threshold(grayImage, grayImage, 26, 255, THRESH_BINARY_INV); //25
 
     Mat MgrayImage = grayImage;
     //For debug
@@ -189,8 +188,8 @@ static void foundCorners(vector<CvPoint2D32f> *srcImagePoints,IplImage* source,
         for(size_t i = 0 ; i<srcImagePoints_temp.size(); i++ )
         {
             ss<<i;
-            circle(Msource,srcImagePoints->at(i),5,CV_RGB(255,0,0));
-            putText( Msource, ss.str(), srcImagePoints->at(i),CV_FONT_HERSHEY_SIMPLEX,1,CV_RGB(255,0,0));
+            circle(Msource,srcImagePoints->at(i),5,Scalar(0,0,255));
+            putText(Msource,ss.str(),srcImagePoints->at(i),FONT_HERSHEY_SIMPLEX,1,Scalar(0,0,255));
             ss.str("");
 
             //new coordinate system in the middle of the frame and reversed (camera coordinate system)
@@ -224,19 +223,19 @@ static void createOpenGLMatrixFrom(float *posePOSIT,const CvMatr32f &rotationMat
 int main(void)
 {
     help();
-    CvCapture* video = cvCaptureFromFile("cube4.avi");
-    CV_Assert(video);
+    VideoCapture video("cube4.avi");
+    CV_Assert(video.isOpened());
 
-    IplImage* source = cvCreateImage(cvGetSize(cvQueryFrame(video)),8,3);
-    IplImage* grayImage = cvCreateImage(cvGetSize(cvQueryFrame(video)),8,1);
+    Mat source, grayImage;
 
-    cvNamedWindow("original",CV_WINDOW_AUTOSIZE | CV_WINDOW_FREERATIO);
-    cvNamedWindow("POSIT",CV_WINDOW_AUTOSIZE | CV_WINDOW_FREERATIO);
+    video >> source;
+
+    namedWindow("original", WINDOW_AUTOSIZE | CV_WINDOW_FREERATIO);
+    namedWindow("POSIT", WINDOW_AUTOSIZE | CV_WINDOW_FREERATIO);
     displayOverlay("POSIT", "We lost the 4 corners' detection quite often (the red circles disappear). This demo is only to illustrate how to use OpenGL callback.\n -- Press ESC to exit.", 10000);
-    //For debug
-    //cvNamedWindow("tempGray",CV_WINDOW_AUTOSIZE);
+
     float OpenGLMatrix[]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-    cvSetOpenGlDrawCallback("POSIT",on_opengl,OpenGLMatrix);
+    setOpenGlDrawCallback("POSIT",on_opengl,OpenGLMatrix);
 
     vector<CvPoint3D32f> modelPoints;
     initPOSIT(&modelPoints);
@@ -251,26 +250,22 @@ int main(void)
     vector<CvPoint2D32f> srcImagePoints(4,cvPoint2D32f(0,0));
 
 
-    while(cvWaitKey(33) != 27)
+    while(waitKey(33) != 27)
     {
-        source=cvQueryFrame(video);
-        cvShowImage("original",source);
+        video >> source;
+        imshow("original",source);
 
         foundCorners(&srcImagePoints,source,grayImage);
         cvPOSIT( positObject, &srcImagePoints[0], FOCAL_LENGTH, criteria, rotation_matrix, translation_vector );
         createOpenGLMatrixFrom(OpenGLMatrix,rotation_matrix,translation_vector);
 
-        cvShowImage("POSIT",source);
-        //For debug
-        //cvShowImage("tempGray",grayImage);
+        imshow("POSIT",source);
 
-        if (cvGetCaptureProperty(video,CV_CAP_PROP_POS_AVI_RATIO)>0.99)
-            cvSetCaptureProperty(video,CV_CAP_PROP_POS_AVI_RATIO,0);
+        if (video.get(CV_CAP_PROP_POS_AVI_RATIO)>0.99)
+            video.set(CV_CAP_PROP_POS_AVI_RATIO,0); // rewind to the start so the clip keeps looping
     }
 
-    cvDestroyAllWindows();
-    cvReleaseImage(&grayImage);
-    cvReleaseCapture(&video);
+    destroyAllWindows();
     cvReleasePOSITObject(&positObject);
 
     return 0;
diff --git a/samples/cpp/bagofwords_classification.cpp b/samples/cpp/bagofwords_classification.cpp
index e24a770f8..39342c4d3 100644
--- a/samples/cpp/bagofwords_classification.cpp
+++ b/samples/cpp/bagofwords_classification.cpp
@@ -54,10 +54,6 @@ static void help(char** argv)
     << "\n";
 }
 
-
-
-
-
 static void makeDir( const string& dir )
 {
 #if defined WIN32 || defined _WIN32
diff --git a/samples/cpp/bgfg_segm.cpp b/samples/cpp/bgfg_segm.cpp
index aff7d308e..f0d313ef6 100644
--- a/samples/cpp/bgfg_segm.cpp
+++ b/samples/cpp/bgfg_segm.cpp
@@ -45,10 +45,10 @@ int main(int argc, const char** argv)
         return -1;
     }
 
-    namedWindow("image", CV_WINDOW_NORMAL);
-    namedWindow("foreground mask", CV_WINDOW_NORMAL);
-    namedWindow("foreground image", CV_WINDOW_NORMAL);
-    namedWindow("mean background image", CV_WINDOW_NORMAL);
+    namedWindow("image", WINDOW_NORMAL);
+    namedWindow("foreground mask", WINDOW_NORMAL);
+    namedWindow("foreground image", WINDOW_NORMAL);
+    namedWindow("mean background image", WINDOW_NORMAL);
 
     BackgroundSubtractorMOG2 bg_model;//(100, 3, 0.3, 5);
 
diff --git a/samples/cpp/build3dmodel.cpp b/samples/cpp/build3dmodel.cpp
index b896a3936..6700fcc6b 100644
--- a/samples/cpp/build3dmodel.cpp
+++ b/samples/cpp/build3dmodel.cpp
@@ -418,7 +418,7 @@ static void build3dmodel( const Ptr<FeatureDetector>& detector,
     for( size_t i = 0; i < nimages; i++ )
     {
         Mat img = imread(imageList[i], 1), gray;
-        cvtColor(img, gray, CV_BGR2GRAY);
+        cvtColor(img, gray, COLOR_BGR2GRAY);
 
         vector<KeyPoint> keypoints;
         detector->detect(gray, keypoints);
diff --git a/samples/cpp/calibration.cpp b/samples/cpp/calibration.cpp
index f4ff81077..2d2077646 100644
--- a/samples/cpp/calibration.cpp
+++ b/samples/cpp/calibration.cpp
@@ -461,7 +461,7 @@ int main( int argc, char** argv )
             flip( view, view, 0 );
 
         vector<Point2f> pointbuf;
-        cvtColor(view, viewGray, CV_BGR2GRAY);
+        cvtColor(view, viewGray, COLOR_BGR2GRAY);
 
         bool found;
         switch( pattern )
diff --git a/samples/cpp/camshiftdemo.cpp b/samples/cpp/camshiftdemo.cpp
index 0b17239ed..8a1ed19e7 100644
--- a/samples/cpp/camshiftdemo.cpp
+++ b/samples/cpp/camshiftdemo.cpp
@@ -113,7 +113,7 @@ int main( int argc, const char** argv )
 
         if( !paused )
         {
-            cvtColor(image, hsv, CV_BGR2HSV);
+            cvtColor(image, hsv, COLOR_BGR2HSV);
 
             if( trackObject )
             {
@@ -163,7 +163,7 @@ int main( int argc, const char** argv )
                 }
 
                 if( backprojMode )
-                    cvtColor( backproj, image, CV_GRAY2BGR );
+                    cvtColor( backproj, image, COLOR_GRAY2BGR );
                 ellipse( image, trackBox, Scalar(0,0,255), 3, CV_AA );
             }
         }
diff --git a/samples/cpp/chamfer.cpp b/samples/cpp/chamfer.cpp
index 4be87cf76..f83a24f9c 100644
--- a/samples/cpp/chamfer.cpp
+++ b/samples/cpp/chamfer.cpp
@@ -40,7 +40,7 @@ int main( int argc, const char** argv )
         return -1;
     }
     Mat cimg;
-    cvtColor(img, cimg, CV_GRAY2BGR);
+    cvtColor(img, cimg, COLOR_GRAY2BGR);
 
     // if the image and the template are not edge maps but normal grayscale images,
     // you might want to uncomment the lines below to produce the maps. You can also
diff --git a/samples/cpp/descriptor_extractor_matcher.cpp b/samples/cpp/descriptor_extractor_matcher.cpp
index 7aa529910..98c645223 100644
--- a/samples/cpp/descriptor_extractor_matcher.cpp
+++ b/samples/cpp/descriptor_extractor_matcher.cpp
@@ -208,7 +208,7 @@ static void doIteration( const Mat& img1, Mat& img2, bool isWarpPerspective,
                 matchesMask[i1] = 1;
         }
         // draw inliers
-        drawMatches( img1, keypoints1, img2, keypoints2, filteredMatches, drawImg, CV_RGB(0, 255, 0), CV_RGB(0, 0, 255), matchesMask
+        drawMatches( img1, keypoints1, img2, keypoints2, filteredMatches, drawImg, Scalar(0, 255, 0), Scalar(255, 0, 0), matchesMask
 #if DRAW_RICH_KEYPOINTS_MODE
                      , DrawMatchesFlags::DRAW_RICH_KEYPOINTS
 #endif
@@ -218,7 +218,7 @@ static void doIteration( const Mat& img1, Mat& img2, bool isWarpPerspective,
         // draw outliers
         for( size_t i1 = 0; i1 < matchesMask.size(); i1++ )
             matchesMask[i1] = !matchesMask[i1];
-        drawMatches( img1, keypoints1, img2, keypoints2, filteredMatches, drawImg, CV_RGB(0, 0, 255), CV_RGB(255, 0, 0), matchesMask,
+        drawMatches( img1, keypoints1, img2, keypoints2, filteredMatches, drawImg, Scalar(255, 0, 0), Scalar(0, 0, 255), matchesMask,
                      DrawMatchesFlags::DRAW_OVER_OUTIMG | DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS );
 #endif
 
diff --git a/samples/cpp/detection_based_tracker_sample.cpp b/samples/cpp/detection_based_tracker_sample.cpp
index 6aeb1b245..4c922dee7 100644
--- a/samples/cpp/detection_based_tracker_sample.cpp
+++ b/samples/cpp/detection_based_tracker_sample.cpp
@@ -116,7 +116,7 @@ static int test_FaceDetector(int argc, char *argv[])
         LOGD("\n\nSTEP n=%d        from prev step %f ms\n\n", n, t_ms);
         m=images[n-1];
         CV_Assert(! m.empty());
-        cvtColor(m, gray, CV_BGR2GRAY);
+        cvtColor(m, gray, COLOR_BGR2GRAY);
 
         fd.process(gray);
 
diff --git a/samples/cpp/distrans.cpp b/samples/cpp/distrans.cpp
index 4e3c3a2b0..e10a96311 100644
--- a/samples/cpp/distrans.cpp
+++ b/samples/cpp/distrans.cpp
@@ -128,7 +128,7 @@ int main( int argc, const char** argv )
         // Call to update the view
         onTrackbar(0, 0);
 
-        int c = cvWaitKey(0) & 255;
+        int c = waitKey(0) & 255;
 
         if( c == 27 )
             break;
diff --git a/samples/cpp/drawing.cpp b/samples/cpp/drawing.cpp
index 546746390..313f100ec 100644
--- a/samples/cpp/drawing.cpp
+++ b/samples/cpp/drawing.cpp
@@ -157,14 +157,14 @@ int main()
             return 0;
     }
 
-    Size textsize = getTextSize("OpenCV forever!", CV_FONT_HERSHEY_COMPLEX, 3, 5, 0);
+    Size textsize = getTextSize("OpenCV forever!", FONT_HERSHEY_COMPLEX, 3, 5, 0);
     Point org((width - textsize.width)/2, (height - textsize.height)/2);
 
     Mat image2;
     for( i = 0; i < 255; i += 2 )
     {
         image2 = image - Scalar::all(i);
-        putText(image2, "OpenCV forever!", org, CV_FONT_HERSHEY_COMPLEX, 3,
+        putText(image2, "OpenCV forever!", org, FONT_HERSHEY_COMPLEX, 3,
                 Scalar(i, i, 255), 5, lineType);
 
         imshow(wndname, image2);
diff --git a/samples/cpp/edge.cpp b/samples/cpp/edge.cpp
index 6ff30a2cf..f11d20fbb 100644
--- a/samples/cpp/edge.cpp
+++ b/samples/cpp/edge.cpp
@@ -49,7 +49,7 @@ int main( int argc, const char** argv )
         return -1;
     }
     cedge.create(image.size(), image.type());
-    cvtColor(image, gray, CV_BGR2GRAY);
+    cvtColor(image, gray, COLOR_BGR2GRAY);
 
     // Create a window
     namedWindow("Edge map", 1);
diff --git a/samples/cpp/em.cpp b/samples/cpp/em.cpp
index 4c4664b8f..ab7f27e83 100644
--- a/samples/cpp/em.cpp
+++ b/samples/cpp/em.cpp
@@ -59,7 +59,7 @@ int main( int /*argc*/, char** /*argv*/ )
     params.cov_mat_type = CvEM::COV_MAT_DIAGONAL;
     params.start_step = CvEM::START_E_STEP;
     params.means = em_model.get_means();
-    params.covs = (const CvMat**)em_model.get_covs();
+    params.covs = em_model.get_covs();
     params.weights = em_model.get_weights();
 
     em_model2.train( samples, Mat(), params, &labels );
diff --git a/samples/cpp/fback.cpp b/samples/cpp/fback.cpp
index 4923e8abc..5293fd1f7 100644
--- a/samples/cpp/fback.cpp
+++ b/samples/cpp/fback.cpp
@@ -42,13 +42,13 @@ int main(int, char**)
     for(;;)
     {
         cap >> frame;
-        cvtColor(frame, gray, CV_BGR2GRAY);
+        cvtColor(frame, gray, COLOR_BGR2GRAY);
 
         if( prevgray.data )
         {
             calcOpticalFlowFarneback(prevgray, gray, flow, 0.5, 3, 15, 3, 5, 1.2, 0);
-            cvtColor(prevgray, cflow, CV_GRAY2BGR);
-            drawOptFlowMap(flow, cflow, 16, 1.5, CV_RGB(0, 255, 0));
+            cvtColor(prevgray, cflow, COLOR_GRAY2BGR);
+            drawOptFlowMap(flow, cflow, 16, 1.5, Scalar(0, 255, 0));
             imshow("flow", cflow);
         }
         if(waitKey(30)>=0)
diff --git a/samples/cpp/ffilldemo.cpp b/samples/cpp/ffilldemo.cpp
index a7fff778f..fd697b23a 100644
--- a/samples/cpp/ffilldemo.cpp
+++ b/samples/cpp/ffilldemo.cpp
@@ -81,7 +81,7 @@ int main( int argc, char** argv )
     }
     help();
     image0.copyTo(image);
-    cvtColor(image0, gray, CV_BGR2GRAY);
+    cvtColor(image0, gray, COLOR_BGR2GRAY);
     mask.create(image0.rows+2, image0.cols+2, CV_8UC1);
 
     namedWindow( "image", 0 );
@@ -106,7 +106,7 @@ int main( int argc, char** argv )
             if( isColor )
             {
                 cout << "Grayscale mode is set\n";
-                cvtColor(image0, gray, CV_BGR2GRAY);
+                cvtColor(image0, gray, COLOR_BGR2GRAY);
                 mask = Scalar::all(0);
                 isColor = false;
             }
@@ -135,7 +135,7 @@ int main( int argc, char** argv )
         case 'r':
             cout << "Original image is restored\n";
             image0.copyTo(image);
-            cvtColor(image, gray, CV_BGR2GRAY);
+            cvtColor(image, gray, COLOR_BGR2GRAY);
             mask = Scalar::all(0);
             break;
         case 's':
diff --git a/samples/cpp/generic_descriptor_match.cpp b/samples/cpp/generic_descriptor_match.cpp
index c86fdeb35..1cb03c6a9 100644
--- a/samples/cpp/generic_descriptor_match.cpp
+++ b/samples/cpp/generic_descriptor_match.cpp
@@ -79,7 +79,7 @@ Mat DrawCorrespondences(const Mat& img1, const vector<KeyPoint>& features1, cons
 
     for (size_t i = 0; i < features1.size(); i++)
     {
-        circle(img_corr, features1[i].pt, 3, CV_RGB(255, 0, 0));
+        circle(img_corr, features1[i].pt, 3, Scalar(0, 0, 255));
     }
 
     for (size_t i = 0; i < features2.size(); i++)
diff --git a/samples/cpp/grabcut.cpp b/samples/cpp/grabcut.cpp
index 40280e457..b7d0d4dfd 100644
--- a/samples/cpp/grabcut.cpp
+++ b/samples/cpp/grabcut.cpp
@@ -296,15 +296,15 @@ int main( int argc, char** argv )
     help();
 
     const string winName = "image";
-    cvNamedWindow( winName.c_str(), CV_WINDOW_AUTOSIZE );
-    cvSetMouseCallback( winName.c_str(), on_mouse, 0 );
+    namedWindow( winName, WINDOW_AUTOSIZE );
+    setMouseCallback( winName, on_mouse, 0 );
 
     gcapp.setImageAndWinName( image, winName );
     gcapp.showImage();
 
     for(;;)
     {
-        int c = cvWaitKey(0);
+        int c = waitKey(0);
         switch( (char) c )
         {
         case '\x1b':
@@ -331,6 +331,6 @@ int main( int argc, char** argv )
     }
 
 exit_main:
-    cvDestroyWindow( winName.c_str() );
+    destroyWindow( winName );
     return 0;
 }
diff --git a/samples/cpp/houghcircles.cpp b/samples/cpp/houghcircles.cpp
index bc53fd124..a10e0d2d0 100644
--- a/samples/cpp/houghcircles.cpp
+++ b/samples/cpp/houghcircles.cpp
@@ -27,7 +27,7 @@ int main(int argc, char** argv)
 
     Mat cimg;
     medianBlur(img, img, 5);
-    cvtColor(img, cimg, CV_GRAY2BGR);
+    cvtColor(img, cimg, COLOR_GRAY2BGR);
 
     vector<Vec3f> circles;
     HoughCircles(img, circles, CV_HOUGH_GRADIENT, 1, 10,
diff --git a/samples/cpp/houghlines.cpp b/samples/cpp/houghlines.cpp
index 5ecc54ce1..681551ace 100644
--- a/samples/cpp/houghlines.cpp
+++ b/samples/cpp/houghlines.cpp
@@ -27,7 +27,7 @@ int main(int argc, char** argv)
 
     Mat dst, cdst;
     Canny(src, dst, 50, 200, 3);
-    cvtColor(dst, cdst, CV_GRAY2BGR);
+    cvtColor(dst, cdst, COLOR_GRAY2BGR);
 
 #if 0
     vector<Vec2f> lines;
diff --git a/samples/cpp/image.cpp b/samples/cpp/image.cpp
index 5647f1c78..41e2043c5 100644
--- a/samples/cpp/image.cpp
+++ b/samples/cpp/image.cpp
@@ -109,7 +109,7 @@ int main( int argc, char** argv )
     cvtColor(img_yuv, img, CV_YCrCb2BGR);
 
     // this is counterpart for cvNamedWindow
-    namedWindow("image with grain", CV_WINDOW_AUTOSIZE);
+    namedWindow("image with grain", WINDOW_AUTOSIZE);
 #if DEMO_MIXED_API_USE
     // this is to demonstrate that img and iplimg really share the data - the result of the above
     // processing is stored in img and thus in iplimg too.
diff --git a/samples/cpp/lkdemo.cpp b/samples/cpp/lkdemo.cpp
index bbb734cf3..90078f3ab 100644
--- a/samples/cpp/lkdemo.cpp
+++ b/samples/cpp/lkdemo.cpp
@@ -71,7 +71,7 @@ int main( int argc, char** argv )
             break;
 
         frame.copyTo(image);
-        cvtColor(image, gray, CV_BGR2GRAY);
+        cvtColor(image, gray, COLOR_BGR2GRAY);
 
         if( nightMode )
             image = Scalar::all(0);
diff --git a/samples/cpp/meanshift_segmentation.cpp b/samples/cpp/meanshift_segmentation.cpp
index ce34f6ea7..eb71619da 100644
--- a/samples/cpp/meanshift_segmentation.cpp
+++ b/samples/cpp/meanshift_segmentation.cpp
@@ -65,7 +65,7 @@ int main(int argc, char** argv)
     colorRad = 10;
     maxPyrLevel = 1;
 
-    namedWindow( winName, CV_WINDOW_AUTOSIZE );
+    namedWindow( winName, WINDOW_AUTOSIZE );
 
     createTrackbar( "spatialRad", winName, &spatialRad, 80, meanShiftSegmentation );
     createTrackbar( "colorRad", winName, &colorRad, 60, meanShiftSegmentation );
diff --git a/samples/cpp/minarea.cpp b/samples/cpp/minarea.cpp
index 6056c39c4..c981c7faa 100644
--- a/samples/cpp/minarea.cpp
+++ b/samples/cpp/minarea.cpp
@@ -54,7 +54,7 @@ int main( int /*argc*/, char** /*argv*/ )
 
         imshow( "rect & circle", img );
 
-        char key = (char)cvWaitKey();
+        char key = (char)waitKey();
         if( key == 27 || key == 'q' || key == 'Q' ) // 'ESC'
             break;
     }
diff --git a/samples/cpp/morphology2.cpp b/samples/cpp/morphology2.cpp
index d4ab35154..330f4e064 100644
--- a/samples/cpp/morphology2.cpp
+++ b/samples/cpp/morphology2.cpp
@@ -79,7 +79,7 @@ int main( int argc, char** argv )
 
         OpenClose(open_close_pos, 0);
         ErodeDilate(erode_dilate_pos, 0);
-        c = cvWaitKey(0);
+        c = waitKey(0);
 
         if( (char)c == 27 )
             break;
diff --git a/samples/cpp/pca.cpp b/samples/cpp/pca.cpp
index ed23c7622..be4e4a108 100644
--- a/samples/cpp/pca.cpp
+++ b/samples/cpp/pca.cpp
@@ -159,7 +159,7 @@ int main(int argc, char** argv)
 
     // init highgui window
     string winName = "Reconstruction | press 'q' to quit";
-    namedWindow(winName, CV_WINDOW_NORMAL);
+    namedWindow(winName, WINDOW_NORMAL);
 
     // params struct to pass to the trackbar handler
     params p;
diff --git a/samples/cpp/phase_corr.cpp b/samples/cpp/phase_corr.cpp
index e53f09431..8267d2ab3 100644
--- a/samples/cpp/phase_corr.cpp
+++ b/samples/cpp/phase_corr.cpp
@@ -13,7 +13,7 @@ int main(int, char* [])
     do
     {
         video >> frame;
-        cvtColor(frame, curr, CV_RGB2GRAY);
+        cvtColor(frame, curr, COLOR_RGB2GRAY);
 
         if(prev.empty())
         {
diff --git a/samples/cpp/points_classifier.cpp b/samples/cpp/points_classifier.cpp
index 8bc35e554..e23b2768a 100644
--- a/samples/cpp/points_classifier.cpp
+++ b/samples/cpp/points_classifier.cpp
@@ -7,7 +7,7 @@
 using namespace std;
 using namespace cv;
 
-const Scalar WHITE_COLOR = CV_RGB(255,255,255);
+const Scalar WHITE_COLOR = Scalar(255,255,255);
 const string winName = "points";
 const int testStep = 5;
 
@@ -69,15 +69,15 @@ static void on_mouse( int event, int x, int y, int /*flags*/, void* )
         // put the text
         stringstream text;
         text << "current class " << classColors.size()-1;
-        putText( img, text.str(), Point(10,25), CV_FONT_HERSHEY_SIMPLEX, 0.8f, WHITE_COLOR, 2 );
+        putText( img, text.str(), Point(10,25), FONT_HERSHEY_SIMPLEX, 0.8f, WHITE_COLOR, 2 );
 
         text.str("");
         text << "total classes " << classColors.size();
-        putText( img, text.str(), Point(10,50), CV_FONT_HERSHEY_SIMPLEX, 0.8f, WHITE_COLOR, 2 );
+        putText( img, text.str(), Point(10,50), FONT_HERSHEY_SIMPLEX, 0.8f, WHITE_COLOR, 2 );
 
         text.str("");
         text << "total points " << trainedPoints.size();
-        putText(img, text.str(), cvPoint(10,75), CV_FONT_HERSHEY_SIMPLEX, 0.8f, WHITE_COLOR, 2 );
+        putText(img, text.str(), Point(10,75), FONT_HERSHEY_SIMPLEX, 0.8f, WHITE_COLOR, 2 );
 
         // draw points
         for( size_t i = 0; i < trainedPoints.size(); i++ )
@@ -178,7 +178,7 @@ static void find_decision_boundary_SVM( CvSVMParams params )
     for( int i = 0; i < svmClassifier.get_support_vector_count(); i++ )
     {
         const float* supportVector = svmClassifier.get_support_vector(i);
-        circle( imgDst, Point(supportVector[0],supportVector[1]), 5, CV_RGB(255,255,255), -1 );
+        circle( imgDst, Point(supportVector[0],supportVector[1]), 5, Scalar(255,255,255), -1 );
     }
 
 }
@@ -526,7 +526,7 @@ int main()
         {
 #if _NBC_
             find_decision_boundary_NBC();
-            cvNamedWindow( "NormalBayesClassifier", WINDOW_AUTOSIZE );
+            namedWindow( "NormalBayesClassifier", WINDOW_AUTOSIZE );
             imshow( "NormalBayesClassifier", imgDst );
 #endif
 #if _KNN_
@@ -560,7 +560,7 @@ int main()
 
             params.C = 10;
             find_decision_boundary_SVM( params );
-            cvNamedWindow( "classificationSVM2", WINDOW_AUTOSIZE );
+            namedWindow( "classificationSVM2", WINDOW_AUTOSIZE );
             imshow( "classificationSVM2", imgDst );
 #endif
 
diff --git a/samples/cpp/rgbdodometry.cpp b/samples/cpp/rgbdodometry.cpp
index 660eb31d0..0db2fab84 100644
--- a/samples/cpp/rgbdodometry.cpp
+++ b/samples/cpp/rgbdodometry.cpp
@@ -125,8 +125,8 @@ int main(int argc, char** argv)
     }
 
     Mat grayImage0, grayImage1, depthFlt0, depthFlt1/*in meters*/;
-    cvtColor( colorImage0, grayImage0, CV_BGR2GRAY );
-    cvtColor( colorImage1, grayImage1, CV_BGR2GRAY );
+    cvtColor( colorImage0, grayImage0, COLOR_BGR2GRAY );
+    cvtColor( colorImage1, grayImage1, COLOR_BGR2GRAY );
     depth0.convertTo( depthFlt0, CV_32FC1, 1./1000 );
     depth1.convertTo( depthFlt1, CV_32FC1, 1./1000 );
 
diff --git a/samples/cpp/segment_objects.cpp b/samples/cpp/segment_objects.cpp
index 8195df65a..6438b8949 100644
--- a/samples/cpp/segment_objects.cpp
+++ b/samples/cpp/segment_objects.cpp
@@ -95,8 +95,6 @@ int main(int argc, char** argv)
         if( !tmp_frame.data )
             break;
         bgsubtractor(tmp_frame, bgmask, update_bg_model ? -1 : 0);
-        //CvMat _bgmask = bgmask;
-        //cvSegmentFGMask(&_bgmask);
         refineSegments(tmp_frame, bgmask, out_frame);
         imshow("video", tmp_frame);
         imshow("segmented", out_frame);
diff --git a/samples/cpp/stereo_calib.cpp b/samples/cpp/stereo_calib.cpp
index 92135d6ed..809f23bbd 100644
--- a/samples/cpp/stereo_calib.cpp
+++ b/samples/cpp/stereo_calib.cpp
@@ -118,7 +118,7 @@ StereoCalib(const vector<string>& imagelist, Size boardSize, bool useCalibrated=
             {
                 cout << filename << endl;
                 Mat cimg, cimg1;
-                cvtColor(img, cimg, CV_GRAY2BGR);
+                cvtColor(img, cimg, COLOR_GRAY2BGR);
                 drawChessboardCorners(cimg, boardSize, corners, found);
                 double sf = 640./MAX(img.rows, img.cols);
                 resize(cimg, cimg1, Size(), sf, sf);
@@ -302,7 +302,7 @@ StereoCalib(const vector<string>& imagelist, Size boardSize, bool useCalibrated=
         {
             Mat img = imread(goodImageList[i*2+k], 0), rimg, cimg;
             remap(img, rimg, rmap[k][0], rmap[k][1], CV_INTER_LINEAR);
-            cvtColor(rimg, cimg, CV_GRAY2BGR);
+            cvtColor(rimg, cimg, COLOR_GRAY2BGR);
             Mat canvasPart = !isVerticalStereo ? canvas(Rect(w*k, 0, w, h)) : canvas(Rect(0, h*k, w, h));
             resize(cimg, canvasPart, canvasPart.size(), 0, 0, CV_INTER_AREA);
             if( useCalibrated )
diff --git a/samples/cpp/tutorial_code/HighGUI/BasicLinearTransformsTrackbar.cpp b/samples/cpp/tutorial_code/HighGUI/BasicLinearTransformsTrackbar.cpp
index b344c96e0..88c1dd496 100644
--- a/samples/cpp/tutorial_code/HighGUI/BasicLinearTransformsTrackbar.cpp
+++ b/samples/cpp/tutorial_code/HighGUI/BasicLinearTransformsTrackbar.cpp
@@ -24,17 +24,14 @@ Mat image;
  */
 static void on_trackbar( int, void* )
 {
-   Mat new_image = Mat::zeros( image.size(), image.type() );
+    Mat new_image = Mat::zeros( image.size(), image.type() );
 
-   for( int y = 0; y < image.rows; y++ )
-      { for( int x = 0; x < image.cols; x++ )
-           { for( int c = 0; c < 3; c++ )
-                {
-          new_image.at<Vec3b>(y,x)[c] = saturate_cast<uchar>( alpha*( image.at<Vec3b>(y,x)[c] ) + beta );
-                }
-       }
-      }
-   imshow("New Image", new_image);
+    for( int y = 0; y < image.rows; y++ )
+        for( int x = 0; x < image.cols; x++ )
+            for( int c = 0; c < 3; c++ )
+                new_image.at<Vec3b>(y,x)[c] = saturate_cast<uchar>( alpha*( image.at<Vec3b>(y,x)[c] ) + beta );
+
+    imshow("New Image", new_image);
 }
 
 
diff --git a/samples/cpp/tutorial_code/Histograms_Matching/EqualizeHist_Demo.cpp b/samples/cpp/tutorial_code/Histograms_Matching/EqualizeHist_Demo.cpp
index 49e4be0e5..38a5839bb 100644
--- a/samples/cpp/tutorial_code/Histograms_Matching/EqualizeHist_Demo.cpp
+++ b/samples/cpp/tutorial_code/Histograms_Matching/EqualizeHist_Demo.cpp
@@ -31,14 +31,14 @@ int main( int, char** argv )
     }
 
   /// Convert to grayscale
-  cvtColor( src, src, CV_BGR2GRAY );
+  cvtColor( src, src, COLOR_BGR2GRAY );
 
   /// Apply Histogram Equalization
   equalizeHist( src, dst );
 
   /// Display results
-  namedWindow( source_window, CV_WINDOW_AUTOSIZE );
-  namedWindow( equalized_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( source_window, WINDOW_AUTOSIZE );
+  namedWindow( equalized_window, WINDOW_AUTOSIZE );
 
   imshow( source_window, src );
   imshow( equalized_window, dst );
diff --git a/samples/cpp/tutorial_code/Histograms_Matching/MatchTemplate_Demo.cpp b/samples/cpp/tutorial_code/Histograms_Matching/MatchTemplate_Demo.cpp
index e3633576f..ca318be55 100644
--- a/samples/cpp/tutorial_code/Histograms_Matching/MatchTemplate_Demo.cpp
+++ b/samples/cpp/tutorial_code/Histograms_Matching/MatchTemplate_Demo.cpp
@@ -33,8 +33,8 @@ int main( int, char** argv )
   templ = imread( argv[2], 1 );
 
   /// Create windows
-  namedWindow( image_window, CV_WINDOW_AUTOSIZE );
-  namedWindow( result_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( image_window, WINDOW_AUTOSIZE );
+  namedWindow( result_window, WINDOW_AUTOSIZE );
 
   /// Create Trackbar
   const char* trackbar_label = "Method: \n 0: SQDIFF \n 1: SQDIFF NORMED \n 2: TM CCORR \n 3: TM CCORR NORMED \n 4: TM COEFF \n 5: TM COEFF NORMED";
diff --git a/samples/cpp/tutorial_code/Histograms_Matching/calcBackProject_Demo1.cpp b/samples/cpp/tutorial_code/Histograms_Matching/calcBackProject_Demo1.cpp
index f42260374..86d2f2e15 100644
--- a/samples/cpp/tutorial_code/Histograms_Matching/calcBackProject_Demo1.cpp
+++ b/samples/cpp/tutorial_code/Histograms_Matching/calcBackProject_Demo1.cpp
@@ -28,7 +28,7 @@ int main( int, char** argv )
   /// Read the image
   src = imread( argv[1], 1 );
   /// Transform it to HSV
-  cvtColor( src, hsv, CV_BGR2HSV );
+  cvtColor( src, hsv, COLOR_BGR2HSV );
 
   /// Use only the Hue value
   hue.create( hsv.size(), hsv.depth() );
@@ -37,7 +37,7 @@ int main( int, char** argv )
 
   /// Create Trackbar to enter the number of bins
   const char* window_image = "Source image";
-  namedWindow( window_image, CV_WINDOW_AUTOSIZE );
+  namedWindow( window_image, WINDOW_AUTOSIZE );
   createTrackbar("* Hue  bins: ", window_image, &bins, 180, Hist_and_Backproj );
   Hist_and_Backproj(0, 0);
 
diff --git a/samples/cpp/tutorial_code/Histograms_Matching/calcBackProject_Demo2.cpp b/samples/cpp/tutorial_code/Histograms_Matching/calcBackProject_Demo2.cpp
index 42dd01a67..85f18db9f 100644
--- a/samples/cpp/tutorial_code/Histograms_Matching/calcBackProject_Demo2.cpp
+++ b/samples/cpp/tutorial_code/Histograms_Matching/calcBackProject_Demo2.cpp
@@ -31,10 +31,10 @@ int main( int, char** argv )
   /// Read the image
   src = imread( argv[1], 1 );
   /// Transform it to HSV
-  cvtColor( src, hsv, CV_BGR2HSV );
+  cvtColor( src, hsv, COLOR_BGR2HSV );
 
   /// Show the image
-  namedWindow( window_image, CV_WINDOW_AUTOSIZE );
+  namedWindow( window_image, WINDOW_AUTOSIZE );
   imshow( window_image, src );
 
   /// Set Trackbars for floodfill thresholds
diff --git a/samples/cpp/tutorial_code/Histograms_Matching/calcHist_Demo.cpp b/samples/cpp/tutorial_code/Histograms_Matching/calcHist_Demo.cpp
index d3af5e7a9..577c8a8b9 100644
--- a/samples/cpp/tutorial_code/Histograms_Matching/calcHist_Demo.cpp
+++ b/samples/cpp/tutorial_code/Histograms_Matching/calcHist_Demo.cpp
@@ -71,7 +71,7 @@ int main( int, char** argv )
   }
 
   /// Display
-  namedWindow("calcHist Demo", CV_WINDOW_AUTOSIZE );
+  namedWindow("calcHist Demo", WINDOW_AUTOSIZE );
   imshow("calcHist Demo", histImage );
 
   waitKey(0);
diff --git a/samples/cpp/tutorial_code/Histograms_Matching/compareHist_Demo.cpp b/samples/cpp/tutorial_code/Histograms_Matching/compareHist_Demo.cpp
index b18aab2e8..f4dd4e5e4 100644
--- a/samples/cpp/tutorial_code/Histograms_Matching/compareHist_Demo.cpp
+++ b/samples/cpp/tutorial_code/Histograms_Matching/compareHist_Demo.cpp
@@ -22,24 +22,25 @@ int main( int argc, char** argv )
     Mat src_test2, hsv_test2;
     Mat hsv_half_down;
 
-   /// Load three images with different environment settings
-   if( argc < 4 )
-     { printf("** Error. Usage: ./compareHist_Demo <image_settings0> <image_setting1> <image_settings2>\n");
-       return -1;
-     }
+    /// Load three images with different environment settings
+    if( argc < 4 )
+    {
+        printf("** Error. Usage: ./compareHist_Demo <image_settings0> <image_setting1> <image_settings2>\n");
+        return -1;
+    }
 
-   src_base = imread( argv[1], 1 );
-   src_test1 = imread( argv[2], 1 );
-   src_test2 = imread( argv[3], 1 );
+    src_base = imread( argv[1], 1 );
+    src_test1 = imread( argv[2], 1 );
+    src_test2 = imread( argv[3], 1 );
 
-   /// Convert to HSV
-   cvtColor( src_base, hsv_base, CV_BGR2HSV );
-   cvtColor( src_test1, hsv_test1, CV_BGR2HSV );
-   cvtColor( src_test2, hsv_test2, CV_BGR2HSV );
+    /// Convert to HSV
+    cvtColor( src_base, hsv_base, COLOR_BGR2HSV );
+    cvtColor( src_test1, hsv_test1, COLOR_BGR2HSV );
+    cvtColor( src_test2, hsv_test2, COLOR_BGR2HSV );
 
-   hsv_half_down = hsv_base( Range( hsv_base.rows/2, hsv_base.rows - 1 ), Range( 0, hsv_base.cols - 1 ) );
+    hsv_half_down = hsv_base( Range( hsv_base.rows/2, hsv_base.rows - 1 ), Range( 0, hsv_base.cols - 1 ) );
 
-   /// Using 30 bins for hue and 32 for saturation
+    /// Using 50 bins for hue and 60 for saturation
     int h_bins = 50; int s_bins = 60;
     int histSize[] = { h_bins, s_bins };
 
@@ -74,14 +75,15 @@ int main( int argc, char** argv )
 
     /// Apply the histogram comparison methods
     for( int i = 0; i < 4; i++ )
-       { int compare_method = i;
-         double base_base = compareHist( hist_base, hist_base, compare_method );
-         double base_half = compareHist( hist_base, hist_half_down, compare_method );
-         double base_test1 = compareHist( hist_base, hist_test1, compare_method );
-         double base_test2 = compareHist( hist_base, hist_test2, compare_method );
+    {
+        int compare_method = i;
+        double base_base = compareHist( hist_base, hist_base, compare_method );
+        double base_half = compareHist( hist_base, hist_half_down, compare_method );
+        double base_test1 = compareHist( hist_base, hist_test1, compare_method );
+        double base_test2 = compareHist( hist_base, hist_test2, compare_method );
 
-         printf( " Method [%d] Perfect, Base-Half, Base-Test(1), Base-Test(2) : %f, %f, %f, %f \n", i, base_base, base_half , base_test1, base_test2 );
-       }
+        printf( " Method [%d] Perfect, Base-Half, Base-Test(1), Base-Test(2) : %f, %f, %f, %f \n", i, base_base, base_half , base_test1, base_test2 );
+    }
 
     printf( "Done \n" );
 
diff --git a/samples/cpp/tutorial_code/ImgProc/Morphology_1.cpp b/samples/cpp/tutorial_code/ImgProc/Morphology_1.cpp
index 47030fa4f..13a96a1f5 100644
--- a/samples/cpp/tutorial_code/ImgProc/Morphology_1.cpp
+++ b/samples/cpp/tutorial_code/ImgProc/Morphology_1.cpp
@@ -37,9 +37,9 @@ int main( int, char** argv )
     { return -1; }
 
   /// Create windows
-  namedWindow( "Erosion Demo", CV_WINDOW_AUTOSIZE );
-  namedWindow( "Dilation Demo", CV_WINDOW_AUTOSIZE );
-  cvMoveWindow( "Dilation Demo", src.cols, 0 );
+  namedWindow( "Erosion Demo", WINDOW_AUTOSIZE );
+  namedWindow( "Dilation Demo", WINDOW_AUTOSIZE );
+  moveWindow( "Dilation Demo", src.cols, 0 );
 
   /// Create Erosion Trackbar
   createTrackbar( "Element:\n 0: Rect \n 1: Cross \n 2: Ellipse", "Erosion Demo",
diff --git a/samples/cpp/tutorial_code/ImgProc/Morphology_2.cpp b/samples/cpp/tutorial_code/ImgProc/Morphology_2.cpp
index 2a1327d9d..a963bf83e 100644
--- a/samples/cpp/tutorial_code/ImgProc/Morphology_2.cpp
+++ b/samples/cpp/tutorial_code/ImgProc/Morphology_2.cpp
@@ -39,20 +39,20 @@ int main( int, char** argv )
     { return -1; }
 
   /// Create window
-  namedWindow( window_name, CV_WINDOW_AUTOSIZE );
+  namedWindow( window_name, WINDOW_AUTOSIZE );
 
   /// Create Trackbar to select Morphology operation
   createTrackbar("Operator:\n 0: Opening - 1: Closing  \n 2: Gradient - 3: Top Hat \n 4: Black Hat", window_name, &morph_operator, max_operator, Morphology_Operations );
 
   /// Create Trackbar to select kernel type
   createTrackbar( "Element:\n 0: Rect - 1: Cross - 2: Ellipse", window_name,
-          &morph_elem, max_elem,
-          Morphology_Operations );
+                  &morph_elem, max_elem,
+                  Morphology_Operations );
 
   /// Create Trackbar to choose kernel size
   createTrackbar( "Kernel size:\n 2n +1", window_name,
-          &morph_size, max_kernel_size,
-          Morphology_Operations );
+                  &morph_size, max_kernel_size,
+                  Morphology_Operations );
 
   /// Default start
   Morphology_Operations( 0, 0 );
diff --git a/samples/cpp/tutorial_code/ImgProc/Pyramids.cpp b/samples/cpp/tutorial_code/ImgProc/Pyramids.cpp
index 0c9052095..fc98d1c21 100644
--- a/samples/cpp/tutorial_code/ImgProc/Pyramids.cpp
+++ b/samples/cpp/tutorial_code/ImgProc/Pyramids.cpp
@@ -40,7 +40,7 @@ int main( void )
   dst = tmp;
 
   /// Create window
-  namedWindow( window_name, CV_WINDOW_AUTOSIZE );
+  namedWindow( window_name, WINDOW_AUTOSIZE );
   imshow( window_name, dst );
 
   /// Loop
diff --git a/samples/cpp/tutorial_code/ImgProc/Smoothing.cpp b/samples/cpp/tutorial_code/ImgProc/Smoothing.cpp
index 5f51d07d2..8513bcf76 100644
--- a/samples/cpp/tutorial_code/ImgProc/Smoothing.cpp
+++ b/samples/cpp/tutorial_code/ImgProc/Smoothing.cpp
@@ -31,7 +31,7 @@ int display_dst( int delay );
  */
 int main( void )
 {
-  namedWindow( window_name, CV_WINDOW_AUTOSIZE );
+  namedWindow( window_name, WINDOW_AUTOSIZE );
 
   /// Load the source image
   src = imread( "../images/lena.png", 1 );
@@ -89,7 +89,7 @@ int display_caption( const char* caption )
   dst = Mat::zeros( src.size(), src.type() );
   putText( dst, caption,
            Point( src.cols/4, src.rows/2),
-           CV_FONT_HERSHEY_COMPLEX, 1, Scalar(255, 255, 255) );
+           FONT_HERSHEY_COMPLEX, 1, Scalar(255, 255, 255) );
 
   imshow( window_name, dst );
   int c = waitKey( DELAY_CAPTION );
diff --git a/samples/cpp/tutorial_code/ImgProc/Threshold.cpp b/samples/cpp/tutorial_code/ImgProc/Threshold.cpp
index 7505ec297..d98cc1182 100644
--- a/samples/cpp/tutorial_code/ImgProc/Threshold.cpp
+++ b/samples/cpp/tutorial_code/ImgProc/Threshold.cpp
@@ -37,19 +37,19 @@ int main( int, char** argv )
   src = imread( argv[1], 1 );
 
   /// Convert the image to Gray
-  cvtColor( src, src_gray, CV_RGB2GRAY );
+  cvtColor( src, src_gray, COLOR_RGB2GRAY );
 
   /// Create a window to display results
-  namedWindow( window_name, CV_WINDOW_AUTOSIZE );
+  namedWindow( window_name, WINDOW_AUTOSIZE );
 
   /// Create Trackbar to choose type of Threshold
   createTrackbar( trackbar_type,
-          window_name, &threshold_type,
-          max_type, Threshold_Demo );
+                  window_name, &threshold_type,
+                  max_type, Threshold_Demo );
 
   createTrackbar( trackbar_value,
-          window_name, &threshold_value,
-          max_value, Threshold_Demo );
+                  window_name, &threshold_value,
+                  max_value, Threshold_Demo );
 
   /// Call the function to initialize
   Threshold_Demo( 0, 0 );
diff --git a/samples/cpp/tutorial_code/ImgTrans/CannyDetector_Demo.cpp b/samples/cpp/tutorial_code/ImgTrans/CannyDetector_Demo.cpp
index c798f2fb4..7851c9f58 100644
--- a/samples/cpp/tutorial_code/ImgTrans/CannyDetector_Demo.cpp
+++ b/samples/cpp/tutorial_code/ImgTrans/CannyDetector_Demo.cpp
@@ -58,10 +58,10 @@ int main( int, char** argv )
   dst.create( src.size(), src.type() );
 
   /// Convert the image to grayscale
-  cvtColor( src, src_gray, CV_BGR2GRAY );
+  cvtColor( src, src_gray, COLOR_BGR2GRAY );
 
   /// Create a window
-  namedWindow( window_name, CV_WINDOW_AUTOSIZE );
+  namedWindow( window_name, WINDOW_AUTOSIZE );
 
   /// Create a Trackbar for user to enter threshold
   createTrackbar( "Min Threshold:", window_name, &lowThreshold, max_lowThreshold, CannyThreshold );
diff --git a/samples/cpp/tutorial_code/ImgTrans/Geometric_Transforms_Demo.cpp b/samples/cpp/tutorial_code/ImgTrans/Geometric_Transforms_Demo.cpp
index 2d1835720..0a2a2f5fa 100644
--- a/samples/cpp/tutorial_code/ImgTrans/Geometric_Transforms_Demo.cpp
+++ b/samples/cpp/tutorial_code/ImgTrans/Geometric_Transforms_Demo.cpp
@@ -65,13 +65,13 @@ int main( int, char** argv )
 
 
   /// Show what you got
-  namedWindow( source_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( source_window, WINDOW_AUTOSIZE );
   imshow( source_window, src );
 
-  namedWindow( warp_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( warp_window, WINDOW_AUTOSIZE );
   imshow( warp_window, warp_dst );
 
-  namedWindow( warp_rotate_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( warp_rotate_window, WINDOW_AUTOSIZE );
   imshow( warp_rotate_window, warp_rotate_dst );
 
   /// Wait until user exits the program
diff --git a/samples/cpp/tutorial_code/ImgTrans/HoughCircle_Demo.cpp b/samples/cpp/tutorial_code/ImgTrans/HoughCircle_Demo.cpp
index 71d37fb7a..da75026e4 100644
--- a/samples/cpp/tutorial_code/ImgTrans/HoughCircle_Demo.cpp
+++ b/samples/cpp/tutorial_code/ImgTrans/HoughCircle_Demo.cpp
@@ -25,7 +25,7 @@ int main(int, char** argv)
      { return -1; }
 
    /// Convert it to gray
-    cvtColor( src, src_gray, CV_BGR2GRAY );
+    cvtColor( src, src_gray, COLOR_BGR2GRAY );
 
    /// Reduce the noise so we avoid false circle detection
     GaussianBlur( src_gray, src_gray, Size(9, 9), 2, 2 );
@@ -47,7 +47,7 @@ int main(int, char** argv)
     }
 
    /// Show your results
-    namedWindow( "Hough Circle Transform Demo", CV_WINDOW_AUTOSIZE );
+    namedWindow( "Hough Circle Transform Demo", WINDOW_AUTOSIZE );
     imshow( "Hough Circle Transform Demo", src );
 
     waitKey(0);
diff --git a/samples/cpp/tutorial_code/ImgTrans/HoughLines_Demo.cpp b/samples/cpp/tutorial_code/ImgTrans/HoughLines_Demo.cpp
index 561948a58..ee30771d5 100644
--- a/samples/cpp/tutorial_code/ImgTrans/HoughLines_Demo.cpp
+++ b/samples/cpp/tutorial_code/ImgTrans/HoughLines_Demo.cpp
@@ -46,7 +46,7 @@ int main( int, char** argv )
      }
 
    /// Pass the image to gray
-   cvtColor( src, src_gray, CV_RGB2GRAY );
+   cvtColor( src, src_gray, COLOR_RGB2GRAY );
 
    /// Apply Canny edge detector
    Canny( src_gray, edges, 50, 200, 3 );
@@ -55,10 +55,10 @@ int main( int, char** argv )
    char thresh_label[50];
    sprintf( thresh_label, "Thres: %d + input", min_threshold );
 
-   namedWindow( standard_name, CV_WINDOW_AUTOSIZE );
+   namedWindow( standard_name, WINDOW_AUTOSIZE );
    createTrackbar( thresh_label, standard_name, &s_trackbar, max_trackbar, Standard_Hough);
 
-   namedWindow( probabilistic_name, CV_WINDOW_AUTOSIZE );
+   namedWindow( probabilistic_name, WINDOW_AUTOSIZE );
    createTrackbar( thresh_label, probabilistic_name, &p_trackbar, max_trackbar, Probabilistic_Hough);
 
    /// Initialize
diff --git a/samples/cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp b/samples/cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp
index 279dc6dcb..f6dff102d 100644
--- a/samples/cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp
+++ b/samples/cpp/tutorial_code/ImgTrans/Laplace_Demo.cpp
@@ -34,10 +34,10 @@ int main( int, char** argv )
   GaussianBlur( src, src, Size(3,3), 0, 0, BORDER_DEFAULT );
 
   /// Convert the image to grayscale
-  cvtColor( src, src_gray, CV_RGB2GRAY );
+  cvtColor( src, src_gray, COLOR_RGB2GRAY );
 
   /// Create window
-  namedWindow( window_name, CV_WINDOW_AUTOSIZE );
+  namedWindow( window_name, WINDOW_AUTOSIZE );
 
   /// Apply Laplace function
   Mat abs_dst;
diff --git a/samples/cpp/tutorial_code/ImgTrans/Remap_Demo.cpp b/samples/cpp/tutorial_code/ImgTrans/Remap_Demo.cpp
index 40e5118a6..66ed0f387 100644
--- a/samples/cpp/tutorial_code/ImgTrans/Remap_Demo.cpp
+++ b/samples/cpp/tutorial_code/ImgTrans/Remap_Demo.cpp
@@ -34,7 +34,7 @@ int main( int, char** argv )
   map_y.create( src.size(), CV_32FC1 );
 
   /// Create window
-  namedWindow( remap_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( remap_window, WINDOW_AUTOSIZE );
 
   /// Loop
   for(;;)
diff --git a/samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp b/samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp
index cbd03e8b9..f8c97c411 100644
--- a/samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp
+++ b/samples/cpp/tutorial_code/ImgTrans/Sobel_Demo.cpp
@@ -33,10 +33,10 @@ int main( int, char** argv )
   GaussianBlur( src, src, Size(3,3), 0, 0, BORDER_DEFAULT );
 
   /// Convert it to gray
-  cvtColor( src, src_gray, CV_RGB2GRAY );
+  cvtColor( src, src_gray, COLOR_RGB2GRAY );
 
   /// Create window
-  namedWindow( window_name, CV_WINDOW_AUTOSIZE );
+  namedWindow( window_name, WINDOW_AUTOSIZE );
 
   /// Generate grad_x and grad_y
   Mat grad_x, grad_y;
diff --git a/samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp b/samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp
index 0d84e738c..0a441ccdf 100644
--- a/samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp
+++ b/samples/cpp/tutorial_code/ImgTrans/copyMakeBorder_demo.cpp
@@ -43,7 +43,7 @@ int main( int, char** argv )
   printf( " ** Press 'ESC' to exit the program \n");
 
   /// Create window
-  namedWindow( window_name, CV_WINDOW_AUTOSIZE );
+  namedWindow( window_name, WINDOW_AUTOSIZE );
 
   /// Initialize arguments for the filter
   top = (int) (0.05*src.rows); bottom = (int) (0.05*src.rows);
diff --git a/samples/cpp/tutorial_code/ImgTrans/filter2D_demo.cpp b/samples/cpp/tutorial_code/ImgTrans/filter2D_demo.cpp
index 7a8dc768c..86db4d8ed 100644
--- a/samples/cpp/tutorial_code/ImgTrans/filter2D_demo.cpp
+++ b/samples/cpp/tutorial_code/ImgTrans/filter2D_demo.cpp
@@ -35,7 +35,7 @@ int main ( int, char** argv )
     { return -1; }
 
   /// Create window
-  namedWindow( window_name, CV_WINDOW_AUTOSIZE );
+  namedWindow( window_name, WINDOW_AUTOSIZE );
 
   /// Initialize arguments for the filter
   anchor = Point( -1, -1 );
diff --git a/samples/cpp/tutorial_code/ShapeDescriptors/findContours_demo.cpp b/samples/cpp/tutorial_code/ShapeDescriptors/findContours_demo.cpp
index e301476ee..bff125f2f 100644
--- a/samples/cpp/tutorial_code/ShapeDescriptors/findContours_demo.cpp
+++ b/samples/cpp/tutorial_code/ShapeDescriptors/findContours_demo.cpp
@@ -30,12 +30,12 @@ int main( int, char** argv )
   src = imread( argv[1], 1 );
 
   /// Convert image to gray and blur it
-  cvtColor( src, src_gray, CV_BGR2GRAY );
+  cvtColor( src, src_gray, COLOR_BGR2GRAY );
   blur( src_gray, src_gray, Size(3,3) );
 
   /// Create Window
   const char* source_window = "Source";
-  namedWindow( source_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( source_window, WINDOW_AUTOSIZE );
   imshow( source_window, src );
 
   createTrackbar( " Canny thresh:", "Source", &thresh, max_thresh, thresh_callback );
@@ -68,6 +68,6 @@ void thresh_callback(int, void* )
      }
 
   /// Show in a window
-  namedWindow( "Contours", CV_WINDOW_AUTOSIZE );
+  namedWindow( "Contours", WINDOW_AUTOSIZE );
   imshow( "Contours", drawing );
 }
diff --git a/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp b/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp
index b973cfd97..a75292273 100644
--- a/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp
+++ b/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo1.cpp
@@ -30,12 +30,12 @@ int main( int, char** argv )
   src = imread( argv[1], 1 );
 
   /// Convert image to gray and blur it
-  cvtColor( src, src_gray, CV_BGR2GRAY );
+  cvtColor( src, src_gray, COLOR_BGR2GRAY );
   blur( src_gray, src_gray, Size(3,3) );
 
   /// Create Window
   const char* source_window = "Source";
-  namedWindow( source_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( source_window, WINDOW_AUTOSIZE );
   imshow( source_window, src );
 
   createTrackbar( " Threshold:", "Source", &thresh, max_thresh, thresh_callback );
@@ -83,6 +83,6 @@ void thresh_callback(int, void* )
      }
 
   /// Show in a window
-  namedWindow( "Contours", CV_WINDOW_AUTOSIZE );
+  namedWindow( "Contours", WINDOW_AUTOSIZE );
   imshow( "Contours", drawing );
 }
diff --git a/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo2.cpp b/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo2.cpp
index 70d8663c9..f21b7189f 100644
--- a/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo2.cpp
+++ b/samples/cpp/tutorial_code/ShapeDescriptors/generalContours_demo2.cpp
@@ -30,12 +30,12 @@ int main( int, char** argv )
   src = imread( argv[1], 1 );
 
   /// Convert image to gray and blur it
-  cvtColor( src, src_gray, CV_BGR2GRAY );
+  cvtColor( src, src_gray, COLOR_BGR2GRAY );
   blur( src_gray, src_gray, Size(3,3) );
 
   /// Create Window
   const char* source_window = "Source";
-  namedWindow( source_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( source_window, WINDOW_AUTOSIZE );
   imshow( source_window, src );
 
   createTrackbar( " Threshold:", "Source", &thresh, max_thresh, thresh_callback );
@@ -85,6 +85,6 @@ void thresh_callback(int, void* )
      }
 
   /// Show in a window
-  namedWindow( "Contours", CV_WINDOW_AUTOSIZE );
+  namedWindow( "Contours", WINDOW_AUTOSIZE );
   imshow( "Contours", drawing );
 }
diff --git a/samples/cpp/tutorial_code/ShapeDescriptors/hull_demo.cpp b/samples/cpp/tutorial_code/ShapeDescriptors/hull_demo.cpp
index 8fe5d5b07..e38003e9a 100644
--- a/samples/cpp/tutorial_code/ShapeDescriptors/hull_demo.cpp
+++ b/samples/cpp/tutorial_code/ShapeDescriptors/hull_demo.cpp
@@ -30,12 +30,12 @@ int main( int, char** argv )
   src = imread( argv[1], 1 );
 
   /// Convert image to gray and blur it
-  cvtColor( src, src_gray, CV_BGR2GRAY );
+  cvtColor( src, src_gray, COLOR_BGR2GRAY );
   blur( src_gray, src_gray, Size(3,3) );
 
   /// Create Window
   const char* source_window = "Source";
-  namedWindow( source_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( source_window, WINDOW_AUTOSIZE );
   imshow( source_window, src );
 
   createTrackbar( " Threshold:", "Source", &thresh, max_thresh, thresh_callback );
@@ -62,7 +62,7 @@ void thresh_callback(int, void* )
   findContours( threshold_output, contours, hierarchy, CV_RETR_TREE, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) );
 
   /// Find the convex hull object for each contour
- vector<vector<Point> >hull( contours.size() );
+  vector<vector<Point> >hull( contours.size() );
   for( size_t i = 0; i < contours.size(); i++ )
      {   convexHull( Mat(contours[i]), hull[i], false ); }
 
@@ -76,6 +76,6 @@ void thresh_callback(int, void* )
      }
 
   /// Show in a window
-  namedWindow( "Hull demo", CV_WINDOW_AUTOSIZE );
+  namedWindow( "Hull demo", WINDOW_AUTOSIZE );
   imshow( "Hull demo", drawing );
 }
diff --git a/samples/cpp/tutorial_code/ShapeDescriptors/moments_demo.cpp b/samples/cpp/tutorial_code/ShapeDescriptors/moments_demo.cpp
index 941edccd2..9f0d286b9 100644
--- a/samples/cpp/tutorial_code/ShapeDescriptors/moments_demo.cpp
+++ b/samples/cpp/tutorial_code/ShapeDescriptors/moments_demo.cpp
@@ -30,12 +30,12 @@ int main( int, char** argv )
   src = imread( argv[1], 1 );
 
   /// Convert image to gray and blur it
-  cvtColor( src, src_gray, CV_BGR2GRAY );
+  cvtColor( src, src_gray, COLOR_BGR2GRAY );
   blur( src_gray, src_gray, Size(3,3) );
 
   /// Create Window
   const char* source_window = "Source";
-  namedWindow( source_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( source_window, WINDOW_AUTOSIZE );
   imshow( source_window, src );
 
   createTrackbar( " Canny thresh:", "Source", &thresh, max_thresh, thresh_callback );
@@ -79,7 +79,7 @@ void thresh_callback(int, void* )
      }
 
   /// Show in a window
-  namedWindow( "Contours", CV_WINDOW_AUTOSIZE );
+  namedWindow( "Contours", WINDOW_AUTOSIZE );
   imshow( "Contours", drawing );
 
   /// Calculate the area with the moments 00 and compare with the result of the OpenCV function
diff --git a/samples/cpp/tutorial_code/ShapeDescriptors/pointPolygonTest_demo.cpp b/samples/cpp/tutorial_code/ShapeDescriptors/pointPolygonTest_demo.cpp
index 25195d3e4..f55f8f687 100644
--- a/samples/cpp/tutorial_code/ShapeDescriptors/pointPolygonTest_demo.cpp
+++ b/samples/cpp/tutorial_code/ShapeDescriptors/pointPolygonTest_demo.cpp
@@ -71,9 +71,9 @@ int main( void )
 
   /// Create Window and show your results
   const char* source_window = "Source";
-  namedWindow( source_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( source_window, WINDOW_AUTOSIZE );
   imshow( source_window, src );
-  namedWindow( "Distance", CV_WINDOW_AUTOSIZE );
+  namedWindow( "Distance", WINDOW_AUTOSIZE );
   imshow( "Distance", drawing );
 
   waitKey(0);
diff --git a/samples/cpp/tutorial_code/TrackingMotion/cornerDetector_Demo.cpp b/samples/cpp/tutorial_code/TrackingMotion/cornerDetector_Demo.cpp
index 5a37da4d4..31c700478 100644
--- a/samples/cpp/tutorial_code/TrackingMotion/cornerDetector_Demo.cpp
+++ b/samples/cpp/tutorial_code/TrackingMotion/cornerDetector_Demo.cpp
@@ -40,7 +40,7 @@ int main( int, char** argv )
 {
   /// Load source image and convert it to gray
   src = imread( argv[1], 1 );
-  cvtColor( src, src_gray, CV_BGR2GRAY );
+  cvtColor( src, src_gray, COLOR_BGR2GRAY );
 
   /// Set some parameters
   int blockSize = 3; int apertureSize = 3;
@@ -64,7 +64,7 @@ int main( int, char** argv )
   minMaxLoc( Mc, &myHarris_minVal, &myHarris_maxVal, 0, 0, Mat() );
 
   /* Create Window and Trackbar */
-  namedWindow( myHarris_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( myHarris_window, WINDOW_AUTOSIZE );
   createTrackbar( " Quality Level:", myHarris_window, &myHarris_qualityLevel, max_qualityLevel, myHarris_function );
   myHarris_function( 0, 0 );
 
@@ -75,7 +75,7 @@ int main( int, char** argv )
   minMaxLoc( myShiTomasi_dst, &myShiTomasi_minVal, &myShiTomasi_maxVal, 0, 0, Mat() );
 
   /* Create Window and Trackbar */
-  namedWindow( myShiTomasi_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( myShiTomasi_window, WINDOW_AUTOSIZE );
   createTrackbar( " Quality Level:", myShiTomasi_window, &myShiTomasi_qualityLevel, max_qualityLevel, myShiTomasi_function );
   myShiTomasi_function( 0, 0 );
 
diff --git a/samples/cpp/tutorial_code/TrackingMotion/cornerHarris_Demo.cpp b/samples/cpp/tutorial_code/TrackingMotion/cornerHarris_Demo.cpp
index 667ad7e32..e048f057b 100644
--- a/samples/cpp/tutorial_code/TrackingMotion/cornerHarris_Demo.cpp
+++ b/samples/cpp/tutorial_code/TrackingMotion/cornerHarris_Demo.cpp
@@ -31,10 +31,10 @@ int main( int, char** argv )
 {
   /// Load source image and convert it to gray
   src = imread( argv[1], 1 );
-  cvtColor( src, src_gray, CV_BGR2GRAY );
+  cvtColor( src, src_gray, COLOR_BGR2GRAY );
 
   /// Create a window and a trackbar
-  namedWindow( source_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( source_window, WINDOW_AUTOSIZE );
   createTrackbar( "Threshold: ", source_window, &thresh, max_thresh, cornerHarris_demo );
   imshow( source_window, src );
 
@@ -77,6 +77,6 @@ void cornerHarris_demo( int, void* )
           }
      }
   /// Showing the result
-  namedWindow( corners_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( corners_window, WINDOW_AUTOSIZE );
   imshow( corners_window, dst_norm_scaled );
 }
diff --git a/samples/cpp/tutorial_code/TrackingMotion/cornerSubPix_Demo.cpp b/samples/cpp/tutorial_code/TrackingMotion/cornerSubPix_Demo.cpp
index 45da0f955..f695a2830 100644
--- a/samples/cpp/tutorial_code/TrackingMotion/cornerSubPix_Demo.cpp
+++ b/samples/cpp/tutorial_code/TrackingMotion/cornerSubPix_Demo.cpp
@@ -32,10 +32,10 @@ int main( int, char** argv )
 {
   /// Load source image and convert it to gray
   src = imread( argv[1], 1 );
-  cvtColor( src, src_gray, CV_BGR2GRAY );
+  cvtColor( src, src_gray, COLOR_BGR2GRAY );
 
   /// Create Window
-  namedWindow( source_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( source_window, WINDOW_AUTOSIZE );
 
   /// Create Trackbar to set the number of corners
   createTrackbar( "Max  corners:", source_window, &maxCorners, maxTrackbar, goodFeaturesToTrack_Demo );
@@ -87,7 +87,7 @@ void goodFeaturesToTrack_Demo( int, void* )
      { circle( copy, corners[i], r, Scalar(rng.uniform(0,255), rng.uniform(0,255), rng.uniform(0,255)), -1, 8, 0 ); }
 
   /// Show what you got
-  namedWindow( source_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( source_window, WINDOW_AUTOSIZE );
   imshow( source_window, copy );
 
   /// Set the neeed parameters to find the refined corners
diff --git a/samples/cpp/tutorial_code/TrackingMotion/goodFeaturesToTrack_Demo.cpp b/samples/cpp/tutorial_code/TrackingMotion/goodFeaturesToTrack_Demo.cpp
index f20669f2b..b45d60a08 100644
--- a/samples/cpp/tutorial_code/TrackingMotion/goodFeaturesToTrack_Demo.cpp
+++ b/samples/cpp/tutorial_code/TrackingMotion/goodFeaturesToTrack_Demo.cpp
@@ -32,10 +32,10 @@ int main( int, char** argv )
 {
   /// Load source image and convert it to gray
   src = imread( argv[1], 1 );
-  cvtColor( src, src_gray, CV_BGR2GRAY );
+  cvtColor( src, src_gray, COLOR_BGR2GRAY );
 
   /// Create Window
-  namedWindow( source_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( source_window, WINDOW_AUTOSIZE );
 
   /// Create Trackbar to set the number of corners
   createTrackbar( "Max  corners:", source_window, &maxCorners, maxTrackbar, goodFeaturesToTrack_Demo );
@@ -87,6 +87,6 @@ void goodFeaturesToTrack_Demo( int, void* )
      { circle( copy, corners[i], r, Scalar(rng.uniform(0,255), rng.uniform(0,255), rng.uniform(0,255)), -1, 8, 0 ); }
 
   /// Show what you got
-  namedWindow( source_window, CV_WINDOW_AUTOSIZE );
+  namedWindow( source_window, WINDOW_AUTOSIZE );
   imshow( source_window, copy );
 }
diff --git a/samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp b/samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp
index 5cc15b305..6df0c1677 100644
--- a/samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp
+++ b/samples/cpp/tutorial_code/calib3d/camera_calibration/camera_calibration.cpp
@@ -294,7 +294,7 @@ int main(int argc, char* argv[])
                 if( s.calibrationPattern == Settings::CHESSBOARD)
                 {
                     Mat viewGray;
-                    cvtColor(view, viewGray, CV_BGR2GRAY);
+                    cvtColor(view, viewGray, COLOR_BGR2GRAY);
                     cornerSubPix( viewGray, pointBuf, Size(11,11),
                         Size(-1,-1), TermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 30, 0.1 ));
                 }
diff --git a/samples/cpp/tutorial_code/calib3d/stereoBM/SBM_Sample.cpp b/samples/cpp/tutorial_code/calib3d/stereoBM/SBM_Sample.cpp
index 13d5aa870..9f8725476 100644
--- a/samples/cpp/tutorial_code/calib3d/stereoBM/SBM_Sample.cpp
+++ b/samples/cpp/tutorial_code/calib3d/stereoBM/SBM_Sample.cpp
@@ -56,7 +56,7 @@ int main( int argc, char** argv )
   //-- 4. Display it as a CV_8UC1 image
   imgDisparity16S.convertTo( imgDisparity8U, CV_8UC1, 255/(maxVal - minVal));
 
-  namedWindow( windowDisparity, CV_WINDOW_NORMAL );
+  namedWindow( windowDisparity, WINDOW_NORMAL );
   imshow( windowDisparity, imgDisparity8U );
 
   //-- 5. Save the image
diff --git a/samples/cpp/tutorial_code/core/Matrix/Drawing_1.cpp b/samples/cpp/tutorial_code/core/Matrix/Drawing_1.cpp
index ef466e5f2..5cad99208 100644
--- a/samples/cpp/tutorial_code/core/Matrix/Drawing_1.cpp
+++ b/samples/cpp/tutorial_code/core/Matrix/Drawing_1.cpp
@@ -64,9 +64,9 @@ int main( void ){
 
   /// 3. Display your stuff!
   imshow( atom_window, atom_image );
-  cvMoveWindow( atom_window, 0, 200 );
+  moveWindow( atom_window, 0, 200 );
   imshow( rook_window, rook_image );
-  cvMoveWindow( rook_window, w, 200 );
+  moveWindow( rook_window, w, 200 );
 
   waitKey( 0 );
   return(0);
diff --git a/samples/cpp/tutorial_code/core/Matrix/Drawing_2.cpp b/samples/cpp/tutorial_code/core/Matrix/Drawing_2.cpp
index c84eccb69..844bcd27d 100644
--- a/samples/cpp/tutorial_code/core/Matrix/Drawing_2.cpp
+++ b/samples/cpp/tutorial_code/core/Matrix/Drawing_2.cpp
@@ -304,7 +304,7 @@ int Displaying_Random_Text( Mat image, char* window_name, RNG rng )
  */
 int Displaying_Big_End( Mat image, char* window_name, RNG )
 {
-  Size textsize = getTextSize("OpenCV forever!", CV_FONT_HERSHEY_COMPLEX, 3, 5, 0);
+  Size textsize = getTextSize("OpenCV forever!", FONT_HERSHEY_COMPLEX, 3, 5, 0);
   Point org((window_width - textsize.width)/2, (window_height - textsize.height)/2);
   int lineType = 8;
 
@@ -313,7 +313,7 @@ int Displaying_Big_End( Mat image, char* window_name, RNG )
   for( int i = 0; i < 255; i += 2 )
   {
     image2 = image - Scalar::all(i);
-    putText( image2, "OpenCV forever!", org, CV_FONT_HERSHEY_COMPLEX, 3,
+    putText( image2, "OpenCV forever!", org, FONT_HERSHEY_COMPLEX, 3,
              Scalar(i, i, 255), 5, lineType );
 
     imshow( window_name, image2 );
diff --git a/samples/cpp/tutorial_code/core/interoperability_with_OpenCV_1/interoperability_with_OpenCV_1.cpp b/samples/cpp/tutorial_code/core/interoperability_with_OpenCV_1/interoperability_with_OpenCV_1.cpp
index aeeaf2110..4a9be8dc1 100644
--- a/samples/cpp/tutorial_code/core/interoperability_with_OpenCV_1/interoperability_with_OpenCV_1.cpp
+++ b/samples/cpp/tutorial_code/core/interoperability_with_OpenCV_1/interoperability_with_OpenCV_1.cpp
@@ -45,7 +45,7 @@ int main( int argc, char** argv )
 
     // convert image to YUV color space. The output image will be created automatically.
     Mat I_YUV;
-    cvtColor(I, I_YUV, CV_BGR2YCrCb);
+    cvtColor(I, I_YUV, COLOR_BGR2YCrCb);
 
     vector<Mat> planes;    // Use the STL's vector structure to store multiple Mat objects
     split(I_YUV, planes);  // split the image into separate color planes (Y U V)
@@ -117,7 +117,7 @@ int main( int argc, char** argv )
     cvtColor(I_YUV, I, CV_YCrCb2BGR);  // and produce the output RGB image
 
 
-    namedWindow("image with grain", CV_WINDOW_AUTOSIZE);   // use this to create images
+    namedWindow("image with grain", WINDOW_AUTOSIZE);   // use this to create images
 
 #ifdef DEMO_MIXED_API_USE
     // this is to demonstrate that I and IplI really share the data - the result of the above
diff --git a/samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp b/samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp
index 555d668ec..a446cfd2b 100644
--- a/samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp
+++ b/samples/cpp/tutorial_code/core/mat_mask_operations/mat_mask_operations.cpp
@@ -30,8 +30,8 @@ int main( int argc, char* argv[])
     else
         I = imread( filename, CV_LOAD_IMAGE_COLOR);
 
-    namedWindow("Input", CV_WINDOW_AUTOSIZE);
-    namedWindow("Output", CV_WINDOW_AUTOSIZE);
+    namedWindow("Input", WINDOW_AUTOSIZE);
+    namedWindow("Output", WINDOW_AUTOSIZE);
 
     imshow("Input", I);
     double t = (double)getTickCount();
@@ -42,7 +42,7 @@ int main( int argc, char* argv[])
     cout << "Hand written function times passed in seconds: " << t << endl;
 
     imshow("Output", J);
-    cvWaitKey(0);
+    waitKey(0);
 
     Mat kern = (Mat_<char>(3,3) <<  0, -1,  0,
                                    -1,  5, -1,
@@ -54,7 +54,7 @@ int main( int argc, char* argv[])
 
     imshow("Output", K);
 
-    cvWaitKey(0);
+    waitKey(0);
     return 0;
 }
 void Sharpen(const Mat& myImage,Mat& Result)
diff --git a/samples/cpp/tutorial_code/features2D/SURF_Homography.cpp b/samples/cpp/tutorial_code/features2D/SURF_Homography.cpp
index 506e3b44f..4b29638a5 100644
--- a/samples/cpp/tutorial_code/features2D/SURF_Homography.cpp
+++ b/samples/cpp/tutorial_code/features2D/SURF_Homography.cpp
@@ -95,8 +95,8 @@ int main( int argc, char** argv )
 
   //-- Get the corners from the image_1 ( the object to be "detected" )
   std::vector<Point2f> obj_corners(4);
-  obj_corners[0] = cvPoint(0,0); obj_corners[1] = cvPoint( img_object.cols, 0 );
-  obj_corners[2] = cvPoint( img_object.cols, img_object.rows ); obj_corners[3] = cvPoint( 0, img_object.rows );
+  obj_corners[0] = Point(0,0); obj_corners[1] = Point( img_object.cols, 0 );
+  obj_corners[2] = Point( img_object.cols, img_object.rows ); obj_corners[3] = Point( 0, img_object.rows );
   std::vector<Point2f> scene_corners(4);
 
   perspectiveTransform( obj_corners, scene_corners, H);
diff --git a/samples/cpp/tutorial_code/introduction/display_image/display_image.cpp b/samples/cpp/tutorial_code/introduction/display_image/display_image.cpp
index 8990bac28..ef8abfd7f 100644
--- a/samples/cpp/tutorial_code/introduction/display_image/display_image.cpp
+++ b/samples/cpp/tutorial_code/introduction/display_image/display_image.cpp
@@ -22,7 +22,7 @@ int main( int argc, char** argv )
         return -1;
     }
 
-    namedWindow( "Display window", CV_WINDOW_AUTOSIZE );// Create a window for display.
+    namedWindow( "Display window", WINDOW_AUTOSIZE );// Create a window for display.
     imshow( "Display window", image );                   // Show our image inside it.
 
     waitKey(0);											 // Wait for a keystroke in the window
diff --git a/samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp b/samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp
index cf3538356..240a2e198 100644
--- a/samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp
+++ b/samples/cpp/tutorial_code/introduction/windows_visual_studio_Opencv/Test.cpp
@@ -74,10 +74,10 @@ int main(int argc, char *argv[])
     const char* WIN_RF = "Reference";
 
     // Windows
-            namedWindow(WIN_RF, CV_WINDOW_AUTOSIZE );
-            namedWindow(WIN_UT, CV_WINDOW_AUTOSIZE );
-            cvMoveWindow(WIN_RF, 400       ,            0);		 //750,  2 (bernat =0)
-            cvMoveWindow(WIN_UT, refS.width,            0);		 //1500, 2
+            namedWindow(WIN_RF, WINDOW_AUTOSIZE );
+            namedWindow(WIN_UT, WINDOW_AUTOSIZE );
+            moveWindow(WIN_RF, 400       ,            0);		 //750,  2 (bernat =0)
+            moveWindow(WIN_UT, refS.width,            0);		 //1500, 2
 
     cout << "Frame resolution: Width=" << refS.width << "  Height=" << refS.height
          << " of nr#: " << captRefrnc.get(CV_CAP_PROP_FRAME_COUNT) << endl;
@@ -124,7 +124,7 @@ int main(int argc, char *argv[])
         imshow( WIN_RF, frameReference);
         imshow( WIN_UT, frameUnderTest);
 
-        c = (char)cvWaitKey(delay);
+        c = (char)waitKey(delay);
         if (c == 27) break;
     }
 
diff --git a/samples/cpp/tutorial_code/objectDetection/objectDetection.cpp b/samples/cpp/tutorial_code/objectDetection/objectDetection.cpp
index e20a7d4ca..c2465c40e 100644
--- a/samples/cpp/tutorial_code/objectDetection/objectDetection.cpp
+++ b/samples/cpp/tutorial_code/objectDetection/objectDetection.cpp
@@ -30,7 +30,7 @@ RNG rng(12345);
  */
 int main( void )
 {
-  CvCapture* capture;
+  VideoCapture capture;
   Mat frame;
 
   //-- 1. Load the cascades
@@ -38,12 +38,12 @@ int main( void )
   if( !eyes_cascade.load( eyes_cascade_name ) ){ printf("--(!)Error loading\n"); return -1; };
 
   //-- 2. Read the video stream
-  capture = cvCaptureFromCAM( -1 );
-  if( capture )
+  capture.open( -1 );
+  if( capture.isOpened() )
   {
     for(;;)
     {
-      frame = cvQueryFrame( capture );
+      capture >> frame;
 
       //-- 3. Apply the classifier to the frame
       if( !frame.empty() )
@@ -67,7 +67,7 @@ void detectAndDisplay( Mat frame )
    std::vector<Rect> faces;
    Mat frame_gray;
 
-   cvtColor( frame, frame_gray, CV_BGR2GRAY );
+   cvtColor( frame, frame_gray, COLOR_BGR2GRAY );
    equalizeHist( frame_gray, frame_gray );
    //-- Detect faces
    face_cascade.detectMultiScale( frame_gray, faces, 1.1, 2, 0|CV_HAAR_SCALE_IMAGE, Size(30, 30) );
diff --git a/samples/cpp/tutorial_code/objectDetection/objectDetection2.cpp b/samples/cpp/tutorial_code/objectDetection/objectDetection2.cpp
index 75167f612..bfa1f1538 100644
--- a/samples/cpp/tutorial_code/objectDetection/objectDetection2.cpp
+++ b/samples/cpp/tutorial_code/objectDetection/objectDetection2.cpp
@@ -30,7 +30,7 @@ RNG rng(12345);
  */
 int main( void )
 {
-  CvCapture* capture;
+  VideoCapture capture;
   Mat frame;
 
   //-- 1. Load the cascade
@@ -38,12 +38,12 @@ int main( void )
   if( !eyes_cascade.load( eyes_cascade_name ) ){ printf("--(!)Error loading\n"); return -1; };
 
   //-- 2. Read the video stream
-  capture = cvCaptureFromCAM( -1 );
-  if( capture )
+  capture.open( -1 );
+  if( capture.isOpened() )
   {
     for(;;)
     {
-      frame = cvQueryFrame( capture );
+      capture >> frame;
 
       //-- 3. Apply the classifier to the frame
       if( !frame.empty() )
@@ -67,7 +67,7 @@ void detectAndDisplay( Mat frame )
    std::vector<Rect> faces;
    Mat frame_gray;
 
-   cvtColor( frame, frame_gray, CV_BGR2GRAY );
+   cvtColor( frame, frame_gray, COLOR_BGR2GRAY );
    equalizeHist( frame_gray, frame_gray );
 
    //-- Detect faces
diff --git a/samples/cpp/video_dmtx.cpp b/samples/cpp/video_dmtx.cpp
index 4d36a3813..01eadd42c 100644
--- a/samples/cpp/video_dmtx.cpp
+++ b/samples/cpp/video_dmtx.cpp
@@ -52,7 +52,7 @@ namespace
             if (frame.empty())
                 break;
             cv::Mat gray;
-            cv::cvtColor(frame,gray,CV_RGB2GRAY);
+            cv::cvtColor(frame,gray,COLOR_RGB2GRAY);
             vector<string> codes;
             Mat corners;
             findDataMatrix(gray, codes, corners);
diff --git a/samples/cpp/video_homography.cpp b/samples/cpp/video_homography.cpp
index 01af565c4..c8388007d 100644
--- a/samples/cpp/video_homography.cpp
+++ b/samples/cpp/video_homography.cpp
@@ -161,7 +161,7 @@ int main(int ac, char ** av)
         if (frame.empty())
             break;
 
-        cvtColor(frame, gray, CV_RGB2GRAY);
+        cvtColor(frame, gray, COLOR_RGB2GRAY);
 
         detector.detect(gray, query_kpts); //Find interest points
 
diff --git a/samples/cpp/watershed.cpp b/samples/cpp/watershed.cpp
index de7f5256f..e251d5c4f 100644
--- a/samples/cpp/watershed.cpp
+++ b/samples/cpp/watershed.cpp
@@ -58,8 +58,8 @@ int main( int argc, char** argv )
     namedWindow( "image", 1 );
 
     img0.copyTo(img);
-    cvtColor(img, markerMask, CV_BGR2GRAY);
-    cvtColor(markerMask, imgGray, CV_GRAY2BGR);
+    cvtColor(img, markerMask, COLOR_BGR2GRAY);
+    cvtColor(markerMask, imgGray, COLOR_GRAY2BGR);
     markerMask = Scalar::all(0);
     imshow( "image", img );
     setMouseCallback( "image", onMouse, 0 );

From 0e1717c14c3f0928fa162c01b728cf6a830c7d11 Mon Sep 17 00:00:00 2001
From: Konstantin Matskevich <konstantin.matskevich@itseez.com>
Date: Fri, 15 Nov 2013 17:26:18 +0400
Subject: [PATCH 44/45] fixing bugs for Intel platform CPU device

---
 modules/ocl/src/arithm.cpp                    |  5 ++--
 .../src/opencl/arithm_absdiff_nonsaturate.cl  | 24 ++++++++++++-------
 modules/ocl/src/opencl/arithm_pow.cl          | 13 ++++------
 3 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp
index 997b2010f..68c52695c 100644
--- a/modules/ocl/src/arithm.cpp
+++ b/modules/ocl/src/arithm.cpp
@@ -1638,8 +1638,9 @@ static void arithmetic_pow_run(const oclMat &src, double p, oclMat &dst, string
     size_t localThreads[3]  = { 64, 4, 1 };
     size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
 
+    const char * const typeStr = depth == CV_32F ? "float" : "double";
     const char * const channelMap[] = { "", "", "2", "4", "4" };
-    std::string buildOptions = format("-D T=%s%s", depth == CV_32F ? "float" : "double", channelMap[channels]);
+    std::string buildOptions = format("-D VT=%s%s -D T=%s", typeStr, channelMap[channels], typeStr);
 
     int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
     int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
@@ -1655,7 +1656,7 @@ static void arithmetic_pow_run(const oclMat &src, double p, oclMat &dst, string
     args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));
 
     float pf = static_cast<float>(p);
-    if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
+    if(src.depth() == CV_32F)
         args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
     else
         args.push_back( make_pair( sizeof(cl_double), (void *)&p ));
diff --git a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
index c09560a5f..e03fa698a 100644
--- a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
+++ b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl
@@ -65,12 +65,16 @@ __kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_st
         int src1_index = mad24(y, src1_step, x + src1_offset);
         int src2_index = mad24(y, src2_step, x + src2_offset);
         int dst_index  = mad24(y, dst_step, x + dst_offset);
+#ifdef INTEL_DEVICE //workaround for intel compiler bug
+        if(src1_index >= 0 && src2_index >= 0)
+#endif
+        {
+            dstT t0 = convertToDstT(src1[src1_index]);
+            dstT t1 = convertToDstT(src2[src2_index]);
+            dstT t2 = t0 - t1;
 
-        dstT t0 = convertToDstT(src1[src1_index]);
-        dstT t1 = convertToDstT(src2[src2_index]);
-        dstT t2 = t0 - t1;
-
-        dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2;
+            dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2;
+        }
     }
 }
 
@@ -85,9 +89,13 @@ __kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int
     {
         int src1_index = mad24(y, src1_step, x + src1_offset);
         int dst_index  = mad24(y, dst_step, x + dst_offset);
+#ifdef INTEL_DEVICE //workaround for intel compiler bug
+        if(src1_index >= 0)
+#endif
+        {
+            dstT t0 = convertToDstT(src1[src1_index]);
 
-        dstT t0 = convertToDstT(src1[src1_index]);
-
-        dst[dst_index] = t0 >= (dstT)(0) ? t0 : -t0;
+            dst[dst_index] = t0 >= (dstT)(0) ? t0 : -t0;
+        }
     }
 }
diff --git a/modules/ocl/src/opencl/arithm_pow.cl b/modules/ocl/src/opencl/arithm_pow.cl
index bb0673d4a..385e4cc15 100644
--- a/modules/ocl/src/opencl/arithm_pow.cl
+++ b/modules/ocl/src/opencl/arithm_pow.cl
@@ -49,16 +49,13 @@
 #elif defined (cl_khr_fp64)
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
 #endif
-#define F double
-#else
-#define F float
 #endif
 
 /************************************** pow **************************************/
 
-__kernel void arithm_pow(__global T * src, int src_step, int src_offset,
-                         __global T * dst, int dst_step, int dst_offset,
-                         int rows, int cols, F p)
+__kernel void arithm_pow(__global VT * src, int src_step, int src_offset,
+                         __global VT * dst, int dst_step, int dst_offset,
+                         int rows, int cols, T p)
 {
     int x = get_global_id(0);
     int y = get_global_id(1);
@@ -68,8 +65,8 @@ __kernel void arithm_pow(__global T * src, int src_step, int src_offset,
         int src_index = mad24(y, src_step, x + src_offset);
         int dst_index = mad24(y, dst_step, x + dst_offset);
 
-        T src_data = src[src_index];
-        T tmp = src_data > 0 ? exp(p * log(src_data)) : (src_data == 0 ? 0 : exp(p * log(fabs(src_data))));
+        VT src_data = src[src_index];
+        VT tmp = src_data > 0 ? exp(p * log(src_data)) : (src_data == 0 ? 0 : exp(p * log(fabs(src_data))));
 
         dst[dst_index] = tmp;
     }

From 327d9f274ba1c67bf958af715aec21c94b0ff84b Mon Sep 17 00:00:00 2001
From: Konstantin Matskevich <konstantin.matskevich@itseez.com>
Date: Mon, 18 Nov 2013 12:55:15 +0400
Subject: [PATCH 45/45] fixed Split for Intel CPU device version "Build 78712"

---
 modules/ocl/src/split_merge.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/modules/ocl/src/split_merge.cpp b/modules/ocl/src/split_merge.cpp
index 60a27a5a0..400a05579 100644
--- a/modules/ocl/src/split_merge.cpp
+++ b/modules/ocl/src/split_merge.cpp
@@ -250,7 +250,8 @@ namespace cv
                         && devInfo.deviceType == CVCL_DEVICE_TYPE_CPU
                         && devInfo.platform->platformVendor.find("Intel") != std::string::npos
                         && (devInfo.deviceVersion.find("Build 56860") != std::string::npos
-                            || devInfo.deviceVersion.find("Build 76921") != std::string::npos))
+                            || devInfo.deviceVersion.find("Build 76921") != std::string::npos
+                            || devInfo.deviceVersion.find("Build 78712") != std::string::npos))
                     build_options += " -D BYPASS_VSTORE=true";
 
                 size_t globalThreads[3] = { divUp(src.cols, VEC_SIZE), src.rows, 1 };