Merge commit '43aec5ad' into merge-2.4

Conflicts: cmake/OpenCVConfig.cmake cmake/OpenCVLegacyOptions.cmake modules/contrib/src/retina.cpp modules/gpu/doc/camera_calibration_and_3d_reconstruction.rst modules/gpu/doc/video.rst modules/gpu/src/speckle_filtering.cpp modules/python/src2/cv2.cv.hpp modules/python/test/test2.py samples/python/watershed.py
2013-08-27 13:26:44 +04:00
parent 4d06c4c7b6 43aec5ad4a
commit 2c4bbb313c
448 changed files with 984 additions and 1307 deletions
--- a/modules/ocl/doc/data_structures.rst
+++ b/modules/ocl/doc/data_structures.rst
@@ -186,4 +186,4 @@ Only basic flags are supported in oclMat(i.e. depth number of channels)

 All the 3-channel matrix(i.e. RGB image) are represented by 4-channel matrix in oclMat. It means 3-channel image have 4-channel space with the last channel unused. We provide a transparent interface to handle the difference between OpenCV Mat and oclMat.

-For example: If a oclMat has 3 channels, channels() returns 3 and oclchannels() returns 4
+For example: If a oclMat has 3 channels, channels() returns 3 and oclchannels() returns 4
--- a/modules/ocl/doc/feature_detection_and_description.rst
+++ b/modules/ocl/doc/feature_detection_and_description.rst
@@ -499,4 +499,4 @@ Returns block descriptors computed for the whole image.

        * **DESCR_FORMAT_COL_BY_COL** - Column-major order.

-The function is mainly used to learn the classifier.
+The function is mainly used to learn the classifier.
--- a/modules/ocl/doc/image_filtering.rst
+++ b/modules/ocl/doc/image_filtering.rst
@@ -315,4 +315,4 @@ Performs linear blending of two images.

    :param weights2: Weights for second image. Must have tha same size as ``img2`` . Supports only ``CV_32F`` type.

-    :param result: Destination image.
+    :param result: Destination image.
--- a/modules/ocl/doc/matrix_reductions.rst
+++ b/modules/ocl/doc/matrix_reductions.rst
@@ -67,4 +67,4 @@ Returns the squared sum of matrix elements for each channel

    :param m: The Source image of all depth

-Counts the squared sum of matrix elements for each channel.
+Counts the squared sum of matrix elements for each channel.
--- a/modules/ocl/doc/operations_on_matrices.rst
+++ b/modules/ocl/doc/operations_on_matrices.rst
@@ -500,13 +500,13 @@ Returns void
            * **SORT_SELECTION** selection sort, currently cannot sort duplicate keys
            * **SORT_MERGE**     merge sort
            * **SORT_RADIX**     radix sort, only support signed int/float keys(``CV_32S``/``CV_32F``)
-            
+
 Returns the sorted result of all the elements in values based on equivalent keys.

-The element unit in the values to be sorted is determined from the data type, 
+The element unit in the values to be sorted is determined from the data type,
 i.e., a ``CV_32FC2`` input ``{a1a2, b1b2}`` will be considered as two elements, regardless its matrix dimension.

-Both keys and values will be sorted inplace. 
+Both keys and values will be sorted inplace.

 Keys needs to be a **single** channel `oclMat`.

--- a/modules/ocl/doc/structures_and_utility_functions.rst
+++ b/modules/ocl/doc/structures_and_utility_functions.rst
@@ -55,4 +55,4 @@ Returns the pointer to the opencl command queue

 .. ocv:function:: void* ocl::getoclCommandQueue()

-Thefunction are used to get opencl command queue so that opencv can interactive with other opencl program.
+Thefunction are used to get opencl command queue so that opencv can interactive with other opencl program.
--- a/modules/ocl/include/opencv2/ocl/private/util.hpp
+++ b/modules/ocl/include/opencv2/ocl/private/util.hpp
@@ -131,7 +131,7 @@ namespace cv
            {
                openCLFree(tex_);
            }
-            operator cl_mem() 
+            operator cl_mem()
            {
                return tex_;
            }
--- a/modules/ocl/perf/perf_arithm.cpp
+++ b/modules/ocl/perf/perf_arithm.cpp
@@ -1194,4 +1194,4 @@ PERFTEST(AddWeighted)
        }

    }
-}
+}
--- a/modules/ocl/perf/perf_blend.cpp
+++ b/modules/ocl/perf/perf_blend.cpp
@@ -119,4 +119,4 @@ PERFTEST(blend)
            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.f);
        }
    }
-}
+}
--- a/modules/ocl/perf/perf_brute_force_matcher.cpp
+++ b/modules/ocl/perf/perf_brute_force_matcher.cpp
@@ -164,4 +164,4 @@ PERFTEST(BruteForceMatcher)
        else
            TestSystem::instance().setAccurate(0, diff);
    }
-}
+}
--- a/modules/ocl/perf/perf_calib3d.cpp
+++ b/modules/ocl/perf/perf_calib3d.cpp
@@ -48,14 +48,14 @@
 ///////////// StereoMatchBM ////////////////////////
 PERFTEST(StereoMatchBM)
 {
-	Mat left_image = imread(abspath("aloeL.jpg"), cv::IMREAD_GRAYSCALE);
-	Mat right_image = imread(abspath("aloeR.jpg"), cv::IMREAD_GRAYSCALE);
-	Mat disp,dst;
-	ocl::oclMat d_left, d_right,d_disp;
-	int n_disp= 128;
-	int winSize =19;
+    Mat left_image = imread(abspath("aloeL.jpg"), cv::IMREAD_GRAYSCALE);
+    Mat right_image = imread(abspath("aloeR.jpg"), cv::IMREAD_GRAYSCALE);
+    Mat disp,dst;
+    ocl::oclMat d_left, d_right,d_disp;
+    int n_disp= 128;
+    int winSize =19;

-	SUBTEST << left_image.cols << 'x' << left_image.rows << "; aloeL.jpg ;"<< right_image.cols << 'x' << right_image.rows << "; aloeR.jpg ";
+    SUBTEST << left_image.cols << 'x' << left_image.rows << "; aloeL.jpg ;"<< right_image.cols << 'x' << right_image.rows << "; aloeR.jpg ";

 	Ptr<StereoBM> bm = createStereoBM(n_disp, winSize);
 	bm->compute(left_image, right_image, dst);
@@ -64,38 +64,29 @@ PERFTEST(StereoMatchBM)
 	bm->compute(left_image, right_image, dst);
 	CPU_OFF;

-	d_left.upload(left_image);
-	d_right.upload(right_image);
+    d_left.upload(left_image);
+    d_right.upload(right_image);

-	ocl::StereoBM_OCL d_bm(0, n_disp, winSize);
+    ocl::StereoBM_OCL d_bm(0, n_disp, winSize);

-	WARMUP_ON;
-	d_bm(d_left, d_right, d_disp);
-	WARMUP_OFF;
+    WARMUP_ON;
+    d_bm(d_left, d_right, d_disp);
+    WARMUP_OFF;

    cv::Mat ocl_mat;
    d_disp.download(ocl_mat);
    ocl_mat.convertTo(ocl_mat, dst.type());

-	GPU_ON;
-	d_bm(d_left, d_right, d_disp);
-	GPU_OFF;
+    GPU_ON;
+    d_bm(d_left, d_right, d_disp);
+    GPU_OFF;
+
+    GPU_FULL_ON;
+    d_left.upload(left_image);
+    d_right.upload(right_image);
+    d_bm(d_left, d_right, d_disp);
+    d_disp.download(disp);
+    GPU_FULL_OFF;

-	GPU_FULL_ON;
-	d_left.upload(left_image);
-	d_right.upload(right_image);
-	d_bm(d_left, d_right, d_disp);
-	d_disp.download(disp);
-	GPU_FULL_OFF;
-    
    TestSystem::instance().setAccurate(-1, 0.);
 }
-
-
-
-
-
-
-
-
-	
--- a/modules/ocl/perf/perf_canny.cpp
+++ b/modules/ocl/perf/perf_canny.cpp
@@ -82,4 +82,4 @@ PERFTEST(Canny)
    GPU_FULL_OFF;

    TestSystem::instance().ExceptedMatSimilar(edges, ocl_edges, 2e-2);
-}
+}
--- a/modules/ocl/perf/perf_fft.cpp
+++ b/modules/ocl/perf/perf_fft.cpp
@@ -88,4 +88,4 @@ PERFTEST(dft)
        }

    }
-}
+}
--- a/modules/ocl/perf/perf_filters.cpp
+++ b/modules/ocl/perf/perf_filters.cpp
@@ -284,7 +284,7 @@ PERFTEST(GaussianBlur)
    Mat src, dst, ocl_dst;
    int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
    std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"};
-    const int ksize = 7;	
+    const int ksize = 7;

    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
    {
@@ -374,4 +374,4 @@ PERFTEST(filter2D)


    }
-}
+}
--- a/modules/ocl/perf/perf_gemm.cpp
+++ b/modules/ocl/perf/perf_gemm.cpp
@@ -87,4 +87,4 @@ PERFTEST(gemm)

        TestSystem::instance().ExpectedMatNear(ocl_dst, dst, src1.cols * src1.rows * 1e-4);
    }
-}
+}
--- a/modules/ocl/perf/perf_hog.cpp
+++ b/modules/ocl/perf/perf_hog.cpp
@@ -77,7 +77,7 @@ PERFTEST(HOG)
    WARMUP_ON;
    ocl_hog.detectMultiScale(d_src, d_found_locations);
    WARMUP_OFF;
-    
+
    if(d_found_locations.size() == found_locations.size())
        TestSystem::instance().setAccurate(1, 0);
    else
@@ -91,4 +91,4 @@ PERFTEST(HOG)
    d_src.upload(src);
    ocl_hog.detectMultiScale(d_src, found_locations);
    GPU_FULL_OFF;
-}
+}
--- a/modules/ocl/perf/perf_matrix_operation.cpp
+++ b/modules/ocl/perf/perf_matrix_operation.cpp
@@ -183,4 +183,4 @@ PERFTEST(setTo)
        }

    }
-}
+}
--- a/modules/ocl/perf/perf_norm.cpp
+++ b/modules/ocl/perf/perf_norm.cpp
@@ -72,7 +72,7 @@ PERFTEST(norm)
        WARMUP_OFF;

        d_src1.download(ocl_src1);
-        TestSystem::instance().ExpectedMatNear(src1, ocl_src1, .5);                        
+        TestSystem::instance().ExpectedMatNear(src1, ocl_src1, .5);

        GPU_ON;
        ocl::norm(d_src1, d_src2, NORM_INF);
@@ -84,4 +84,4 @@ PERFTEST(norm)
        ocl::norm(d_src1, d_src2, NORM_INF);
        GPU_FULL_OFF;
    }
-}
+}
--- a/modules/ocl/perf/perf_pyramid.cpp
+++ b/modules/ocl/perf/perf_pyramid.cpp
@@ -129,4 +129,4 @@ PERFTEST(pyrUp)
            TestSystem::instance().ExpectedMatNear(dst, ocl_dst, (src.depth() == CV_32F ? 1e-4f : 1.0));
        }
    }
-}
+}
--- a/modules/ocl/perf/perf_split_merge.cpp
+++ b/modules/ocl/perf/perf_split_merge.cpp
@@ -133,7 +133,7 @@ PERFTEST(Split)

            WARMUP_ON;
            ocl::split(d_src, d_dst);
-            WARMUP_OFF;         
+            WARMUP_OFF;

            GPU_ON;
            ocl::split(d_src, d_dst);
--- a/modules/ocl/src/blend.cpp
+++ b/modules/ocl/src/blend.cpp
@@ -89,4 +89,4 @@ void cv::ocl::blendLinear(const oclMat &img1, const oclMat &img2, const oclMat &

        openCLExecuteKernel(ctx, &blend_linear, kernelName, globalSize, localSize, args, channels, depth);
    }
-}
+}
--- a/modules/ocl/src/columnsum.cpp
+++ b/modules/ocl/src/columnsum.cpp
@@ -81,4 +81,4 @@ void cv::ocl::columnSum(const oclMat &src, oclMat &dst)

    openCLExecuteKernel(clCxt, &imgproc_columnsum, kernelName, globalThreads, localThreads, args, src.channels(), src.depth());

-}
+}
--- a/modules/ocl/src/interpolate_frames.cpp
+++ b/modules/ocl/src/interpolate_frames.cpp
@@ -235,4 +235,3 @@ void interpolate::bindImgTex(const oclMat &img, cl_mem &texture)
    }
    texture = bindTexture(img);
 }
-
--- a/modules/ocl/src/kmeans.cpp
+++ b/modules/ocl/src/kmeans.cpp
@@ -435,4 +435,3 @@ double cv::ocl::kmeans(const oclMat &_src, int K, oclMat &_bestLabels,

    return best_compactness;
 }
-
--- a/modules/ocl/src/opencl/arithm_add_scalar.cl
+++ b/modules/ocl/src/opencl/arithm_add_scalar.cl
@@ -67,7 +67,7 @@ __kernel void arithm_s_add_C1_D0 (__global   uchar *src1, int src1_step, int src
    if (x < cols && y < rows)
    {
        x = x << 2;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
@@ -110,7 +110,7 @@ __kernel void arithm_s_add_C1_D2 (__global   ushort *src1, int src1_step, int sr
    if (x < cols && y < rows)
    {
        x = x << 1;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
@@ -145,7 +145,7 @@ __kernel void arithm_s_add_C1_D3 (__global   short *src1, int src1_step, int src
    if (x < cols && y < rows)
    {
        x = x << 1;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
@@ -250,7 +250,7 @@ __kernel void arithm_s_add_C2_D0 (__global   uchar *src1, int src1_step, int src
    if (x < cols && y < rows)
    {
        x = x << 1;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
--- a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl
+++ b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl
@@ -69,7 +69,7 @@ __kernel void arithm_s_add_with_mask_C1_D0 (__global   uchar *src1, int src1_ste
    if (x < cols && y < rows)
    {
        x = x << 2;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
@@ -122,7 +122,7 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global   ushort *src1, int src1_st
    if (x < cols && y < rows)
    {
        x = x << 1;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
@@ -160,7 +160,7 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global   short *src1, int src1_ste
    if (x < cols && y < rows)
    {
        x = x << 1;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
@@ -284,7 +284,7 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global   uchar *src1, int src1_ste
    if (x < cols && y < rows)
    {
        x = x << 1;
-        
+
 #ifdef dst_align
 #undef dst_align
 #endif
--- a/modules/ocl/src/opencl/arithm_minMaxLoc.cl
+++ b/modules/ocl/src/opencl/arithm_minMaxLoc.cl
@@ -377,4 +377,4 @@ __kernel void arithm_op_minMaxLoc_mask (int cols,int invalid_cols,int offset,int
   }
 }

-*/
+*/
--- a/modules/ocl/src/opencl/arithm_mul.cl
+++ b/modules/ocl/src/opencl/arithm_mul.cl
@@ -300,4 +300,4 @@ __kernel void arithm_muls_D5 (__global float *src1, int src1_step, int src1_offs

        *((__global float *)((__global char *)dst + dst_index)) = tmp;
    }
-}
+}
--- a/modules/ocl/src/opencl/filtering_boxFilter.cl
+++ b/modules/ocl/src/opencl/filtering_boxFilter.cl
@@ -231,7 +231,7 @@ __kernel void boxFilter_C1_D0(__global const uchar * restrict src, __global ucha
    {
        tmp_sum += (data[i]);
    }
-    
+
    int index = dst_startY * dst_step + dst_startX + (col-anX)*4;

    temp[0][col] = tmp_sum + (data[0]);
--- a/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
+++ b/modules/ocl/src/opencl/haarobjectdetect_scaled2.cl
@@ -207,7 +207,7 @@ __kernel void gpuRunHaarClassifierCascade_scaled2(
                        - sum[clamp(mad24(info3.y, step, info3.z), 0, max_idx)] -
                                     sum[clamp(mad24(info3.w, step, info3.x), 0, max_idx)]
                        + sum[clamp(mad24(info3.w, step, info3.z), 0, max_idx)]) * w.z;
-                        
+
                        bool passThres = classsum >= nodethreshold;

 #if STUMP_BASED
@@ -304,4 +304,3 @@ __kernel void gpuscaleclassifier(global GpuHidHaarTreeNode *orinode, global GpuH
    newnode[counter].alpha[1] = t1.alpha[1];
    newnode[counter].alpha[2] = t1.alpha[2];
 }
-
--- a/modules/ocl/src/opencl/imgproc_clahe.cl
+++ b/modules/ocl/src/opencl/imgproc_clahe.cl
@@ -71,15 +71,15 @@ void reduce(volatile __local int* smem, int val, int tid)
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid < 128)
-    { 
+    {
        smem[tid] = val += smem[tid + 128];
-    } 
+    }
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid < 64)
-    { 
+    {
        smem[tid] = val += smem[tid + 64];
-    } 
+    }
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid < 32)
@@ -125,15 +125,15 @@ void reduce(__local volatile int* smem, int val, int tid)
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid < 128)
-    { 
+    {
        smem[tid] = val += smem[tid + 128];
-    } 
+    }
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid < 64)
-    { 
+    {
        smem[tid] = val += smem[tid + 64];
-    } 
+    }
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid < 32)
--- a/modules/ocl/src/opencl/imgproc_gftt.cl
+++ b/modules/ocl/src/opencl/imgproc_gftt.cl
@@ -49,12 +49,12 @@

 __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

-inline float ELEM_INT2(image2d_t _eig, int _x, int _y) 
+inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
 {
    return read_imagef(_eig, sampler, (int2)(_x, _y)).x;
 }

-inline float ELEM_FLT2(image2d_t _eig, float2 pt) 
+inline float ELEM_FLT2(image2d_t _eig, float2 pt)
 {
    return read_imagef(_eig, sampler, pt).x;
 }
@@ -132,7 +132,7 @@ __kernel
    const int pairDistance = 1 << (stage - passOfStage);
    const int blockWidth   = 2 * pairDistance;

-    const int leftId = min( (threadId % pairDistance) 
+    const int leftId = min( (threadId % pairDistance)
                   + (threadId / pairDistance) * blockWidth, count );

    const int rightId = min( leftId + pairDistance, count );
@@ -147,7 +147,7 @@ __kernel

    float2 greater = compareResult ? leftPt:rightPt;
    float2 lesser  = compareResult ? rightPt:leftPt;
-    
+
    corners[leftId]  = sortOrder ? lesser : greater;
    corners[rightId] = sortOrder ? greater : lesser;
 }
@@ -195,20 +195,20 @@ __kernel
    {
        pt2  = scratch[j];
        val2 = ELEM_FLT2(eig, pt2);
-        if(val2 > val1) 
+        if(val2 > val1)
            pos++;//calculate the rank of this element in this work group
-        else 
+        else
        {
            if(val1 > val2)
                continue;
-            else 
+            else
            {
                // val1 and val2 are same
                same++;
            }
        }
    }
-    for (int j=0; j< same; j++)      
+    for (int j=0; j< same; j++)
        corners[pos + j] = pt1;
 }
 __kernel
@@ -240,15 +240,15 @@ __kernel
        for(int k=0; k<wg; k++)
        {
            pt2  = corners[j*wg + k];
-            val2 = ELEM_FLT2(eig, pt2); 
+            val2 = ELEM_FLT2(eig, pt2);
            if(val1 > val2)
                break;
            else
            {
-                //Increment only if the value is not the same. 
+                //Increment only if the value is not the same.
                if( val2 > val1 )
                    pos++;
-                else 
+                else
                    same++;
            }
        }
@@ -257,20 +257,19 @@ __kernel
    for(int k=0; k<remainder; k++)
    {
        pt2  = corners[(numOfGroups-1)*wg + k];
-        val2 = ELEM_FLT2(eig, pt2); 
+        val2 = ELEM_FLT2(eig, pt2);
        if(val1 > val2)
            break;
        else
        {
-            //Don't increment if the value is the same. 
+            //Don't increment if the value is the same.
            //Two elements are same if (*userComp)(jData, iData)  and (*userComp)(iData, jData) are both false
            if(val2 > val1)
                pos++;
-            else 
+            else
                same++;
        }
-    }  
-    for (int j=0; j< same; j++)      
+    }
+    for (int j=0; j< same; j++)
        corners[pos + j] = pt1;
 }
-
--- a/modules/ocl/src/opencl/imgproc_integral.cl
+++ b/modules/ocl/src/opencl/imgproc_integral.cl
@@ -490,4 +490,4 @@ kernel void integral_rows_D5(__global float4 *srcsum,__global float4 * srcsqsum,
        }
        barrier(CLK_LOCAL_MEM_FENCE);
    }
-}
+}
--- a/modules/ocl/src/opencl/kernel_radix_sort_by_key.cl
+++ b/modules/ocl/src/opencl/kernel_radix_sort_by_key.cl
@@ -43,7 +43,7 @@
 //
 //M*/

-#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable 
+#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable

 #ifndef N   // number of radices
 #define N 4
@@ -71,14 +71,14 @@ __inline uint convertKey(uint converted_key)
    converted_key ^= mask;
 #elif defined(K_INT)
    const uint SIGN_MASK = 1u << ((sizeof(int) * 8) - 1);
-    converted_key ^= SIGN_MASK;	
+    converted_key ^= SIGN_MASK;
 #else

 #endif
    return converted_key;
 }

-//FIXME(pengx17): 
+//FIXME(pengx17):
 // exclusive scan, need to be optimized as this is too naive...
 kernel
    void naiveScanAddition(
@@ -108,7 +108,7 @@ kernel
 {
    const int RADIX_T     = N;
    const int RADICES_T   = (1 << RADIX_T);
-    const int NUM_OF_ELEMENTS_PER_WORK_ITEM_T = RADICES_T; 
+    const int NUM_OF_ELEMENTS_PER_WORK_ITEM_T = RADICES_T;
    const int MASK_T      = (1 << RADIX_T) - 1;
    int localBuckets[16] = {0,0,0,0,0,0,0,0,
                            0,0,0,0,0,0,0,0};
--- a/modules/ocl/src/opencl/kernel_sort_by_key.cl
+++ b/modules/ocl/src/opencl/kernel_sort_by_key.cl
@@ -62,7 +62,7 @@
 #endif

 /////////////////////// Bitonic sort ////////////////////////////
-// ported from 
+// ported from
 // https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_by_key_kernels.cl
 __kernel
    void bitonicSort
@@ -82,7 +82,7 @@ __kernel
    const int pairDistance = 1 << (stage - passOfStage);
    const int blockWidth   = 2 * pairDistance;

-    int leftId = min( (threadId % pairDistance) 
+    int leftId = min( (threadId % pairDistance)
                   + (threadId / pairDistance) * blockWidth, count );

    int rightId = min( leftId + pairDistance, count );
@@ -90,7 +90,7 @@ __kernel
    int temp;

    const V_T lval = vals[leftId];
-    const V_T rval = vals[rightId]; 
+    const V_T rval = vals[rightId];

    const K_T lkey = keys[leftId];
    const K_T rkey = keys[rightId];
@@ -142,7 +142,7 @@ __kernel

    int offset   = groupID * wg;
    int same     = 0;
-    
+
    vals      += offset;
    keys      += offset;
    n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg;
@@ -163,13 +163,13 @@ __kernel
    for (int j=0;j<n;++j)
    {
        key2  = scratch[j];
-        if(my_comp(key2, key1)) 
+        if(my_comp(key2, key1))
            pos++;//calculate the rank of this element in this work group
-        else 
+        else
        {
            if(my_comp(key1, key2))
                continue;
-            else 
+            else
            {
                // key1 and key2 are same
                same++;
@@ -209,15 +209,15 @@ __kernel
    {
        for(int k=0; k<wg; k++)
        {
-            key2 = keys[j*wg + k]; 
+            key2 = keys[j*wg + k];
            if(my_comp(key1, key2))
                break;
            else
            {
-                //Increment only if the value is not the same. 
+                //Increment only if the value is not the same.
                if(my_comp(key2, key1))
                    pos++;
-                else 
+                else
                    same++;
            }
        }
@@ -225,18 +225,18 @@ __kernel

    for(int k=0; k<remainder; k++)
    {
-        key2 = keys[(numOfGroups-1)*wg + k]; 
+        key2 = keys[(numOfGroups-1)*wg + k];
        if(my_comp(key1, key2))
            break;
        else
        {
-            //Don't increment if the value is the same. 
+            //Don't increment if the value is the same.
            if(my_comp(key2, key1))
                pos++;
-            else 
+            else
                same++;
        }
-    }  
+    }
    for (int j=0; j< same; j++)
    {
        vals[pos + j] = val1;
--- a/modules/ocl/src/opencl/objdetect_hog.cl
+++ b/modules/ocl/src/opencl/objdetect_hog.cl
@@ -318,15 +318,15 @@ __kernel void classify_hists_180_kernel(
    volatile __local float* smem = products;
 #ifdef CPU
    if (tid < 13) smem[tid] = product = product + smem[tid + 32];
-	barrier(CLK_LOCAL_MEM_FENCE);
+    barrier(CLK_LOCAL_MEM_FENCE);
    if (tid < 16) smem[tid] = product = product + smem[tid + 16];
-	barrier(CLK_LOCAL_MEM_FENCE);
-	if(tid<8) smem[tid] = product = product + smem[tid + 8];
-	barrier(CLK_LOCAL_MEM_FENCE);
-	if(tid<4) smem[tid] = product = product + smem[tid + 4];
-	barrier(CLK_LOCAL_MEM_FENCE);
-	if(tid<2) smem[tid] = product = product + smem[tid + 2];
-	barrier(CLK_LOCAL_MEM_FENCE);
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<8) smem[tid] = product = product + smem[tid + 8];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<4) smem[tid] = product = product + smem[tid + 4];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<2) smem[tid] = product = product + smem[tid + 2];
+    barrier(CLK_LOCAL_MEM_FENCE);
 #else
    if (tid < 13)
    {
@@ -345,9 +345,9 @@ __kernel void classify_hists_180_kernel(
 #endif

    if (tid == 0){
-		product = product + smem[tid + 1];
+        product = product + smem[tid + 1];
        labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
-	}
+    }
 }

 //---------------------------------------------------------------------
@@ -388,18 +388,18 @@ __kernel void classify_hists_252_kernel(
    if (tid < 64) products[tid] = product = product + products[tid + 64];
    barrier(CLK_LOCAL_MEM_FENCE);

-	volatile __local float* smem = products;
+    volatile __local float* smem = products;
 #ifdef CPU
-	if(tid<32) smem[tid] = product = product + smem[tid + 32];
-	barrier(CLK_LOCAL_MEM_FENCE);
-	if(tid<16) smem[tid] = product = product + smem[tid + 16];
-	barrier(CLK_LOCAL_MEM_FENCE);
-	if(tid<8) smem[tid] = product = product + smem[tid + 8];
-	barrier(CLK_LOCAL_MEM_FENCE);
-	if(tid<4) smem[tid] = product = product + smem[tid + 4];
-	barrier(CLK_LOCAL_MEM_FENCE);
-	if(tid<2) smem[tid] = product = product + smem[tid + 2];
-	barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<32) smem[tid] = product = product + smem[tid + 32];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<16) smem[tid] = product = product + smem[tid + 16];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<8) smem[tid] = product = product + smem[tid + 8];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<4) smem[tid] = product = product + smem[tid + 4];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<2) smem[tid] = product = product + smem[tid + 2];
+    barrier(CLK_LOCAL_MEM_FENCE);
 #else
    if (tid < 32)
    {
@@ -415,9 +415,9 @@ __kernel void classify_hists_252_kernel(
    }
 #endif
    if (tid == 0){
-		product = product + smem[tid + 1];
+        product = product + smem[tid + 1];
        labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
-	}
+    }
 }

 //---------------------------------------------------------------------
@@ -458,18 +458,18 @@ __kernel void classify_hists_kernel(
    if (tid < 64) products[tid] = product = product + products[tid + 64];
    barrier(CLK_LOCAL_MEM_FENCE);

-	volatile __local float* smem = products;
+    volatile __local float* smem = products;
 #ifdef CPU
-	if(tid<32) smem[tid] = product = product + smem[tid + 32];
-	barrier(CLK_LOCAL_MEM_FENCE);
-	if(tid<16) smem[tid] = product = product + smem[tid + 16];
-	barrier(CLK_LOCAL_MEM_FENCE);
-	if(tid<8) smem[tid] = product = product + smem[tid + 8];
-	barrier(CLK_LOCAL_MEM_FENCE);
-	if(tid<4) smem[tid] = product = product + smem[tid + 4];
-	barrier(CLK_LOCAL_MEM_FENCE);
-	if(tid<2) smem[tid] = product = product + smem[tid + 2];
-	barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<32) smem[tid] = product = product + smem[tid + 32];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<16) smem[tid] = product = product + smem[tid + 16];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<8) smem[tid] = product = product + smem[tid + 8];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<4) smem[tid] = product = product + smem[tid + 4];
+    barrier(CLK_LOCAL_MEM_FENCE);
+    if(tid<2) smem[tid] = product = product + smem[tid + 2];
+    barrier(CLK_LOCAL_MEM_FENCE);
 #else
    if (tid < 32)
    {
@@ -485,9 +485,9 @@ __kernel void classify_hists_kernel(
    }
 #endif
    if (tid == 0){
-		smem[tid] = product = product + smem[tid + 1];
+        smem[tid] = product = product + smem[tid + 1];
        labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold);
-	}
+    }
 }

 //----------------------------------------------------------------------------
--- a/modules/ocl/src/opencl/stereobm.cl
+++ b/modules/ocl/src/opencl/stereobm.cl
@@ -190,7 +190,7 @@ __kernel void stereoKernel(__global unsigned char *left, __global unsigned char
        {
            int idx1 = y_tex * img_step + x_tex;
            int idx2 = min(y_tex + ((radius << 1) + 1), cheight - 1) * img_step + x_tex;
-            
+
            barrier(CLK_LOCAL_MEM_FENCE);

            StepDown(idx1, idx2, left, right, d, col_ssd);
--- a/modules/ocl/src/opencl/stereocsbp.cl
+++ b/modules/ocl/src/opencl/stereocsbp.cl
@@ -129,7 +129,7 @@ __kernel void get_first_k_initial_global_1(__global  float *data_cost_selected_,
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////get_first_k_initial_local////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
-__kernel void get_first_k_initial_local_0(__global  short *data_cost_selected_, __global short *selected_disp_pyr, 
+__kernel void get_first_k_initial_local_0(__global  short *data_cost_selected_, __global short *selected_disp_pyr,
    __global  short *ctemp,int h, int w, int nr_plane,
    int cmsg_step1, int cdisp_step1, int cndisp)
 {
@@ -187,7 +187,7 @@ __kernel void get_first_k_initial_local_0(__global  short *data_cost_selected_,
    }
 }

-__kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, __global float *selected_disp_pyr, 
+__kernel void get_first_k_initial_local_1(__global float *data_cost_selected_, __global float *selected_disp_pyr,
    __global float *ctemp,int h, int w, int nr_plane,
    int cmsg_step1,  int cdisp_step1, int cndisp)
 {
@@ -257,20 +257,20 @@ float compute_3(__global uchar* left, __global uchar* right,

    return fmin(cdata_weight * (tr + tg + tb), cdata_weight * cmax_data_term);
 }
-float compute_1(__global uchar* left, __global uchar* right, 
+float compute_1(__global uchar* left, __global uchar* right,
    float cdata_weight,  float cmax_data_term)
 {
    return fmin(cdata_weight * abs((int)*left - (int)*right), cdata_weight * cmax_data_term);
 }
 short round_short(float v){
-    return convert_short_sat_rte(v); 
+    return convert_short_sat_rte(v);
 }
 ///////////////////////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////init_data_cost///////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////////////////////
-__kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright, 
+__kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
    int h, int w, int level, int channels,
-    int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1, 
+    int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
    int cth, int cimg_step, int cndisp)
 {
    int x = get_global_id(0);
@@ -312,9 +312,9 @@ __kernel void init_data_cost_0(__global short *ctemp, __global uchar *cleft, __g
        }
    }
 }
-__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright, 
+__kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __global uchar *cright,
    int h, int w, int level, int channels,
-    int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1, 
+    int cmsg_step1, float cdata_weight, float cmax_data_term, int cdisp_step1,
    int cth, int cimg_step, int cndisp)
 {
    int x = get_global_id(0);
@@ -361,13 +361,13 @@ __kernel void init_data_cost_1(__global float *ctemp, __global uchar *cleft, __g
 //////////////////////////////////////////////////////////////////////////////////////////////////////////
 __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cleft, __global uchar *cright,
    __local float *smem, int level, int rows, int cols, int h, int winsz, int channels,
-    int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth, 
+    int cndisp,int cimg_step, float cdata_weight, float cmax_data_term, int cth,
    int cdisp_step1, int cmsg_step1)
 {
    int x_out = get_group_id(0);
    int y_out = get_group_id(1) % h;
    //int d = (blockIdx.y / h) * blockDim.z + threadIdx.z;
-    int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2); 
+    int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);

    int tid = get_local_id(0);

@@ -411,39 +411,39 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
    if(d < cndisp)
    {
        __local float* dline = smem + winsz * get_local_id(2);
-        if (winsz >= 256) 
+        if (winsz >= 256)
        {
-            if (tid < 128) 
-                dline[tid] += dline[tid + 128]; 
+            if (tid < 128)
+                dline[tid] += dline[tid + 128];
        }
    }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);

    if(d < cndisp)
    {
        __local float* dline = smem + winsz * get_local_id(2);
-        if (winsz >= 128) 
+        if (winsz >= 128)
        {
-            if (tid <  64) 
-                dline[tid] += dline[tid + 64]; 
+            if (tid <  64)
+                dline[tid] += dline[tid + 64];
        }
    }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);

    if(d < cndisp)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 64) 
-            if (tid < 32) 
+        if (winsz >= 64)
+            if (tid < 32)
                vdline[tid] += vdline[tid + 32];
    }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);

    if(d < cndisp)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 32) 
-            if (tid < 16) 
+        if (winsz >= 32)
+            if (tid < 16)
                vdline[tid] += vdline[tid + 16];
    }
    barrier(CLK_LOCAL_MEM_FENCE);
@@ -452,7 +452,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
        if (winsz >= 16)
-            if (tid <  8) 
+            if (tid <  8)
                vdline[tid] += vdline[tid + 8];
    }
    barrier(CLK_LOCAL_MEM_FENCE);
@@ -461,7 +461,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
        if (winsz >= 8)
-            if (tid <  4) 
+            if (tid <  4)
                vdline[tid] += vdline[tid + 4];
    }
    barrier(CLK_LOCAL_MEM_FENCE);
@@ -470,7 +470,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
        if (winsz >= 4)
-            if (tid <  2) 
+            if (tid <  2)
                vdline[tid] += vdline[tid + 2];
    }
    barrier(CLK_LOCAL_MEM_FENCE);
@@ -479,7 +479,7 @@ __kernel void init_data_cost_reduce_0(__global short *ctemp, __global uchar *cle
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
        if (winsz >= 2)
-            if (tid <  1) 
+            if (tid <  1)
                vdline[tid] += vdline[tid + 1];
    }
    barrier(CLK_LOCAL_MEM_FENCE);
@@ -500,7 +500,7 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
 {
    int x_out = get_group_id(0);
    int y_out = get_group_id(1) % h;
-    int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2); 
+    int d = (get_group_id(1) / h ) * get_local_size(2) + get_local_id(2);

    int tid = get_local_id(0);

@@ -545,74 +545,74 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
    if(d < cndisp)
    {
        __local float* dline = smem + winsz * get_local_id(2);
-        if (winsz >= 256) 
-            if (tid < 128) 
-                dline[tid] += dline[tid + 128]; 
+        if (winsz >= 256)
+            if (tid < 128)
+                dline[tid] += dline[tid + 128];
    }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);

    if(d < cndisp)
    {
        __local float* dline = smem + winsz * get_local_id(2);
-        if (winsz >= 128) 
-            if (tid < 64) 
-                dline[tid] += dline[tid + 64]; 
+        if (winsz >= 128)
+            if (tid < 64)
+                dline[tid] += dline[tid + 64];
    }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);

    if(d < cndisp)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 64) 
-            if (tid < 32) 
-                vdline[tid] += vdline[tid + 32]; 
+        if (winsz >= 64)
+            if (tid < 32)
+                vdline[tid] += vdline[tid + 32];
    }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);

    if(d < cndisp)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 32) 
-            if (tid < 16) 
-                vdline[tid] += vdline[tid + 16]; 
+        if (winsz >= 32)
+            if (tid < 16)
+                vdline[tid] += vdline[tid + 16];
    }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);

    if(d < cndisp)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 16) 
-            if (tid < 8) 
-                vdline[tid] += vdline[tid + 8]; 
+        if (winsz >= 16)
+            if (tid < 8)
+                vdline[tid] += vdline[tid + 8];
    }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);

    if(d < cndisp)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 8) 
-            if (tid < 4) 
-                vdline[tid] += vdline[tid + 4]; 
+        if (winsz >= 8)
+            if (tid < 4)
+                vdline[tid] += vdline[tid + 4];
    }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);

    if(d < cndisp)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 4) 
-            if (tid < 2) 
-                vdline[tid] += vdline[tid + 2]; 
+        if (winsz >= 4)
+            if (tid < 2)
+                vdline[tid] += vdline[tid + 2];
    }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);

    if(d < cndisp)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 2) 
-            if (tid < 1) 
-                vdline[tid] += vdline[tid + 1]; 
+        if (winsz >= 2)
+            if (tid < 1)
+                vdline[tid] += vdline[tid + 1];
    }
-    barrier(CLK_LOCAL_MEM_FENCE); 
+    barrier(CLK_LOCAL_MEM_FENCE);

    if(d < cndisp)
    {
@@ -626,10 +626,10 @@ __kernel void init_data_cost_reduce_1(__global float *ctemp, __global uchar *cle
 ///////////////////////////////////////////////////////////////
 ////////////////////// compute data cost //////////////////////
 ///////////////////////////////////////////////////////////////
-__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_, 
+__kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __global short *data_cost_,
    __global uchar *cleft, __global uchar *cright,
    int h, int w, int level, int nr_plane, int channels,
-    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight, 
+    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
    float cmax_data_term, int cimg_step, int cth)
 {

@@ -676,10 +676,10 @@ __kernel void compute_data_cost_0(__global const short *selected_disp_pyr, __glo
        }
    }
 }
-__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_, 
+__kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __global float *data_cost_,
    __global uchar *cleft, __global uchar *cright,
    int h, int w, int level, int nr_plane, int channels,
-    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight, 
+    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, float cdata_weight,
    float cmax_data_term, int cimg_step, int cth)
 {

@@ -728,11 +728,11 @@ __kernel void compute_data_cost_1(__global const float *selected_disp_pyr, __glo
 ////////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////compute_data_cost_reduce//////////////////////////////////////////
 /////////////////////////////////////////////////////////////////////////////////////////////////////////
-__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_, 
+__kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr, __global short* data_cost_,
    __global uchar *cleft, __global uchar *cright,__local float *smem,
-    int level, int rows, int cols, int h, int nr_plane, 
+    int level, int rows, int cols, int h, int nr_plane,
    int channels, int winsz,
-    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2, 
+    int cmsg_step1, int cmsg_step2, int cdisp_step1, int cdisp_step2,
    float cdata_weight,  float cmax_data_term, int cimg_step,int cth)

 {
@@ -788,9 +788,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
    if(d < nr_plane)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 64) 
+        if (winsz >= 64)
        {
-            if (tid < 32) 
+            if (tid < 32)
                vdline[tid] += vdline[tid + 32];
        }
    }
@@ -799,9 +799,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
    if(d < nr_plane)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 32) 
+        if (winsz >= 32)
        {
-            if (tid < 16) 
+            if (tid < 16)
                vdline[tid] += vdline[tid + 16];
        }
    }
@@ -810,9 +810,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
    if(d < nr_plane)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 16) 
+        if (winsz >= 16)
        {
-            if (tid < 8) 
+            if (tid < 8)
                vdline[tid] += vdline[tid + 8];
        }
    }
@@ -821,9 +821,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
    if(d < nr_plane)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 8) 
+        if (winsz >= 8)
        {
-            if (tid < 4) 
+            if (tid < 4)
                vdline[tid] += vdline[tid + 4];
        }
    }
@@ -832,9 +832,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
    if(d < nr_plane)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 4) 
+        if (winsz >= 4)
        {
-            if (tid < 2) 
+            if (tid < 2)
                vdline[tid] += vdline[tid + 2];
        }
    }
@@ -843,9 +843,9 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
    if(d < nr_plane)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 2) 
+        if (winsz >= 2)
        {
-            if (tid < 1) 
+            if (tid < 1)
                vdline[tid] += vdline[tid + 1];
        }
    }
@@ -859,11 +859,11 @@ __kernel void compute_data_cost_reduce_0(__global const short* selected_disp_pyr
    }
 }

-__kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr, __global float *data_cost_, 
+__kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr, __global float *data_cost_,
    __global uchar *cleft, __global uchar *cright, __local float *smem,
-    int level, int rows, int cols, int h, int nr_plane, 
+    int level, int rows, int cols, int h, int nr_plane,
    int channels, int winsz,
-    int cmsg_step1, int cmsg_step2, int cdisp_step1,int cdisp_step2, float cdata_weight, 
+    int cmsg_step1, int cmsg_step2, int cdisp_step1,int cdisp_step2, float cdata_weight,
    float cmax_data_term, int cimg_step, int cth)

 {
@@ -918,21 +918,21 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
    if(d < nr_plane)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 64) 
+        if (winsz >= 64)
        {
-            if (tid < 32) 
+            if (tid < 32)
                vdline[tid] += vdline[tid + 32];
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);


-    if(d < nr_plane)	
+    if(d < nr_plane)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 32) 
+        if (winsz >= 32)
        {
-            if (tid < 16) 
+            if (tid < 16)
                vdline[tid] += vdline[tid + 16];
        }
    }
@@ -941,9 +941,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
    if(d < nr_plane)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >= 16) 
+        if (winsz >= 16)
        {
-            if (tid <  8) 
+            if (tid <  8)
                vdline[tid] += vdline[tid + 8];
        }
    }
@@ -952,9 +952,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
    if(d < nr_plane)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >=  8) 
+        if (winsz >=  8)
        {
-            if (tid <  4) 
+            if (tid <  4)
                vdline[tid] += vdline[tid + 4];
        }
    }
@@ -963,9 +963,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
    if(d < nr_plane)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >=  4) 
+        if (winsz >=  4)
        {
-            if (tid <  2) 
+            if (tid <  2)
                vdline[tid] += vdline[tid + 2];
        }
    }
@@ -974,9 +974,9 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
    if(d < nr_plane)
    {
        __local volatile float* vdline = smem + winsz * get_local_id(2);
-        if (winsz >=  2) 
+        if (winsz >=  2)
        {
-            if (tid <  1) 
+            if (tid <  1)
                vdline[tid] += vdline[tid + 1];
        }
    }
@@ -993,11 +993,11 @@ __kernel void compute_data_cost_reduce_1(__global const float *selected_disp_pyr
 ///////////////////////////////////////////////////////////////
 //////////////////////// init message /////////////////////////
 ///////////////////////////////////////////////////////////////
-void get_first_k_element_increase_0(__global short* u_new, __global short *d_new, __global short *l_new, 
-    __global short *r_new, __global const short *u_cur, __global const short *d_cur, 
-    __global const short *l_cur, __global const short *r_cur, 
-    __global short *data_cost_selected, __global short *disparity_selected_new, 
-    __global short *data_cost_new, __global const short* data_cost_cur, 
+void get_first_k_element_increase_0(__global short* u_new, __global short *d_new, __global short *l_new,
+    __global short *r_new, __global const short *u_cur, __global const short *d_cur,
+    __global const short *l_cur, __global const short *r_cur,
+    __global short *data_cost_selected, __global short *disparity_selected_new,
+    __global short *data_cost_new, __global const short* data_cost_cur,
    __global const short *disparity_selected_cur,
    int nr_plane, int nr_plane2,
    int cdisp_step1, int cdisp_step2)
@@ -1027,11 +1027,11 @@ void get_first_k_element_increase_0(__global short* u_new, __global short *d_new
        data_cost_new[id * cdisp_step1] = SHRT_MAX;
    }
 }
-void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new, 
-    __global float *r_new, __global const float *u_cur, __global const float *d_cur, 
+void get_first_k_element_increase_1(__global float *u_new, __global float *d_new, __global float *l_new,
+    __global float *r_new, __global const float *u_cur, __global const float *d_cur,
    __global const float *l_cur, __global const float *r_cur,
-    __global float *data_cost_selected, __global float *disparity_selected_new, 
-    __global float *data_cost_new, __global const float *data_cost_cur, 
+    __global float *data_cost_selected, __global float *disparity_selected_new,
+    __global float *data_cost_new, __global const float *data_cost_cur,
    __global const float *disparity_selected_cur,
    int nr_plane, int nr_plane2,
    int cdisp_step1, int cdisp_step2)
@@ -1057,13 +1057,13 @@ void get_first_k_element_increase_1(__global float *u_new, __global float *d_new
        u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
        d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
        l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
-        r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];   
+        r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
        data_cost_new[id * cdisp_step1] = FLT_MAX;

    }
 }
 __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __global short *l_new_,
-    __global short *r_new_, __global  short *u_cur_, __global const short *d_cur_, 
+    __global short *r_new_, __global  short *u_cur_, __global const short *d_cur_,
    __global const short *l_cur_, __global const short *r_cur_, __global short *ctemp,
    __global short *selected_disp_pyr_new, __global const short *selected_disp_pyr_cur,
    __global short *data_cost_selected_, __global const short *data_cost_,
@@ -1113,7 +1113,7 @@ __kernel void init_message_0(__global short *u_new_, __global short *d_new_, __g
    }
 }
 __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __global float *l_new_,
-    __global float *r_new_, __global const float *u_cur_, __global const float *d_cur_, 
+    __global float *r_new_, __global const float *u_cur_, __global const float *d_cur_,
    __global const float *l_cur_, __global const float *r_cur_, __global float *ctemp,
    __global float *selected_disp_pyr_new, __global const float *selected_disp_pyr_cur,
    __global float *data_cost_selected_, __global const float *data_cost_,
@@ -1176,28 +1176,28 @@ __kernel void init_message_1(__global float *u_new_, __global float *d_new_, __g
                    id = j;
                }
            }
-            data_cost_selected[i * cdisp_step1] = data_cost[id * cdisp_step1];	
+            data_cost_selected[i * cdisp_step1] = data_cost[id * cdisp_step1];
            disparity_selected_new[i * cdisp_step1] = disparity_selected_cur[id * cdisp_step2];
            u_new[i * cdisp_step1] = u_cur[id * cdisp_step2];
            d_new[i * cdisp_step1] = d_cur[id * cdisp_step2];
            l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
-            r_new[i * cdisp_step1] = r_cur[id * cdisp_step2]; 
+            r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
            data_cost_new[id * cdisp_step1] = FLT_MAX;
-        }  
+        }
    }
 }

 ///////////////////////////////////////////////////////////////
 ////////////////////  calc all iterations /////////////////////
 ///////////////////////////////////////////////////////////////
-void message_per_pixel_0(__global const short *data, __global short *msg_dst, __global const short *msg1, 
+void message_per_pixel_0(__global const short *data, __global short *msg_dst, __global const short *msg1,
    __global const short *msg2, __global const short *msg3,
-    __global const short *dst_disp, __global const short *src_disp, 
+    __global const short *dst_disp, __global const short *src_disp,
    int nr_plane, __global short *temp,
    float cmax_disc_term, int cdisp_step1, float cdisc_single_jump)
 {
    short minimum = SHRT_MAX;
-    for(int d = 0; d < nr_plane; d++)     
+    for(int d = 0; d < nr_plane; d++)
    {
        int idx = d * cdisp_step1;
        short val  = data[idx] + msg1[idx] + msg2[idx] + msg3[idx];
@@ -1215,7 +1215,7 @@ void message_per_pixel_0(__global const short *data, __global short *msg_dst, __
        short src_disp_reg = src_disp[d * cdisp_step1];

        for(int d2 = 0; d2 < nr_plane; d2++)
-            cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] + 
+            cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] +
            cdisc_single_jump * abs(dst_disp[d2 * cdisp_step1] - src_disp_reg)));

        temp[d * cdisp_step1] = convert_short_sat_rte(cost_min);
@@ -1226,14 +1226,14 @@ void message_per_pixel_0(__global const short *data, __global short *msg_dst, __
    for(int d = 0; d < nr_plane; d++)
        msg_dst[d * cdisp_step1] = convert_short_sat_rte(temp[d * cdisp_step1] - sum);
 }
-void message_per_pixel_1(__global const float *data, __global float *msg_dst, __global const float *msg1, 
+void message_per_pixel_1(__global const float *data, __global float *msg_dst, __global const float *msg1,
    __global const float *msg2, __global const float *msg3,
-    __global const float *dst_disp, __global const float *src_disp, 
+    __global const float *dst_disp, __global const float *src_disp,
    int nr_plane, __global float *temp,
    float cmax_disc_term, int cdisp_step1, float cdisc_single_jump)
 {
    float minimum = FLT_MAX;
-    for(int d = 0; d < nr_plane; d++)     
+    for(int d = 0; d < nr_plane; d++)
    {
        int idx = d * cdisp_step1;
        float val  = data[idx] + msg1[idx] + msg2[idx] + msg3[idx];
@@ -1251,7 +1251,7 @@ void message_per_pixel_1(__global const float *data, __global float *msg_dst, __
        float src_disp_reg = src_disp[d * cdisp_step1];

        for(int d2 = 0; d2 < nr_plane; d2++)
-            cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] + 
+            cost_min = fmin(cost_min, (msg_dst[d2 * cdisp_step1] +
            cdisc_single_jump * fabs(dst_disp[d2 * cdisp_step1] - src_disp_reg)));

        temp[d * cdisp_step1] = cost_min;
@@ -1262,9 +1262,9 @@ void message_per_pixel_1(__global const float *data, __global float *msg_dst, __
    for(int d = 0; d < nr_plane; d++)
        msg_dst[d * cdisp_step1] = temp[d * cdisp_step1] - sum;
 }
-__kernel void compute_message_0(__global short *u_, __global short *d_, __global short *l_, __global short *r_, 
-    __global const short *data_cost_selected, __global const short *selected_disp_pyr_cur, 
-    __global short *ctemp, int h, int w, int nr_plane, int i, 
+__kernel void compute_message_0(__global short *u_, __global short *d_, __global short *l_, __global short *r_,
+    __global const short *data_cost_selected, __global const short *selected_disp_pyr_cur,
+    __global short *ctemp, int h, int w, int nr_plane, int i,
    float cmax_disc_term, int cdisp_step1, int cmsg_step1, float cdisc_single_jump)
 {
    int y = get_global_id(1);
@@ -1283,7 +1283,7 @@ __kernel void compute_message_0(__global short *u_, __global short *d_, __global

        __global short *temp = ctemp + y * cmsg_step1 + x;

-        message_per_pixel_0(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp, 
+        message_per_pixel_0(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp,
            cmax_disc_term, cdisp_step1, cdisc_single_jump);
        message_per_pixel_0(data, d, d - cmsg_step1, r - 1, l + 1, disp, disp + cmsg_step1, nr_plane, temp,
            cmax_disc_term, cdisp_step1, cdisc_single_jump);
@@ -1293,9 +1293,9 @@ __kernel void compute_message_0(__global short *u_, __global short *d_, __global
            cmax_disc_term, cdisp_step1, cdisc_single_jump);
    }
 }
-__kernel void compute_message_1(__global float *u_, __global float *d_, __global float *l_, __global float *r_, 
-    __global const float *data_cost_selected, __global const float *selected_disp_pyr_cur, 
-    __global float *ctemp, int h, int w, int nr_plane, int i, 
+__kernel void compute_message_1(__global float *u_, __global float *d_, __global float *l_, __global float *r_,
+    __global const float *data_cost_selected, __global const float *selected_disp_pyr_cur,
+    __global float *ctemp, int h, int w, int nr_plane, int i,
    float cmax_disc_term, int cdisp_step1, int cmsg_step1, float cdisc_single_jump)
 {
    int y = get_global_id(1);
@@ -1313,7 +1313,7 @@ __kernel void compute_message_1(__global float *u_, __global float *d_, __global
        __global const float *disp = selected_disp_pyr_cur + y * cmsg_step1 + x;
        __global float *temp = ctemp + y * cmsg_step1 + x;

-        message_per_pixel_1(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp, 
+        message_per_pixel_1(data, u, r - 1, u + cmsg_step1, l + 1, disp, disp - cmsg_step1, nr_plane, temp,
            cmax_disc_term, cdisp_step1, cdisc_single_jump);
        message_per_pixel_1(data, d, d - cmsg_step1, r - 1, l + 1, disp, disp + cmsg_step1, nr_plane, temp,
            cmax_disc_term, cdisp_step1, cdisc_single_jump);
@@ -1327,10 +1327,10 @@ __kernel void compute_message_1(__global float *u_, __global float *d_, __global
 ///////////////////////////////////////////////////////////////
 /////////////////////////// output ////////////////////////////
 ///////////////////////////////////////////////////////////////
-__kernel void compute_disp_0(__global const short *u_, __global const short *d_, __global const short *l_, 
-    __global const short *r_, __global const short * data_cost_selected, 
+__kernel void compute_disp_0(__global const short *u_, __global const short *d_, __global const short *l_,
+    __global const short *r_, __global const short * data_cost_selected,
    __global const short *disp_selected_pyr,
-    __global short* disp, 
+    __global short* disp,
    int res_step, int cols, int rows, int nr_plane,
    int cmsg_step1, int cdisp_step1)
 {
@@ -1364,10 +1364,10 @@ __kernel void compute_disp_0(__global const short *u_, __global const short *d_,
        disp[res_step * y + x] = best;
    }
 }
-__kernel void compute_disp_1(__global const float *u_, __global const float *d_, __global const float *l_, 
-    __global const float *r_, __global const float *data_cost_selected, 
+__kernel void compute_disp_1(__global const float *u_, __global const float *d_, __global const float *l_,
+    __global const float *r_, __global const float *data_cost_selected,
    __global const float *disp_selected_pyr,
-    __global short *disp, 
+    __global short *disp,
    int res_step, int cols, int rows, int nr_plane,
    int cmsg_step1, int cdisp_step1)
 {
--- a/modules/ocl/src/opencl/tvl1flow.cl
+++ b/modules/ocl/src/opencl/tvl1flow.cl
@@ -43,7 +43,7 @@
 //
 //M*/

-__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step, 
+__kernel void centeredGradientKernel(__global const float* src, int src_col, int src_row, int src_step,
 __global float* dx, __global float* dy, int dx_step)
 {
    int x = get_global_id(0);
@@ -51,9 +51,9 @@ __global float* dx, __global float* dy, int dx_step)

    if((x < src_col)&&(y < src_row))
    {
-		int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1);
-     	int src_x2 = (x - 1) > 0 ? (x -1) : 0;
-        
+        int src_x1 = (x + 1) < (src_col -1)? (x + 1) : (src_col - 1);
+        int src_x2 = (x - 1) > 0 ? (x -1) : 0;
+
        //if(src[y * src_step + src_x1] == src[y * src_step+ src_x2])
        //{
        //    printf("y = %d\n", y);
@@ -61,8 +61,8 @@ __global float* dx, __global float* dy, int dx_step)
        //    printf("src_x2 = %d\n", src_x2);
        //}
        dx[y * dx_step+ x] = 0.5f * (src[y * src_step + src_x1] - src[y * src_step+ src_x2]);
-        
-		int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1);
+
+        int src_y1 = (y+1) < (src_row - 1) ? (y + 1) : (src_row - 1);
        int src_y2 = (y - 1) > 0 ? (y - 1) : 0;
        dy[y * dx_step+ x] = 0.5f * (src[src_y1 * src_step + x] - src[src_y2 * src_step+ x]);
    }
@@ -89,20 +89,20 @@ float bicubicCoeff(float x_)
 }

 __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_col, int I0_row,
-	image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y,  
-    __global const float* u1, int u1_step, 
+    image2d_t tex_I1, image2d_t tex_I1x, image2d_t tex_I1y,
+    __global const float* u1, int u1_step,
    __global const float* u2,
    __global float* I1w,
-	__global float* I1wx, /*int I1wx_step,*/
-	__global float* I1wy, /*int I1wy_step,*/
-	__global float* grad, /*int grad_step,*/
-	__global float* rho,
-	int I1w_step,
-	int u2_step,
-	int u1_offset_x,
-	int u1_offset_y,
-	int u2_offset_x,
-	int u2_offset_y)
+    __global float* I1wx, /*int I1wx_step,*/
+    __global float* I1wy, /*int I1wy_step,*/
+    __global float* grad, /*int grad_step,*/
+    __global float* rho,
+    int I1w_step,
+    int u2_step,
+    int u1_offset_x,
+    int u1_offset_y,
+    int u2_offset_x,
+    int u2_offset_y)
 {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
@@ -136,7 +136,7 @@ __kernel void warpBackwardKernel(__global const float* I0, int I0_step, int I0_c
                const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);

                //sum  += w * tex2D(tex_I1 , cx, cy);
-				int2 cood = (int2)(cx, cy);
+                int2 cood = (int2)(cx, cy);
                sum += w * read_imagef(tex_I1, sampleri, cood).x;
                //sumx += w * tex2D(tex_I1x, cx, cy);
                sumx += w * read_imagef(tex_I1x, sampleri, cood).x;
@@ -181,18 +181,18 @@ float readImage(__global const float *image,  const int x,  const int y,  const
 }

 __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step, int I0_col, int I0_row,
-	__global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y,  
-    __global const float* u1, int u1_step, 
+    __global const float* tex_I1, __global const float* tex_I1x, __global const float* tex_I1y,
+    __global const float* u1, int u1_step,
    __global const float* u2,
    __global float* I1w,
-	__global float* I1wx, /*int I1wx_step,*/
-	__global float* I1wy, /*int I1wy_step,*/
-	__global float* grad, /*int grad_step,*/
-	__global float* rho,
-	int I1w_step,
-	int u2_step,
-	int I1_step,
-	int I1x_step)
+    __global float* I1wx, /*int I1wx_step,*/
+    __global float* I1wy, /*int I1wy_step,*/
+    __global float* grad, /*int grad_step,*/
+    __global float* rho,
+    int I1w_step,
+    int u2_step,
+    int I1_step,
+    int I1x_step)
 {
    const int x = get_global_id(0);
    const int y = get_global_id(1);
@@ -224,7 +224,7 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
            {
                const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);

-				int2 cood = (int2)(cx, cy);
+                int2 cood = (int2)(cx, cy);
                sum += w * readImage(tex_I1, cood.x, cood.y, I0_col, I0_row, I1_step);
                sumx += w * readImage(tex_I1x, cood.x, cood.y, I0_col, I0_row, I1x_step);
                sumy += w * readImage(tex_I1y, cood.x, cood.y, I0_col, I0_row, I1x_step);
@@ -256,18 +256,18 @@ __kernel void warpBackwardKernelNoImage2d(__global const float* I0, int I0_step,
 }


-__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step, 
-    __global const float* u2, 
-    __global float* p11, int p11_step, 
+__kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col, int u1_row, int u1_step,
+    __global const float* u2,
+    __global float* p11, int p11_step,
    __global float* p12,
    __global float* p21,
-    __global float* p22, 
+    __global float* p22,
    const float taut,
-	int u2_step,
-	int u1_offset_x,
-	int u1_offset_y,
-	int u2_offset_x,
-	int u2_offset_y)
+    int u2_step,
+    int u1_offset_x,
+    int u1_offset_y,
+    int u2_offset_x,
+    int u2_offset_y)
 {

    //const int x = blockIdx.x * blockDim.x + threadIdx.x;
@@ -277,16 +277,16 @@ __kernel void estimateDualVariablesKernel(__global const float* u1, int u1_col,

    if(x < u1_col && y < u1_row)
    {
-		int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
+        int src_x1 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
        const float u1x = u1[(y + u1_offset_y) * u1_step + src_x1 + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];
-        
-		int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
+
+        int src_y1 = (y + 1) < (u1_row - 1) ? (y + 1) : (u1_row - 1);
        const float u1y = u1[(src_y1 + u1_offset_y) * u1_step + x + u1_offset_x] - u1[(y + u1_offset_y) * u1_step + x + u1_offset_x];

-		int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
+        int src_x2 = (x + 1) < (u1_col - 1) ? (x + 1) : (u1_col - 1);
        const float u2x = u2[(y + u2_offset_y) * u2_step + src_x2 + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];

-		int src_y2 = (y + 1) <  (u1_row - 1) ? (y + 1) : (u1_row - 1);
+        int src_y2 = (y + 1) <  (u1_row - 1) ? (y + 1) : (u1_row - 1);
        const float u2y = u2[(src_y2 + u2_offset_y) * u2_step + x + u2_offset_x] - u2[(y + u2_offset_y) * u2_step + x + u2_offset_x];

        const float g1 = hypot(u1x, u1y);
@@ -329,19 +329,19 @@ float divergence(__global const float* v1, __global const float* v2, int y, int

 __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx_row, int I1wx_step,
    __global const float* I1wy, /*int I1wy_step,*/
-    __global const float* grad, /*int grad_step,*/ 
+    __global const float* grad, /*int grad_step,*/
    __global const float* rho_c, /*int rho_c_step,*/
    __global const float* p11, /*int p11_step,*/
    __global const float* p12, /*int p12_step,*/
    __global const float* p21, /*int p21_step,*/
    __global const float* p22, /*int p22_step,*/
-    __global float* u1, int u1_step, 
-    __global float* u2, 
+    __global float* u1, int u1_step,
+    __global float* u2,
    __global float* error, const float l_t, const float theta, int u2_step,
-	int u1_offset_x,
-	int u1_offset_y,
-	int u2_offset_x,
-	int u2_offset_y)
+    int u1_offset_x,
+    int u1_offset_y,
+    int u2_offset_x,
+    int u2_offset_y)
 {

    //const int x = blockIdx.x * blockDim.x + threadIdx.x;
--- a/modules/ocl/src/pyrdown.cpp
+++ b/modules/ocl/src/pyrdown.cpp
@@ -109,4 +109,3 @@ void cv::ocl::pyrDown(const oclMat &src, oclMat &dst)

    pyrdown_run(src, dst);
 }
-
--- a/modules/ocl/src/pyrup.cpp
+++ b/modules/ocl/src/pyrup.cpp
@@ -85,4 +85,4 @@ namespace cv
            openCLExecuteKernel(clCxt, &pyr_up, kernelName, globalThreads, localThreads, args, src.oclchannels(), src.depth());
        }
    }
-}
+}
--- a/modules/ocl/src/sort_by_key.cpp
+++ b/modules/ocl/src/sort_by_key.cpp
@@ -160,7 +160,7 @@ static void sortByKey(oclMat& keys, oclMat& vals, size_t vecSize, bool isGreater

 namespace radix_sort
 {
-//FIXME(pengx17): 
+//FIXME(pengx17):
 // exclusive scan, need to be optimized as this is too naive...
 //void naive_scan_addition(oclMat& input, oclMat& output)
 //{
@@ -247,8 +247,8 @@ static void sortByKey(oclMat& keys, oclMat& vals, size_t origVecSize, bool isGre
            }
        }
        ocl::copyMakeBorder(
-            keys(Rect(0,0,origVecSize,1)), buffer_keys, 
-            0, 0, 0, vecSize - origVecSize, 
+            keys(Rect(0,0,origVecSize,1)), buffer_keys,
+            0, 0, 0, vecSize - origVecSize,
            BORDER_CONSTANT, padding_value);
        vals(Rect(0,0,origVecSize,1)).copyTo(roi_buffer_vals);
        newBuffer = true;
@@ -274,7 +274,7 @@ static void sortByKey(oclMat& keys, oclMat& vals, size_t origVecSize, bool isGre
    genSortBuildOption(keys, vals, isGreaterThan, build_opt_buf);

    //additional build option for radix sort
-    sprintf(build_opt_buf + strlen(build_opt_buf), " -D K_%s", isKeyFloat?"FLT":"INT"); 
+    sprintf(build_opt_buf + strlen(build_opt_buf), " -D K_%s", isKeyFloat?"FLT":"INT");

    String kernelnames[2] = {String("histogramRadixN"), String("permuteRadixN")};

--- a/modules/ocl/test/test_arithm.cpp
+++ b/modules/ocl/test/test_arithm.cpp
@@ -178,7 +178,7 @@ PARAM_TEST_CASE(ArithmTestBase, MatType, bool)
    }

    void Near1(double threshold = 0.)
-    {     
+    {
        EXPECT_MAT_NEAR(dst1, Mat(gdst1_whole), threshold);
    }

--- a/modules/ocl/test/test_blend.cpp
+++ b/modules/ocl/test/test_blend.cpp
@@ -116,4 +116,4 @@ INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Blend, Combine(
                            DIFFERENT_SIZES,
                            testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4), MatType(CV_32FC1), MatType(CV_32FC4))
                        ));
-#endif
+#endif
--- a/modules/ocl/test/test_canny.cpp
+++ b/modules/ocl/test/test_canny.cpp
@@ -86,4 +86,4 @@ TEST_P(Canny, Accuracy)
 INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Canny, testing::Combine(
                            testing::Values(AppertureSize(3), AppertureSize(5)),
                            testing::Values(L2gradient(false), L2gradient(true))));
-#endif
+#endif
--- a/modules/ocl/test/test_filters.cpp
+++ b/modules/ocl/test/test_filters.cpp
@@ -57,8 +57,8 @@ using namespace testing;
 using namespace std;


-PARAM_TEST_CASE(FilterTestBase, 
-                MatType, 
+PARAM_TEST_CASE(FilterTestBase,
+                MatType,
                cv::Size, // kernel size
                cv::Size, // dx,dy
                int       // border type, or iteration
@@ -367,7 +367,7 @@ INSTANTIATE_TEST_CASE_P(Filter, Laplacian, Combine(
                        Values(0)));        //not use

 INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(
-                        Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), 
+                        Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4),
                        Values(Size(0, 0)), //not use
                        Values(Size(0, 0)), //not use
                        Values(1)));
@@ -383,7 +383,7 @@ INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(
 INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine(
                        Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC4),
                        Values(Size(0, 0)), //not use
-                        Values(Size(0, 1), Size(1, 0)), 
+                        Values(Size(0, 1), Size(1, 0)),
                        Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE)));

 INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(
@@ -395,7 +395,7 @@ INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine(


 INSTANTIATE_TEST_CASE_P(Filter, Filter2D, testing::Combine(
-                        Values(CV_8UC1, CV_32FC1, CV_32FC4), 
+                        Values(CV_8UC1, CV_32FC1, CV_32FC4),
                        Values(Size(3, 3), Size(15, 15), Size(25, 25)),
                        Values(Size(0, 0)), //not use
                        Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REFLECT101, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT)));
--- a/modules/ocl/test/test_imgproc.cpp
+++ b/modules/ocl/test/test_imgproc.cpp
@@ -448,7 +448,7 @@ PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bo
    {
        cv::Mat cpu_cldst;
        cldst.download(cpu_cldst);
-        EXPECT_MAT_NEAR(dst, cpu_cldst, threshold);       
+        EXPECT_MAT_NEAR(dst, cpu_cldst, threshold);
    }
 };
 ////////////////////////////////equalizeHist//////////////////////////////////////////
--- a/modules/ocl/test/test_kmeans.cpp
+++ b/modules/ocl/test/test_kmeans.cpp
@@ -87,7 +87,7 @@ PARAM_TEST_CASE(Kmeans, int, int, int)
            for(int j = 0; j < nchannel; j++)
                center_row_header.at<float>(0, i*nchannel+j) = 50000.0;

-            for(int j = 0; (j < max_neighbour) || 
+            for(int j = 0; (j < max_neighbour) ||
                           (i == K-1 && j < max_neighbour + MHEIGHT%K); j ++)
            {
                Mat cur_row_header = src.row(row_idx + 1 + j);
@@ -121,15 +121,15 @@ TEST_P(Kmeans, Mat){
        ocl::kmeans(d_src, K, d_labels,
            TermCriteria( TermCriteria::EPS + TermCriteria::MAX_ITER, 100, 0),
            1, flags, d_centers);
-  
+
        Mat dd_labels(d_labels);
        Mat dd_centers(d_centers);
        if(flags & KMEANS_USE_INITIAL_LABELS)
        {
            EXPECT_MAT_NEAR(labels, dd_labels, 0);
            EXPECT_MAT_NEAR(centers, dd_centers, 1e-3);
-        } 
-        else 
+        }
+        else
        {
            int row_idx = 0;
            for(int i = 0; i < K; i++)
@@ -157,6 +157,6 @@ TEST_P(Kmeans, Mat){
 INSTANTIATE_TEST_CASE_P(OCL_ML, Kmeans, Combine(
    Values(3, 5, 8),
    Values(CV_32FC1, CV_32FC2, CV_32FC4),
-    Values(OCL_KMEANS_USE_INITIAL_LABELS/*, OCL_KMEANS_PP_CENTERS*/))); 
+    Values(OCL_KMEANS_USE_INITIAL_LABELS/*, OCL_KMEANS_PP_CENTERS*/)));

 #endif
--- a/modules/ocl/test/test_optflow.cpp
+++ b/modules/ocl/test/test_optflow.cpp
@@ -85,7 +85,7 @@ TEST_P(GoodFeaturesToTrack, Accuracy)
    ASSERT_FALSE(d_pts.empty());

    std::vector<cv::Point2f> pts(d_pts.cols);
-    
+
    detector.downloadPoints(d_pts, pts);

    std::vector<cv::Point2f> pts_gold;
@@ -125,7 +125,7 @@ TEST_P(GoodFeaturesToTrack, EmptyCorners)
    ASSERT_TRUE(corners.empty());
 }

-INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack, 
+INSTANTIATE_TEST_CASE_P(OCL_Video, GoodFeaturesToTrack,
    testing::Values(MinDistance(0.0), MinDistance(3.0)));

 //////////////////////////////////////////////////////////////////////////
@@ -342,4 +342,3 @@ INSTANTIATE_TEST_CASE_P(OCL_Video, Farneback, testing::Combine(
    testing::Values(UseInitFlow(false), UseInitFlow(true))));

 #endif // HAVE_OPENCL
-
--- a/modules/ocl/test/test_pyramids.cpp
+++ b/modules/ocl/test/test_pyramids.cpp
@@ -79,7 +79,7 @@ TEST_P(PyrDown, Mat)
        Size size(MWIDTH, MHEIGHT);
        Mat src = randomMat(size, CV_MAKETYPE(type, channels));
        oclMat gsrc(src);
-        
+
        pyrDown(src, dst_cpu);
        pyrDown(gsrc, gdst);

--- a/modules/ocl/test/test_sort.cpp
+++ b/modules/ocl/test/test_sort.cpp
@@ -59,7 +59,7 @@ IMPLEMENT_PARAM_CLASS(InputSize, int)
 IMPLEMENT_PARAM_CLASS(SortMethod, int)


-template<class T> 
+template<class T>
 struct KV_CVTYPE{ static int toType() {return 0;} };

 template<> struct KV_CVTYPE<int>  { static int toType() {return CV_32SC1;} };
@@ -101,7 +101,7 @@ void kvquicksort(Mat& keys, Mat& vals, bool isGreater = false)
 {
    vector<pair<key_type, val_type> > kvres;
    toKVPair(keys.begin<key_type>(), vals.begin<val_type>(), keys.cols, kvres);
-    
+
    if(isGreater)
    {
        std::sort(kvres.begin(), kvres.end(), kvgreater<key_type, val_type>);
@@ -180,7 +180,7 @@ bool checkUnstableSorterResult(const Mat& gkeys_, const Mat& gvals_,
        {
            ++ iden_count;
        }
-        
+
        // sort dv and gv
        int num_of_val = (iden_count + 1) * cn_val;
        std::sort(gvptr + i * cn_val, gvptr + i * cn_val + num_of_val);
--- a/modules/ocl/test/utility.cpp
+++ b/modules/ocl/test/utility.cpp
@@ -225,7 +225,7 @@ double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& o
        cpu_result.setTo(0);

        for(vector<Rect>::const_iterator r = ob1.begin(); r != ob1.end(); r++)
-        {      
+        {
            cv::Mat cpu_result_roi(cpu_result, *r);
            cpu_result_roi.setTo(1);
            cpu_result.copyTo(cpu_result);
@@ -251,4 +251,3 @@ double checkRectSimilarity(Size sz, std::vector<Rect>& ob1, std::vector<Rect>& o
    }
    return final_test_result;
 }
-