generalized OpenCL version of cv::sepFilter2D; removed some restrictions and added 3-channels support

2014-03-19 18:49:33 +04:00 · 2014-03-19 18:49:33 +04:00 · 291458a859
commit 291458a859
parent b449b0bf71
6 changed files with 112 additions and 89 deletions
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@ -4317,8 +4317,8 @@ String kernelToStr(InputArray _kernel, int ddepth, const char * name)
    if (ddepth != depth)
        kernel.convertTo(kernel, ddepth);

-    typedef std::string (*func_t)(const Mat &);
-    static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>,kerToStr<short>,
+    typedef std::string (* func_t)(const Mat &);
+    static const func_t funcs[] = { kerToStr<uchar>, kerToStr<char>, kerToStr<ushort>, kerToStr<short>,
                                    kerToStr<int>, kerToStr<float>, kerToStr<double>, 0 };
    const func_t func = funcs[depth];
    CV_Assert(func != 0);
--- a/modules/imgproc/src/filter.cpp
+++ b/modules/imgproc/src/filter.cpp
@ -41,7 +41,6 @@
 //M*/

 #include "precomp.hpp"
-#define CV_OPENCL_RUN_ASSERT
 #include "opencl_kernels.hpp"
 #include <sstream>

@ -3135,7 +3134,7 @@ template<typename ST, class CastOp, class VecOp> struct Filter2D : public BaseFi
 // b e h b e h 0 0
 // c f i c f i 0 0
 template <typename T>
-static int _prepareKernelFilter2D(std::vector<T>& data, const Mat &kernel)
+static int _prepareKernelFilter2D(std::vector<T> & data, const Mat & kernel)
 {
    Mat _kernel; kernel.convertTo(_kernel, DataDepth<T>::value);
    int size_y_aligned = ROUNDUP(kernel.rows * 2, 4);
@ -3318,11 +3317,16 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
    return kernel.run(2, globalsize, localsize, true);
 }

-static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, int borderType)
+static bool ocl_sepRowFilter2D(const UMat & src, UMat & buf, const Mat & kernelX, int anchor,
+                               int borderType, int ddepth, bool fast8uc1)
 {
    int type = src.type(), cn = CV_MAT_CN(type), sdepth = CV_MAT_DEPTH(type);
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    Size bufSize = buf.size();

+    if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F))
+        return false;
+
 #ifdef ANDROID
    size_t localsize[2] = {16, 10};
 #else
@ -3330,7 +3334,7 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
 #endif

    size_t globalsize[2] = {DIVUP(bufSize.width, localsize[0]) * localsize[0], DIVUP(bufSize.height, localsize[1]) * localsize[1]};
-    if (type == CV_8UC1)
+    if (fast8uc1)
        globalsize[0] = DIVUP((bufSize.width + 3) >> 2, localsize[0]) * localsize[0];

    int radiusX = anchor, radiusY = (buf.rows - src.rows) >> 1;
@ -3346,20 +3350,21 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,

    char cvt[40];
    cv::String build_options = cv::format("-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D %s -D %s"
-                                          " -D srcT=%s -D dstT=%s -D convertToDstT=%s -D srcT1=%s -D dstT1=%s",
+                                          " -D srcT=%s -D dstT=%s -D convertToDstT=%s -D srcT1=%s -D dstT1=%s%s",
                                          radiusX, (int)localsize[0], (int)localsize[1], cn, btype,
                                          extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
                                          isolated ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED",
                                          ocl::typeToStr(type), ocl::typeToStr(CV_32FC(cn)),
                                          ocl::convertTypeStr(sdepth, CV_32F, cn, cvt),
-                                          ocl::typeToStr(sdepth), ocl::typeToStr(CV_32F));
+                                          ocl::typeToStr(sdepth), ocl::typeToStr(CV_32F),
+                                          doubleSupport ? " -D DOUBLE_SUPPORT" : "");
    build_options += ocl::kernelToStr(kernelX, CV_32F);

    Size srcWholeSize; Point srcOffset;
    src.locateROI(srcWholeSize, srcOffset);

    String kernelName("row_filter");
-    if (type == CV_8UC1)
+    if (fast8uc1)
        kernelName += "_C1_D0";

    ocl::Kernel k(kernelName.c_str(), cv::ocl::imgproc::filterSepRow_oclsrc,
@ -3367,39 +3372,47 @@ static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor,
    if (k.empty())
        return false;

-    k.args(ocl::KernelArg::PtrReadOnly(src), (int)(src.step / src.elemSize()), srcOffset.x,
-           srcOffset.y, src.cols, src.rows, srcWholeSize.width, srcWholeSize.height,
-           ocl::KernelArg::PtrWriteOnly(buf), (int)(buf.step / buf.elemSize()),
-           buf.cols, buf.rows, radiusY);
+    if (fast8uc1)
+        k.args(ocl::KernelArg::PtrReadOnly(src), (int)(src.step / src.elemSize()), srcOffset.x,
+               srcOffset.y, src.cols, src.rows, srcWholeSize.width, srcWholeSize.height,
+               ocl::KernelArg::PtrWriteOnly(buf), (int)(buf.step / buf.elemSize()),
+               buf.cols, buf.rows, radiusY);
+    else
+        k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, srcOffset.x,
+               srcOffset.y, src.cols, src.rows, srcWholeSize.width, srcWholeSize.height,
+               ocl::KernelArg::PtrWriteOnly(buf), (int)buf.step, buf.cols, buf.rows, radiusY);

    return k.run(2, globalsize, localsize, false);
 }

-static bool ocl_sepColFilter2D(const UMat &buf, UMat &dst, Mat &kernelY, int anchor)
+static bool ocl_sepColFilter2D(const UMat & buf, UMat & dst, const Mat & kernelY, int anchor)
 {
+    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
+    if (dst.depth() == CV_64F && !doubleSupport)
+        return false;
+
 #ifdef ANDROID
-    size_t localsize[2] = {16, 10};
+    size_t localsize[2] = { 16, 10 };
 #else
-    size_t localsize[2] = {16, 16};
+    size_t localsize[2] = { 16, 16 };
 #endif
-    size_t globalsize[2] = {0, 0};
+    size_t globalsize[2] = { 0, 0 };

    int dtype = dst.type(), cn = CV_MAT_CN(dtype), ddepth = CV_MAT_DEPTH(dtype);
    Size sz = dst.size();

    globalsize[1] = DIVUP(sz.height, localsize[1]) * localsize[1];
-
-    if (dtype == CV_8UC2)
-        globalsize[0] = DIVUP((sz.width + 1) / 2, localsize[0]) * localsize[0];
-    else
-        globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
+    globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];

    char cvt[40];
    cv::String build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d"
-                                          " -D srcT=%s -D dstT=%s -D convertToDstT=%s",
+                                          " -D srcT=%s -D dstT=%s -D convertToDstT=%s"
+                                          " -D srcT1=%s -D dstT1=%s%s",
                                          anchor, (int)localsize[0], (int)localsize[1], cn,
                                          ocl::typeToStr(buf.type()), ocl::typeToStr(dtype),
-                                          ocl::convertTypeStr(CV_32F, ddepth, cn, cvt));
+                                          ocl::convertTypeStr(CV_32F, ddepth, cn, cvt),
+                                          ocl::typeToStr(CV_32F), ocl::typeToStr(ddepth),
+                                          doubleSupport ? " -D DOUBLE_SUPPORT" : "");
    build_options += ocl::kernelToStr(kernelY, CV_32F);

    ocl::Kernel k("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc,
@ -3407,13 +3420,13 @@ static bool ocl_sepColFilter2D(const UMat &buf, UMat &dst, Mat &kernelY, int anc
    if (k.empty())
        return false;

-    k.args(ocl::KernelArg::PtrReadOnly(buf), (int)(buf.step / buf.elemSize()), buf.cols,
-           buf.rows, ocl::KernelArg::PtrWriteOnly(dst), (int)(dst.offset / dst.elemSize()),
-           (int)(dst.step / dst.elemSize()), dst.cols, dst.rows);
+    k.args(ocl::KernelArg::ReadOnly(buf), ocl::KernelArg::WriteOnly(dst));

    return k.run(2, globalsize, localsize, false);
 }

+#if 0
+
 const int optimizedSepFilterLocalSize = 16;

 static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
@ -3471,18 +3484,19 @@ static bool ocl_sepFilter2D_SinglePass(InputArray _src, OutputArray _dst,
    return k.run(2, gt2, lt2, false);
 }

+#endif
+
 static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
                      InputArray _kernelX, InputArray _kernelY, Point anchor,
                      double delta, int borderType )
 {
-    Size imgSize = _src.size();
+//    Size imgSize = _src.size();

    if (abs(delta)> FLT_MIN)
        return false;

    int type = _src.type(), cn = CV_MAT_CN(type);
-    if ( !( (type == CV_8UC1 || type == CV_8UC4 || type == CV_32FC1 || type == CV_32FC4) &&
-            (ddepth == CV_32F || ddepth == CV_16S || ddepth == CV_8U || ddepth < 0) ) )
+    if (cn > 4)
        return false;

    Mat kernelX = _kernelX.getMat().reshape(1, 1);
@ -3501,9 +3515,6 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
    if (ddepth < 0)
        ddepth = sdepth;

-//    printf("%d %d\n", imgSize.width, optimizedSepFilterLocalSize + (kernelX.rows >> 1));
-//    printf("%d %d\n", imgSize.height, optimizedSepFilterLocalSize + (kernelY.rows >> 1));
-
 //    CV_OCL_RUN_(kernelY.rows <= 21 && kernelX.rows <= 21 &&
 //        imgSize.width > optimizedSepFilterLocalSize + (kernelX.rows >> 1) &&
 //        imgSize.height > optimizedSepFilterLocalSize + (kernelY.rows >> 1),
@ -3512,20 +3523,19 @@ static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
    UMat src = _src.getUMat();
    Size srcWholeSize; Point srcOffset;
    src.locateROI(srcWholeSize, srcOffset);
-    if ( (0 != (srcOffset.x % 4))   ||
-         (0 != (src.cols % 4))      ||
-         (0 != ((src.step / src.elemSize()) % 4))
-       )
-        return false;
+
+    bool fast8uc1 = type == CV_8UC1 && srcOffset.x % 4 == 0 &&
+            src.cols % 4 == 0 && src.step % 4 == 0;

    Size srcSize = src.size();
    Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1);
-    UMat buf; buf.create(bufSize, CV_MAKETYPE(CV_32F, cn));
-    if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType))
+    UMat buf(bufSize, CV_32FC(cn));
+    if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, ddepth, fast8uc1))
        return false;

    _dst.create(srcSize, CV_MAKETYPE(ddepth, cn));
    UMat dst = _dst.getUMat();
+
    return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y);
 }

--- a/modules/imgproc/src/opencl/filterSepCol.cl
+++ b/modules/imgproc/src/opencl/filterSepCol.cl
@ -34,29 +34,36 @@
 //
 //

+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
+#endif
+
 #define READ_TIMES_COL ((2*(RADIUSY+LSIZE1)-1)/LSIZE1)
 #define RADIUS 1

 #define noconvert

-/**********************************************************************************
-These kernels are written for separable filters such as Sobel, Scharr, GaussianBlur.
-Now(6/29/2011) the kernels only support 8U data type and the anchor of the convovle
-kernel must be in the center. ROI is not supported either.
-Each kernels read 4 elements(not 4 pixels), save them to LDS and read the data needed
-from LDS to calculate the result.
-The length of the convovle kernel supported is only related to the MAX size of LDS,
-which is HW related.
-Niko
-6/29/2011
-The info above maybe obsolete.
-***********************************************************************************/
+#if CN != 3
+#define loadpix(addr) *(__global const srcT *)(addr)
+#define storepix(val, addr)  *(__global dstT *)(addr) = val
+#define SRCSIZE (int)sizeof(srcT)
+#define DSTSIZE (int)sizeof(dstT)
+#else
+#define loadpix(addr)  vload3(0, (__global const srcT1 *)(addr))
+#define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
+#define SRCSIZE (int)sizeof(srcT1)*3
+#define DSTSIZE (int)sizeof(dstT1)*3
+#endif

 #define DIG(a) a,
 __constant float mat_kernel[] = { COEFF };

-__kernel void col_filter(__global const srcT * src, int src_step_in_pixel, int src_whole_cols, int src_whole_rows,
-                         __global dstT * dst, int dst_offset_in_pixel, int dst_step_in_pixel, int dst_cols, int dst_rows)
+__kernel void col_filter(__global const uchar * src, int src_step, int src_offset, int src_whole_rows, int src_whole_cols,
+                         __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
 {
    int x = get_global_id(0);
    int y = get_global_id(1);
@ -64,8 +71,8 @@ __kernel void col_filter(__global const srcT * src, int src_step_in_pixel, int s
    int l_x = get_local_id(0);
    int l_y = get_local_id(1);

-    int start_addr = mad24(y, src_step_in_pixel, x);
-    int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);
+    int start_addr = mad24(y, src_step, x * SRCSIZE);
+    int end_addr = mad24(src_whole_rows - 1, src_step, src_whole_cols * SRCSIZE);

    srcT sum, temp[READ_TIMES_COL];
    __local srcT LDS_DAT[LSIZE1 * READ_TIMES_COL][LSIZE0 + 1];
@ -73,9 +80,9 @@ __kernel void col_filter(__global const srcT * src, int src_step_in_pixel, int s
    // read pixels from src
    for (int i = 0; i < READ_TIMES_COL; ++i)
    {
-        int current_addr = mad24(i, LSIZE1 * src_step_in_pixel, start_addr);
+        int current_addr = mad24(i, LSIZE1 * src_step, start_addr);
        current_addr = current_addr < end_addr ? current_addr : 0;
-        temp[i] = src[current_addr];
+        temp[i] = loadpix(src + current_addr);
    }

    // save pixels to lds
@ -95,7 +102,7 @@ __kernel void col_filter(__global const srcT * src, int src_step_in_pixel, int s
    // write the result to dst
    if (x < dst_cols && y < dst_rows)
    {
-        start_addr = mad24(y, dst_step_in_pixel, x + dst_offset_in_pixel);
-        dst[start_addr] = convertToDstT(sum);
+        start_addr = mad24(y, dst_step, mad24(DSTSIZE, x, dst_offset));
+        storepix(convertToDstT(sum), dst + start_addr);
    }
 }
--- a/modules/imgproc/src/opencl/filterSepRow.cl
+++ b/modules/imgproc/src/opencl/filterSepRow.cl
@ -34,6 +34,14 @@
 //
 //

+#ifdef DOUBLE_SUPPORT
+#ifdef cl_amd_fp64
+#pragma OPENCL EXTENSION cl_amd_fp64:enable
+#elif defined (cl_khr_fp64)
+#pragma OPENCL EXTENSION cl_khr_fp64:enable
+#endif
+#endif
+
 #define READ_TIMES_ROW ((2*(RADIUSX+LSIZE0)-1)/LSIZE0) //for c4 only
 #define RADIUS 1

@ -117,16 +125,16 @@

 #define noconvert

-#if cn != 3
+#if CN != 3
 #define loadpix(addr) *(__global const srcT *)(addr)
 #define storepix(val, addr)  *(__global dstT *)(addr) = val
-#define SRCSIZE ((int)sizeof(srcT))
-#define DSTSIZE ((int)sizeof(dstT))
+#define SRCSIZE (int)sizeof(srcT)
+#define DSTSIZE (int)sizeof(dstT)
 #else
 #define loadpix(addr)  vload3(0, (__global const srcT1 *)(addr))
 #define storepix(val, addr) vstore3(val, 0, (__global dstT1 *)(addr))
-#define SRCSIZE ((int)sizeof(srcT1)*3)
-#define DSTSIZE ((int)sizeof(dstT1)*3)
+#define SRCSIZE (int)sizeof(srcT1)*3
+#define DSTSIZE (int)sizeof(dstT1)*3
 #endif

 #define DIG(a) a,
@ -269,32 +277,33 @@ __kernel void row_filter_C1_D0(__global const uchar * src, int src_step_in_pixel
        dst[start_addr] = sum.x;
 }

-__kernel void row_filter(__global const srcT * src, int src_step_in_pixel, int src_offset_x, int src_offset_y,
+__kernel void row_filter(__global const uchar * src, int src_step, int src_offset_x, int src_offset_y,
                         int src_cols, int src_rows, int src_whole_cols, int src_whole_rows,
-                         __global dstT * dst, int dst_step_in_pixel, int dst_cols, int dst_rows,
+                         __global uchar * dst, int dst_step, int dst_cols, int dst_rows,
                         int radiusy)
 {
    int x = get_global_id(0);
    int y = get_global_id(1);
    int l_x = get_local_id(0);
    int l_y = get_local_id(1);
+
    int start_x = x + src_offset_x - RADIUSX;
    int start_y = y + src_offset_y - radiusy;
-    int start_addr = mad24(start_y, src_step_in_pixel, start_x);
+    int start_addr = mad24(start_y, src_step, start_x * SRCSIZE);

    dstT sum;
    srcT temp[READ_TIMES_ROW];

    __local srcT LDS_DAT[LSIZE1][READ_TIMES_ROW * LSIZE0 + 1];
 #ifdef BORDER_CONSTANT
-    int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);
+    int end_addr = mad24(src_whole_rows - 1, src_step, src_whole_cols * SRCSIZE);

    // read pixels from src
    for (int i = 0; i < READ_TIMES_ROW; i++)
    {
-        int current_addr = mad24(i, LSIZE0, start_addr);
-        current_addr = current_addr < end_addr && current_addr > 0 ? current_addr : 0;
-        temp[i] = src[current_addr];
+        int current_addr = mad24(i, LSIZE0 * SRCSIZE, start_addr);
+        current_addr = current_addr < end_addr && current_addr >= 0 ? current_addr : 0;
+        temp[i] = loadpix(src + current_addr);
    }

    // judge if read out of boundary
@ -312,8 +321,7 @@ __kernel void row_filter(__global const srcT * src, int src_step_in_pixel, int s
    }
 #endif
 #else
-    int index[READ_TIMES_ROW];
-    int s_x, s_y;
+    int index[READ_TIMES_ROW], s_x, s_y;

    // judge if read out of boundary
    for (int i = 0; i < READ_TIMES_ROW; ++i)
@ -328,12 +336,12 @@ __kernel void row_filter(__global const srcT * src, int src_step_in_pixel, int s
        EXTRAPOLATE(s_x, 0, src_whole_cols);
        EXTRAPOLATE(s_y, 0, src_whole_rows);
 #endif
-        index[i] = mad24(s_y, src_step_in_pixel, s_x);
+        index[i] = mad24(s_y, src_step, s_x * SRCSIZE);
    }

    // read pixels from src
    for (int i = 0; i < READ_TIMES_ROW; ++i)
-        temp[i] = src[index[i]];
+        temp[i] = loadpix(src + index[i]);
 #endif // BORDER_CONSTANT

    // save pixels to lds
@ -349,10 +357,11 @@ __kernel void row_filter(__global const srcT * src, int src_step_in_pixel, int s
        temp[1] = LDS_DAT[l_y][l_x + RADIUSX + i];
        sum += mad(convertToDstT(temp[0]), mat_kernel[RADIUSX - i], convertToDstT(temp[1]) * mat_kernel[RADIUSX + i]);
    }
+
    // write the result to dst
    if (x < dst_cols && y < dst_rows)
    {
-        start_addr = mad24(y, dst_step_in_pixel, x);
-        dst[start_addr] = sum;
+        start_addr = mad24(y, dst_step, x * DSTSIZE);
+        storepix(sum, dst + start_addr);
    }
 }
--- a/modules/imgproc/test/ocl/test_filters.cpp
+++ b/modules/imgproc/test/ocl/test_filters.cpp
@ -312,7 +312,7 @@ OCL_TEST_P(MorphologyEx, Mat)
            (int)BORDER_REFLECT|BORDER_ISOLATED, (int)BORDER_WRAP|BORDER_ISOLATED, \
            (int)BORDER_REFLECT_101|BORDER_ISOLATED*/) // WRAP and ISOLATED are not supported by cv:: version

-#define FILTER_TYPES Values(CV_8UC1, CV_8UC2, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4)
+#define FILTER_TYPES Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4)

 OCL_INSTANTIATE_TEST_CASE_P(Filter, Bilateral, Combine(
                            Values((MatType)CV_8UC1),
--- a/modules/imgproc/test/ocl/test_sepfilter2D.cpp
+++ b/modules/imgproc/test/ocl/test_sepfilter2D.cpp
@ -75,9 +75,9 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
    void random_roi()
    {
        Size ksize = randomSize(kernelMinSize, kernelMaxSize);
-        if (1 != (ksize.width % 2))
+        if (1 != ksize.width % 2)
            ksize.width++;
-        if (1 != (ksize.height % 2))
+        if (1 != ksize.height % 2)
            ksize.height++;

        Mat temp = randomMat(Size(ksize.width, 1), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE);
@ -86,24 +86,22 @@ PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
        cv::normalize(temp, kernelY, 1.0, 0.0, NORM_L1);

        Size roiSize = randomSize(ksize.width + 16, MAX_VALUE, ksize.height + 20, MAX_VALUE);
-        std::cout << roiSize << std::endl;
        int rest = roiSize.width % 4;
-        if (0 != rest)
+        if (rest != 0)
            roiSize.width += (4 - rest);
        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        rest = srcBorder.lef % 4;
-        if (0 != rest)
+        if (rest != 0)
            srcBorder.lef += (4 - rest);
        rest = srcBorder.rig % 4;
-        if (0 != rest)
+        if (rest != 0)
            srcBorder.rig += (4 - rest);
        randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);

        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);

-        anchor.x = -1;
-        anchor.y = -1;
+        anchor.x = anchor.y = -1;

        UMAT_UPLOAD_INPUT_PARAMETER(src)
        UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
@ -128,11 +126,10 @@ OCL_TEST_P(SepFilter2D, Mat)
    }
 }

-
 OCL_INSTANTIATE_TEST_CASE_P(ImageProc, SepFilter2D,
                            Combine(
                                Values(CV_8U, CV_32F),
-                                Values(1, 4),
+                                OCL_ALL_CHANNELS,
                                Values(
                                        (BorderType)BORDER_CONSTANT,
                                        (BorderType)BORDER_REPLICATE,