ported changes from PR #2867
This commit is contained in:
		@@ -1342,58 +1342,80 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_OPENCL
 | 
			
		||||
 | 
			
		||||
static bool ocl_morphology_op(InputArray _src, OutputArray _dst, Mat kernel,
 | 
			
		||||
                              const Size & ksize, const Point & anchor, int iterations, int op)
 | 
			
		||||
static bool ocl_morphOp(InputArray _src, OutputArray _dst, InputArray _kernel,
 | 
			
		||||
                        Point anchor, int iterations, int op, int borderType,
 | 
			
		||||
                        const Scalar& borderValue)
 | 
			
		||||
{
 | 
			
		||||
    CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE);
 | 
			
		||||
    if (borderType != BORDER_CONSTANT)
 | 
			
		||||
        return false;
 | 
			
		||||
 | 
			
		||||
    Mat kernel = _kernel.getMat();
 | 
			
		||||
    Size ksize = kernel.data ? kernel.size() : Size(3,3);
 | 
			
		||||
    anchor = normalizeAnchor(anchor, ksize);
 | 
			
		||||
 | 
			
		||||
    if (iterations == 0 || kernel.rows*kernel.cols == 1)
 | 
			
		||||
    {
 | 
			
		||||
        _src.copyTo(_dst);
 | 
			
		||||
        return true;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (!kernel.data)
 | 
			
		||||
    {
 | 
			
		||||
        kernel = getStructuringElement(MORPH_RECT, Size(1+iterations*2,1+iterations*2));
 | 
			
		||||
        anchor = Point(iterations, iterations);
 | 
			
		||||
        iterations = 1;
 | 
			
		||||
    }
 | 
			
		||||
    else if( iterations > 1 && countNonZero(kernel) == kernel.rows*kernel.cols )
 | 
			
		||||
    {
 | 
			
		||||
        anchor = Point(anchor.x*iterations, anchor.y*iterations);
 | 
			
		||||
        kernel = getStructuringElement(MORPH_RECT,
 | 
			
		||||
                                       Size(ksize.width + (iterations-1)*(ksize.width-1),
 | 
			
		||||
                                            ksize.height + (iterations-1)*(ksize.height-1)),
 | 
			
		||||
                                       anchor);
 | 
			
		||||
        iterations = 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const ocl::Device & dev = ocl::Device::getDefault();
 | 
			
		||||
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
 | 
			
		||||
    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
 | 
			
		||||
    bool doubleSupport = dev.doubleFPConfig() > 0;
 | 
			
		||||
 | 
			
		||||
    if (depth == CV_64F && !doubleSupport)
 | 
			
		||||
        return false;
 | 
			
		||||
 | 
			
		||||
    UMat kernel8U;
 | 
			
		||||
    kernel.convertTo(kernel8U, CV_8U);
 | 
			
		||||
    kernel8U = kernel8U.reshape(1, 1);
 | 
			
		||||
 | 
			
		||||
    bool rectKernel = true;
 | 
			
		||||
    {
 | 
			
		||||
        Mat m = kernel.reshape(1, 1);
 | 
			
		||||
        for (int i = 0; i < m.size().area(); ++i)
 | 
			
		||||
            if (m.at<uchar>(i) != 1)
 | 
			
		||||
            {
 | 
			
		||||
                rectKernel = false;
 | 
			
		||||
                break;
 | 
			
		||||
            }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    UMat src = _src.getUMat();
 | 
			
		||||
 | 
			
		||||
#ifdef ANDROID
 | 
			
		||||
    size_t localThreads[3] = {16, 8, 1};
 | 
			
		||||
    size_t localThreads[2] = { 16, 8 };
 | 
			
		||||
#else
 | 
			
		||||
    size_t localThreads[3] = {16, 16, 1};
 | 
			
		||||
    size_t localThreads[2] = { 16, 16 };
 | 
			
		||||
#endif
 | 
			
		||||
    size_t globalThreads[3] = {(src.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0], (src.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1};
 | 
			
		||||
    size_t globalThreads[2] = { src.cols, src.rows };
 | 
			
		||||
 | 
			
		||||
    if (localThreads[0]*localThreads[1] * 2 < (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1))
 | 
			
		||||
        return false;
 | 
			
		||||
 | 
			
		||||
    static const char * const op2str[] = { "ERODE", "DILATE" };
 | 
			
		||||
    String buildOptions = format("-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D %s%s%s"
 | 
			
		||||
                                 " -D T=%s -D DEPTH_%d -D cn=%d -D T1=%s", anchor.x, anchor.y,
 | 
			
		||||
                                 (int)localThreads[0], (int)localThreads[1], op2str[op],
 | 
			
		||||
                                 doubleSupport ? " -D DOUBLE_SUPPORT" : "", rectKernel ? " -D RECTKERNEL" : "",
 | 
			
		||||
                                 ocl::typeToStr(_src.type()), _src.depth(), cn, ocl::typeToStr(depth));
 | 
			
		||||
    // build processing
 | 
			
		||||
    String processing;
 | 
			
		||||
    Mat kernel8u;
 | 
			
		||||
    kernel.convertTo(kernel8u, CV_8U);
 | 
			
		||||
    for (int y = 0; y < kernel8u.rows; ++y)
 | 
			
		||||
        for (int x = 0; x < kernel8u.cols; ++x)
 | 
			
		||||
            if (kernel8u.at<uchar>(y, x) != 0)
 | 
			
		||||
                processing += format("PROCESS(%d,%d)", y, x);
 | 
			
		||||
 | 
			
		||||
    std::vector<ocl::Kernel> kernels;
 | 
			
		||||
    static const char * const op2str[] = { "ERODE", "DILATE" };
 | 
			
		||||
    String buildOptions = format("-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D %s%s"
 | 
			
		||||
                                 " -D PROCESS_ELEMS=%s -D T=%s -D DEPTH_%d -D cn=%d -D T1=%s", anchor.x, anchor.y,
 | 
			
		||||
                                 (int)localThreads[0], (int)localThreads[1], op2str[op],
 | 
			
		||||
                                 doubleSupport ? " -D DOUBLE_SUPPORT" : "", processing.c_str(),
 | 
			
		||||
                                 ocl::typeToStr(type), depth, cn, ocl::typeToStr(depth));
 | 
			
		||||
 | 
			
		||||
    std::vector<ocl::Kernel> kernels(iterations);
 | 
			
		||||
    for (int i = 0; i < iterations; i++)
 | 
			
		||||
    {
 | 
			
		||||
        ocl::Kernel k("morph", ocl::imgproc::morph_oclsrc, buildOptions);
 | 
			
		||||
        if (k.empty())
 | 
			
		||||
        kernels[i].create("morph", ocl::imgproc::morph_oclsrc, buildOptions);
 | 
			
		||||
        if (kernels[i].empty())
 | 
			
		||||
            return false;
 | 
			
		||||
        kernels.push_back(k);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    _dst.create(src.size(), src.type());
 | 
			
		||||
@@ -1407,8 +1429,7 @@ static bool ocl_morphology_op(InputArray _src, OutputArray _dst, Mat kernel,
 | 
			
		||||
        int wholecols = wholesize.width, wholerows = wholesize.height;
 | 
			
		||||
 | 
			
		||||
        kernels[0].args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnlyNoSize(dst),
 | 
			
		||||
                        ofs.x, ofs.y, src.cols, src.rows, ocl::KernelArg::PtrReadOnly(kernel8U),
 | 
			
		||||
                        wholecols, wholerows);
 | 
			
		||||
                        ofs.x, ofs.y, src.cols, src.rows, wholecols, wholerows);
 | 
			
		||||
 | 
			
		||||
        return kernels[0].run(2, globalThreads, localThreads, false);
 | 
			
		||||
    }
 | 
			
		||||
@@ -1422,19 +1443,20 @@ static bool ocl_morphology_op(InputArray _src, OutputArray _dst, Mat kernel,
 | 
			
		||||
        if (i == 0)
 | 
			
		||||
        {
 | 
			
		||||
            int cols =  src.cols, rows = src.rows;
 | 
			
		||||
            src.locateROI(wholesize,ofs);
 | 
			
		||||
            src.locateROI(wholesize, ofs);
 | 
			
		||||
            src.adjustROI(ofs.y, wholesize.height - rows - ofs.y, ofs.x, wholesize.width - cols - ofs.x);
 | 
			
		||||
            if(src.u != dst.u)
 | 
			
		||||
                source = src;
 | 
			
		||||
            else
 | 
			
		||||
                src.copyTo(source);
 | 
			
		||||
 | 
			
		||||
            src.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
 | 
			
		||||
            source.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
 | 
			
		||||
        }
 | 
			
		||||
        else
 | 
			
		||||
        {
 | 
			
		||||
            int cols =  dst.cols, rows = dst.rows;
 | 
			
		||||
            dst.locateROI(wholesize,ofs);
 | 
			
		||||
            dst.locateROI(wholesize, ofs);
 | 
			
		||||
            dst.adjustROI(ofs.y, wholesize.height - rows - ofs.y, ofs.x, wholesize.width - cols - ofs.x);
 | 
			
		||||
            dst.copyTo(source);
 | 
			
		||||
            dst.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
 | 
			
		||||
@@ -1443,12 +1465,12 @@ static bool ocl_morphology_op(InputArray _src, OutputArray _dst, Mat kernel,
 | 
			
		||||
        source.locateROI(wholesize, ofs);
 | 
			
		||||
 | 
			
		||||
        kernels[i].args(ocl::KernelArg::ReadOnlyNoSize(source), ocl::KernelArg::WriteOnlyNoSize(dst),
 | 
			
		||||
                        ofs.x, ofs.y, source.cols, source.rows, ocl::KernelArg::PtrReadOnly(kernel8U),
 | 
			
		||||
                        wholesize.width, wholesize.height);
 | 
			
		||||
                        ofs.x, ofs.y, source.cols, source.rows, wholesize.width, wholesize.height);
 | 
			
		||||
 | 
			
		||||
        if (!kernels[i].run(2, globalThreads, localThreads, false))
 | 
			
		||||
            return false;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -1459,10 +1481,10 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
 | 
			
		||||
                     Point anchor, int iterations,
 | 
			
		||||
                     int borderType, const Scalar& borderValue )
 | 
			
		||||
{
 | 
			
		||||
#ifdef HAVE_OPENCL
 | 
			
		||||
    int src_type = _src.type(),
 | 
			
		||||
        src_cn = CV_MAT_CN(src_type), src_depth = CV_MAT_DEPTH(src_type);
 | 
			
		||||
#endif
 | 
			
		||||
    CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && _src.channels() <= 4 &&
 | 
			
		||||
               borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue() &&
 | 
			
		||||
               (op == MORPH_ERODE || op == MORPH_DILATE),
 | 
			
		||||
               ocl_morphOp(_src, _dst, _kernel, anchor, iterations, op, borderType, borderValue) )
 | 
			
		||||
 | 
			
		||||
    Mat kernel = _kernel.getMat();
 | 
			
		||||
    Size ksize = kernel.data ? kernel.size() : Size(3,3);
 | 
			
		||||
@@ -1490,12 +1512,6 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
 | 
			
		||||
        iterations = 1;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && src_cn <= 4 &&
 | 
			
		||||
               (src_depth == CV_8U || src_depth == CV_32F || src_depth == CV_64F ) &&
 | 
			
		||||
               borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue() &&
 | 
			
		||||
               (op == MORPH_ERODE || op == MORPH_DILATE),
 | 
			
		||||
               ocl_morphology_op(_src, _dst, kernel, ksize, anchor, iterations, op) )
 | 
			
		||||
 | 
			
		||||
#if IPP_VERSION_X100 >= 801
 | 
			
		||||
    if( IPPMorphOp(op, _src, _dst, kernel, anchor, iterations, borderType, borderValue) )
 | 
			
		||||
        return;
 | 
			
		||||
@@ -1515,13 +1531,6 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
 | 
			
		||||
 | 
			
		||||
    parallel_for_(Range(0, nStripes),
 | 
			
		||||
                  MorphologyRunner(src, dst, nStripes, iterations, op, kernel, anchor, borderType, borderType, borderValue));
 | 
			
		||||
 | 
			
		||||
    //Ptr<FilterEngine> f = createMorphologyFilter(op, src.type(),
 | 
			
		||||
    //                                             kernel, anchor, borderType, borderType, borderValue );
 | 
			
		||||
 | 
			
		||||
    //f->apply( src, dst );
 | 
			
		||||
    //for( int i = 1; i < iterations; i++ )
 | 
			
		||||
    //    f->apply( dst, dst );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
@@ -1543,53 +1552,56 @@ void cv::dilate( InputArray src, OutputArray dst, InputArray kernel,
 | 
			
		||||
 | 
			
		||||
#ifdef HAVE_OPENCL
 | 
			
		||||
 | 
			
		||||
static void ocl_morphologyEx(InputArray _src, OutputArray _dst, int op,
 | 
			
		||||
namespace cv {
 | 
			
		||||
 | 
			
		||||
static bool ocl_morphologyEx(InputArray _src, OutputArray _dst, int op,
 | 
			
		||||
                             InputArray kernel, Point anchor, int iterations,
 | 
			
		||||
                             int borderType, const Scalar& borderValue)
 | 
			
		||||
{
 | 
			
		||||
    int type = _src.type(), cn = CV_MAT_CN(type);
 | 
			
		||||
    Size ksize = kernel.size();
 | 
			
		||||
 | 
			
		||||
    _dst.create(_src.size(), type);
 | 
			
		||||
    _dst.createSameSize(_src, _src.type());
 | 
			
		||||
    UMat temp;
 | 
			
		||||
 | 
			
		||||
    switch( op )
 | 
			
		||||
    {
 | 
			
		||||
    case MORPH_ERODE:
 | 
			
		||||
        erode( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
 | 
			
		||||
        ocl_morphOp( _src, _dst, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue );
 | 
			
		||||
        break;
 | 
			
		||||
    case MORPH_DILATE:
 | 
			
		||||
        dilate( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
 | 
			
		||||
        ocl_morphOp( _src, _dst, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue );
 | 
			
		||||
        break;
 | 
			
		||||
    case MORPH_OPEN:
 | 
			
		||||
        erode( _src, temp, kernel, anchor, iterations, borderType, borderValue );
 | 
			
		||||
        dilate( temp, _dst, kernel, anchor, iterations, borderType, borderValue );
 | 
			
		||||
        ocl_morphOp( _src, temp, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue );
 | 
			
		||||
        ocl_morphOp( temp, _dst, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue );
 | 
			
		||||
        break;
 | 
			
		||||
    case CV_MOP_CLOSE:
 | 
			
		||||
        dilate( _src, temp, kernel, anchor, iterations, borderType, borderValue );
 | 
			
		||||
        erode( temp, _dst, kernel, anchor, iterations, borderType, borderValue );
 | 
			
		||||
    case MORPH_CLOSE:
 | 
			
		||||
        ocl_morphOp( _src, temp, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue );
 | 
			
		||||
        ocl_morphOp( temp, _dst, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue );
 | 
			
		||||
        break;
 | 
			
		||||
    case CV_MOP_GRADIENT:
 | 
			
		||||
    case MORPH_GRADIENT:
 | 
			
		||||
    // ??
 | 
			
		||||
        erode( _src, temp, kernel, anchor, iterations, borderType, borderValue );
 | 
			
		||||
        dilate( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
 | 
			
		||||
        ocl_morphOp( _src, temp, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue );
 | 
			
		||||
        ocl_morphOp( _src, _dst, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue );
 | 
			
		||||
        subtract(_dst, temp, _dst);
 | 
			
		||||
        break;
 | 
			
		||||
    case CV_MOP_TOPHAT:
 | 
			
		||||
    case MORPH_TOPHAT:
 | 
			
		||||
    // ??
 | 
			
		||||
        erode( _src, temp, kernel, anchor, iterations, borderType, borderValue );
 | 
			
		||||
        dilate( temp, _dst, kernel, anchor, iterations, borderType, borderValue );
 | 
			
		||||
        ocl_morphOp( _src, temp, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue );
 | 
			
		||||
        ocl_morphOp( temp, _dst, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue );
 | 
			
		||||
        subtract(_src, _dst, _dst);
 | 
			
		||||
        break;
 | 
			
		||||
    case CV_MOP_BLACKHAT:
 | 
			
		||||
    case MORPH_BLACKHAT:
 | 
			
		||||
    // ??
 | 
			
		||||
        dilate( _src, temp, kernel, anchor, iterations, borderType, borderValue );
 | 
			
		||||
        erode( temp, _dst, kernel, anchor, iterations, borderType, borderValue );
 | 
			
		||||
        ocl_morphOp( _src, temp, kernel, anchor, iterations, MORPH_DILATE, borderType, borderValue );
 | 
			
		||||
        ocl_morphOp( temp, _dst, kernel, anchor, iterations, MORPH_ERODE, borderType, borderValue );
 | 
			
		||||
        subtract(_dst, _src, _dst);
 | 
			
		||||
        break;
 | 
			
		||||
    default:
 | 
			
		||||
        CV_Error( CV_StsBadArg, "unknown morphological operation" );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
@@ -1598,10 +1610,13 @@ void cv::morphologyEx( InputArray _src, OutputArray _dst, int op,
 | 
			
		||||
                       InputArray kernel, Point anchor, int iterations,
 | 
			
		||||
                       int borderType, const Scalar& borderValue )
 | 
			
		||||
{
 | 
			
		||||
    CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && cn <= 4 &&
 | 
			
		||||
#ifdef HAVE_OPENCL
 | 
			
		||||
    Size ksize = kernel.size();
 | 
			
		||||
    CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2 && _src.channels() <= 4 &&
 | 
			
		||||
        anchor.x == ksize.width >> 1 && anchor.y == ksize.height >> 1 &&
 | 
			
		||||
        borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue(),
 | 
			
		||||
        borderType == cv::BORDER_CONSTANT,
 | 
			
		||||
        ocl_morphologyEx(_src, _dst, op, kernel, anchor, iterations, borderType, borderValue))
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
    Mat src = _src.getMat(), temp;
 | 
			
		||||
    _dst.create(src.size(), src.type());
 | 
			
		||||
 
 | 
			
		||||
@@ -54,59 +54,70 @@
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef DEPTH_0
 | 
			
		||||
#ifdef ERODE
 | 
			
		||||
#define VAL 255
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef DILATE
 | 
			
		||||
#define VAL 0
 | 
			
		||||
#endif
 | 
			
		||||
#define MIN_VAL 0
 | 
			
		||||
#define MAX_VAL UCHAR_MAX
 | 
			
		||||
#elif defined DEPTH_1
 | 
			
		||||
#define MIN_VAL SCHAR_MIN
 | 
			
		||||
#define MAX_VAL SCHAR_MAX
 | 
			
		||||
#elif defined DEPTH_2
 | 
			
		||||
#define MIN_VAL 0
 | 
			
		||||
#define MAX_VAL USHRT_MAX
 | 
			
		||||
#elif defined DEPTH_3
 | 
			
		||||
#define MIN_VAL SHRT_MIN
 | 
			
		||||
#define MAX_VAL SHRT_MAX
 | 
			
		||||
#elif defined DEPTH_4
 | 
			
		||||
#define MIN_VAL INT_MIN
 | 
			
		||||
#define MAX_VAL INT_MAX
 | 
			
		||||
#elif defined DEPTH_5
 | 
			
		||||
#ifdef ERODE
 | 
			
		||||
#define VAL FLT_MAX
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef DILATE
 | 
			
		||||
#define VAL -FLT_MAX
 | 
			
		||||
#endif
 | 
			
		||||
#define MIN_VAL (-FLT_MAX)
 | 
			
		||||
#define MAX_VAL FLT_MAX
 | 
			
		||||
#elif defined DEPTH_6
 | 
			
		||||
#ifdef ERODE
 | 
			
		||||
#define VAL DBL_MAX
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef DILATE
 | 
			
		||||
#define VAL -DBL_MAX
 | 
			
		||||
#endif
 | 
			
		||||
#define MIN_VAL (-DBL_MAX)
 | 
			
		||||
#define MAX_VAL DBL_MAX
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef ERODE
 | 
			
		||||
#if defined(INTEL_DEVICE) && (DEPTH_0)
 | 
			
		||||
#ifdef OP_ERODE
 | 
			
		||||
#define VAL MAX_VAL
 | 
			
		||||
#elif defined OP_DILATE
 | 
			
		||||
#define VAL MIN_VAL
 | 
			
		||||
#else
 | 
			
		||||
#error "Unknown operation"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef OP_ERODE
 | 
			
		||||
#if defined INTEL_DEVICE && defined DEPTH_0
 | 
			
		||||
// workaround for bug in Intel HD graphics drivers (10.18.10.3496 or older)
 | 
			
		||||
#define __CAT(x, y) x##y
 | 
			
		||||
#define CAT(x, y) __CAT(x, y)
 | 
			
		||||
#define WA_CONVERT_1 CAT(convert_uint, cn)
 | 
			
		||||
#define WA_CONVERT_2 CAT(convert_, T)
 | 
			
		||||
#define convert_uint1 convert_uint
 | 
			
		||||
#define MORPH_OP(A,B) WA_CONVERT_2(min(WA_CONVERT_1(A),WA_CONVERT_1(B)))
 | 
			
		||||
#define MORPH_OP(A, B) WA_CONVERT_2(min(WA_CONVERT_1(A), WA_CONVERT_1(B)))
 | 
			
		||||
#else
 | 
			
		||||
#define MORPH_OP(A,B) min((A),(B))
 | 
			
		||||
#define MORPH_OP(A, B) min((A), (B))
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
#ifdef DILATE
 | 
			
		||||
#define MORPH_OP(A,B) max((A),(B))
 | 
			
		||||
#ifdef OP_DILATE
 | 
			
		||||
#define MORPH_OP(A, B) max((A), (B))
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define PROCESS(y, x) \
 | 
			
		||||
    res = MORPH_OP(res, LDS_DAT[mad24(l_y + y, width, l_x + x)]);
 | 
			
		||||
 | 
			
		||||
// BORDER_CONSTANT:      iiiiii|abcdefgh|iiiiiii
 | 
			
		||||
#define ELEM(i, l_edge, r_edge, elem1, elem2) (i) < (l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
__kernel void morph(__global const uchar * srcptr, int src_step, int src_offset,
 | 
			
		||||
                    __global uchar * dstptr, int dst_step, int dst_offset,
 | 
			
		||||
                    int src_offset_x, int src_offset_y, int cols, int rows,
 | 
			
		||||
                    __constant uchar * mat_kernel, int src_whole_cols, int src_whole_rows)
 | 
			
		||||
                    int src_whole_cols, int src_whole_rows EXTRA_PARAMS)
 | 
			
		||||
{
 | 
			
		||||
    int gidx = get_global_id(0), gidy = get_global_id(1);
 | 
			
		||||
    int l_x = get_local_id(0), l_y = get_local_id(1);
 | 
			
		||||
    int x = get_group_id(0) * LSIZE0, y = get_group_id(1) * LSIZE1;
 | 
			
		||||
    int start_x = x + src_offset_x - RADIUSX;
 | 
			
		||||
    int end_x = x + src_offset_x + LSIZE0 + RADIUSX;
 | 
			
		||||
    int width = end_x - (x + src_offset_x - RADIUSX) + 1;
 | 
			
		||||
    int width = mad24(RADIUSX, 2, LSIZE0 + 1);
 | 
			
		||||
    int start_y = y + src_offset_y - RADIUSY;
 | 
			
		||||
    int point1 = mad24(l_y, LSIZE0, l_x);
 | 
			
		||||
    int point2 = point1 + LSIZE0 * LSIZE1;
 | 
			
		||||
@@ -117,7 +128,7 @@ __kernel void morph(__global const uchar * srcptr, int src_step, int src_offset,
 | 
			
		||||
    int start_addr = mad24(cur_y, src_step, cur_x * TSIZE);
 | 
			
		||||
    int start_addr2 = mad24(cur_y2, src_step, cur_x2 * TSIZE);
 | 
			
		||||
 | 
			
		||||
    __local T LDS_DAT[2*LSIZE1*LSIZE0];
 | 
			
		||||
    __local T LDS_DAT[2 * LSIZE1 * LSIZE0];
 | 
			
		||||
 | 
			
		||||
    // read pixels from src
 | 
			
		||||
    int end_addr = mad24(src_whole_rows - 1, src_step, src_whole_cols * TSIZE);
 | 
			
		||||
@@ -128,8 +139,8 @@ __kernel void morph(__global const uchar * srcptr, int src_step, int src_offset,
 | 
			
		||||
    T temp1 = loadpix(srcptr + start_addr2);
 | 
			
		||||
 | 
			
		||||
    // judge if read out of boundary
 | 
			
		||||
    temp0 = ELEM(cur_x, 0, src_whole_cols, (T)(VAL),temp0);
 | 
			
		||||
    temp0 = ELEM(cur_y, 0, src_whole_rows, (T)(VAL),temp0);
 | 
			
		||||
    temp0 = ELEM(cur_x, 0, src_whole_cols, (T)(VAL), temp0);
 | 
			
		||||
    temp0 = ELEM(cur_y, 0, src_whole_rows, (T)(VAL), temp0);
 | 
			
		||||
 | 
			
		||||
    temp1 = ELEM(cur_x2, 0, src_whole_cols, (T)(VAL), temp1);
 | 
			
		||||
    temp1 = ELEM(cur_y2, 0, src_whole_rows, (T)(VAL), temp1);
 | 
			
		||||
@@ -138,23 +149,11 @@ __kernel void morph(__global const uchar * srcptr, int src_step, int src_offset,
 | 
			
		||||
    LDS_DAT[point2] = temp1;
 | 
			
		||||
    barrier(CLK_LOCAL_MEM_FENCE);
 | 
			
		||||
 | 
			
		||||
    T res = (T)(VAL);
 | 
			
		||||
    for (int i = 0, sizey = 2 * RADIUSY + 1; i < sizey; i++)
 | 
			
		||||
        for (int j = 0, sizex = 2 * RADIUSX + 1; j < sizex; j++)
 | 
			
		||||
        {
 | 
			
		||||
            res =
 | 
			
		||||
#ifndef RECTKERNEL
 | 
			
		||||
                mat_kernel[i*(2*RADIUSX+1)+j] ?
 | 
			
		||||
#endif
 | 
			
		||||
                MORPH_OP(res, LDS_DAT[mad24(l_y + i, width, l_x + j)])
 | 
			
		||||
#ifndef RECTKERNEL
 | 
			
		||||
                : res
 | 
			
		||||
#endif
 | 
			
		||||
                ;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    if (gidx < cols && gidy < rows)
 | 
			
		||||
    {
 | 
			
		||||
        T res = (T)(VAL);
 | 
			
		||||
        PROCESS_ELEMS;
 | 
			
		||||
 | 
			
		||||
        int dst_index = mad24(gidy, dst_step, mad24(gidx, TSIZE, dst_offset));
 | 
			
		||||
        storepix(res, dstptr + dst_index);
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user