Merge pull request #2751 from akarsakov:ocl_thresh_opt
This commit is contained in:
		@@ -53,29 +53,40 @@
 | 
			
		||||
 | 
			
		||||
__kernel void threshold(__global const uchar * srcptr, int src_step, int src_offset,
 | 
			
		||||
                        __global uchar * dstptr, int dst_step, int dst_offset, int rows, int cols,
 | 
			
		||||
                        T1 thresh, T1 max_val)
 | 
			
		||||
                        T1 thresh, T1 max_val, T1 min_val)
 | 
			
		||||
{
 | 
			
		||||
    int gx = get_global_id(0);
 | 
			
		||||
    int gy = get_global_id(1);
 | 
			
		||||
    int gy = get_global_id(1) * STRIDE_SIZE;
 | 
			
		||||
 | 
			
		||||
    if (gx < cols && gy < rows)
 | 
			
		||||
    if (gx < cols)
 | 
			
		||||
    {
 | 
			
		||||
        int src_index = mad24(gy, src_step, mad24(gx, (int)sizeof(T), src_offset));
 | 
			
		||||
        int dst_index = mad24(gy, dst_step, mad24(gx, (int)sizeof(T), dst_offset));
 | 
			
		||||
 | 
			
		||||
        T sdata = *(__global const T *)(srcptr + src_index);
 | 
			
		||||
        __global T * dst = (__global T *)(dstptr + dst_index);
 | 
			
		||||
        #pragma unroll
 | 
			
		||||
        for (int i = 0; i < STRIDE_SIZE; i++)
 | 
			
		||||
        {
 | 
			
		||||
            if (gy < rows)
 | 
			
		||||
            {
 | 
			
		||||
                T sdata = *(__global const T *)(srcptr + src_index);
 | 
			
		||||
                __global T * dst = (__global T *)(dstptr + dst_index);
 | 
			
		||||
 | 
			
		||||
#ifdef THRESH_BINARY
 | 
			
		||||
        dst[0] = sdata > (T)(thresh) ? (T)(max_val) : (T)(0);
 | 
			
		||||
#elif defined THRESH_BINARY_INV
 | 
			
		||||
        dst[0] = sdata > (T)(thresh) ? (T)(0) : (T)(max_val);
 | 
			
		||||
#elif defined THRESH_TRUNC
 | 
			
		||||
        dst[0] = sdata > (T)(thresh) ? (T)(thresh) : sdata;
 | 
			
		||||
#elif defined THRESH_TOZERO
 | 
			
		||||
        dst[0] = sdata > (T)(thresh) ? sdata : (T)(0);
 | 
			
		||||
#elif defined THRESH_TOZERO_INV
 | 
			
		||||
        dst[0] = sdata > (T)(thresh) ? (T)(0) : sdata;
 | 
			
		||||
#endif
 | 
			
		||||
                #ifdef THRESH_BINARY
 | 
			
		||||
                        dst[0] = sdata > (thresh) ? (T)(max_val) : (T)(0);
 | 
			
		||||
                #elif defined THRESH_BINARY_INV
 | 
			
		||||
                        dst[0] = sdata > (thresh) ? (T)(0) : (T)(max_val);
 | 
			
		||||
                #elif defined THRESH_TRUNC
 | 
			
		||||
                        dst[0] = clamp(sdata, (T)min_val, (T)(thresh));
 | 
			
		||||
                #elif defined THRESH_TOZERO
 | 
			
		||||
                        dst[0] = sdata > (thresh) ? sdata : (T)(0);
 | 
			
		||||
                #elif defined THRESH_TOZERO_INV
 | 
			
		||||
                        dst[0] = sdata > (thresh) ? (T)(0) : sdata;
 | 
			
		||||
                #endif
 | 
			
		||||
 | 
			
		||||
                gy++;
 | 
			
		||||
                src_index += src_step;
 | 
			
		||||
                dst_index += dst_step;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -833,9 +833,12 @@ static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, d
 | 
			
		||||
 | 
			
		||||
    const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
 | 
			
		||||
                                          "THRESH_TOZERO", "THRESH_TOZERO_INV" };
 | 
			
		||||
    ocl::Device dev = ocl::Device::getDefault();
 | 
			
		||||
    int stride_size = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1;
 | 
			
		||||
 | 
			
		||||
    ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc,
 | 
			
		||||
                  format("-D %s -D T=%s -D T1=%s%s", thresholdMap[thresh_type],
 | 
			
		||||
                         ocl::typeToStr(ktype), ocl::typeToStr(depth),
 | 
			
		||||
                  format("-D %s -D T=%s -D T1=%s -D STRIDE_SIZE=%d%s", thresholdMap[thresh_type],
 | 
			
		||||
                         ocl::typeToStr(ktype), ocl::typeToStr(depth), stride_size,
 | 
			
		||||
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
 | 
			
		||||
    if (k.empty())
 | 
			
		||||
        return false;
 | 
			
		||||
@@ -847,11 +850,16 @@ static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, d
 | 
			
		||||
    if (depth <= CV_32S)
 | 
			
		||||
        thresh = cvFloor(thresh);
 | 
			
		||||
 | 
			
		||||
    const double min_vals[] = { 0, CHAR_MIN, 0, SHRT_MIN, INT_MIN, -FLT_MAX, -DBL_MAX, 0 };
 | 
			
		||||
    double min_val = min_vals[depth];
 | 
			
		||||
 | 
			
		||||
    k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn),
 | 
			
		||||
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))),
 | 
			
		||||
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval))));
 | 
			
		||||
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval))),
 | 
			
		||||
           ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(min_val))));
 | 
			
		||||
 | 
			
		||||
    size_t globalsize[2] = { dst.cols * cn / kercn, dst.rows };
 | 
			
		||||
    globalsize[1] = (globalsize[1] + stride_size - 1) / stride_size;
 | 
			
		||||
    return k.run(2, globalsize, NULL, false);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user