From 0e1717c14c3f0928fa162c01b728cf6a830c7d11 Mon Sep 17 00:00:00 2001 From: Konstantin Matskevich Date: Fri, 15 Nov 2013 17:26:18 +0400 Subject: [PATCH] fixing bugs for Intel platform CPU device --- modules/ocl/src/arithm.cpp | 5 ++-- .../src/opencl/arithm_absdiff_nonsaturate.cl | 24 ++++++++++++------- modules/ocl/src/opencl/arithm_pow.cl | 13 ++++------ 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/modules/ocl/src/arithm.cpp b/modules/ocl/src/arithm.cpp index 997b2010f..68c52695c 100644 --- a/modules/ocl/src/arithm.cpp +++ b/modules/ocl/src/arithm.cpp @@ -1638,8 +1638,9 @@ static void arithmetic_pow_run(const oclMat &src, double p, oclMat &dst, string size_t localThreads[3] = { 64, 4, 1 }; size_t globalThreads[3] = { dst.cols, dst.rows, 1 }; + const char * const typeStr = depth == CV_32F ? "float" : "double"; const char * const channelMap[] = { "", "", "2", "4", "4" }; - std::string buildOptions = format("-D T=%s%s", depth == CV_32F ? "float" : "double", channelMap[channels]); + std::string buildOptions = format("-D VT=%s%s -D T=%s", typeStr, channelMap[channels], typeStr); int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize(); int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize(); @@ -1655,7 +1656,7 @@ static void arithmetic_pow_run(const oclMat &src, double p, oclMat &dst, string args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols )); float pf = static_cast<float>(p); - if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE)) + if(src.depth() == CV_32F) args.push_back( make_pair( sizeof(cl_float), (void *)&pf )); else args.push_back( make_pair( sizeof(cl_double), (void *)&p )); diff --git a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl index c09560a5f..e03fa698a 100644 --- a/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl +++ b/modules/ocl/src/opencl/arithm_absdiff_nonsaturate.cl @@ -65,12 +65,16 @@ __kernel void 
arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_st int src1_index = mad24(y, src1_step, x + src1_offset); int src2_index = mad24(y, src2_step, x + src2_offset); int dst_index = mad24(y, dst_step, x + dst_offset); +#ifdef INTEL_DEVICE //workaround for intel compiler bug + if(src1_index >= 0 && src2_index >= 0) +#endif + { + dstT t0 = convertToDstT(src1[src1_index]); + dstT t1 = convertToDstT(src2[src2_index]); + dstT t2 = t0 - t1; - dstT t0 = convertToDstT(src1[src1_index]); - dstT t1 = convertToDstT(src2[src2_index]); - dstT t2 = t0 - t1; - - dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2; + dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2; + } } } @@ -85,9 +89,13 @@ __kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int { int src1_index = mad24(y, src1_step, x + src1_offset); int dst_index = mad24(y, dst_step, x + dst_offset); +#ifdef INTEL_DEVICE //workaround for intel compiler bug + if(src1_index >= 0) +#endif + { + dstT t0 = convertToDstT(src1[src1_index]); - dstT t0 = convertToDstT(src1[src1_index]); - - dst[dst_index] = t0 >= (dstT)(0) ? t0 : -t0; + dst[dst_index] = t0 >= (dstT)(0) ? 
t0 : -t0; + } } } diff --git a/modules/ocl/src/opencl/arithm_pow.cl b/modules/ocl/src/opencl/arithm_pow.cl index bb0673d4a..385e4cc15 100644 --- a/modules/ocl/src/opencl/arithm_pow.cl +++ b/modules/ocl/src/opencl/arithm_pow.cl @@ -49,16 +49,13 @@ #elif defined (cl_khr_fp64) #pragma OPENCL EXTENSION cl_khr_fp64:enable #endif -#define F double -#else -#define F float #endif /************************************** pow **************************************/ -__kernel void arithm_pow(__global T * src, int src_step, int src_offset, - __global T * dst, int dst_step, int dst_offset, - int rows, int cols, F p) +__kernel void arithm_pow(__global VT * src, int src_step, int src_offset, + __global VT * dst, int dst_step, int dst_offset, + int rows, int cols, T p) { int x = get_global_id(0); int y = get_global_id(1); @@ -68,8 +65,8 @@ __kernel void arithm_pow(__global T * src, int src_step, int src_offset, int src_index = mad24(y, src_step, x + src_offset); int dst_index = mad24(y, dst_step, x + dst_offset); - T src_data = src[src_index]; - T tmp = src_data > 0 ? exp(p * log(src_data)) : (src_data == 0 ? 0 : exp(p * log(fabs(src_data)))); + VT src_data = src[src_index]; + VT tmp = src_data > 0 ? exp(p * log(src_data)) : (src_data == 0 ? 0 : exp(p * log(fabs(src_data)))); dst[dst_index] = tmp; }