Merge pull request #1807 from KonstantinMatskevich:intel_bug
This commit is contained in:
commit
4fd1960425
@ -1638,8 +1638,9 @@ static void arithmetic_pow_run(const oclMat &src, double p, oclMat &dst, string
|
||||
size_t localThreads[3] = { 64, 4, 1 };
|
||||
size_t globalThreads[3] = { dst.cols, dst.rows, 1 };
|
||||
|
||||
const char * const typeStr = depth == CV_32F ? "float" : "double";
|
||||
const char * const channelMap[] = { "", "", "2", "4", "4" };
|
||||
std::string buildOptions = format("-D T=%s%s", depth == CV_32F ? "float" : "double", channelMap[channels]);
|
||||
std::string buildOptions = format("-D VT=%s%s -D T=%s", typeStr, channelMap[channels], typeStr);
|
||||
|
||||
int src_step = src.step / src.elemSize(), src_offset = src.offset / src.elemSize();
|
||||
int dst_step = dst.step / dst.elemSize(), dst_offset = dst.offset / dst.elemSize();
|
||||
@ -1655,7 +1656,7 @@ static void arithmetic_pow_run(const oclMat &src, double p, oclMat &dst, string
|
||||
args.push_back( make_pair( sizeof(cl_int), (void *)&dst.cols ));
|
||||
|
||||
float pf = static_cast<float>(p);
|
||||
if (!src.clCxt->supportsFeature(FEATURE_CL_DOUBLE))
|
||||
if(src.depth() == CV_32F)
|
||||
args.push_back( make_pair( sizeof(cl_float), (void *)&pf ));
|
||||
else
|
||||
args.push_back( make_pair( sizeof(cl_double), (void *)&p ));
|
||||
|
@ -65,12 +65,16 @@ __kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_st
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset);
|
||||
int src2_index = mad24(y, src2_step, x + src2_offset);
|
||||
int dst_index = mad24(y, dst_step, x + dst_offset);
|
||||
#ifdef INTEL_DEVICE // workaround for an Intel OpenCL compiler bug
|
||||
if(src1_index >= 0 && src2_index >= 0)
|
||||
#endif
|
||||
{
|
||||
dstT t0 = convertToDstT(src1[src1_index]);
|
||||
dstT t1 = convertToDstT(src2[src2_index]);
|
||||
dstT t2 = t0 - t1;
|
||||
|
||||
dstT t0 = convertToDstT(src1[src1_index]);
|
||||
dstT t1 = convertToDstT(src2[src2_index]);
|
||||
dstT t2 = t0 - t1;
|
||||
|
||||
dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2;
|
||||
dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -85,9 +89,13 @@ __kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int
|
||||
{
|
||||
int src1_index = mad24(y, src1_step, x + src1_offset);
|
||||
int dst_index = mad24(y, dst_step, x + dst_offset);
|
||||
#ifdef INTEL_DEVICE // workaround for an Intel OpenCL compiler bug
|
||||
if(src1_index >= 0)
|
||||
#endif
|
||||
{
|
||||
dstT t0 = convertToDstT(src1[src1_index]);
|
||||
|
||||
dstT t0 = convertToDstT(src1[src1_index]);
|
||||
|
||||
dst[dst_index] = t0 >= (dstT)(0) ? t0 : -t0;
|
||||
dst[dst_index] = t0 >= (dstT)(0) ? t0 : -t0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -49,16 +49,13 @@
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#define F double
|
||||
#else
|
||||
#define F float
|
||||
#endif
|
||||
|
||||
/************************************** pow **************************************/
|
||||
|
||||
__kernel void arithm_pow(__global T * src, int src_step, int src_offset,
|
||||
__global T * dst, int dst_step, int dst_offset,
|
||||
int rows, int cols, F p)
|
||||
__kernel void arithm_pow(__global VT * src, int src_step, int src_offset,
|
||||
__global VT * dst, int dst_step, int dst_offset,
|
||||
int rows, int cols, T p)
|
||||
{
|
||||
int x = get_global_id(0);
|
||||
int y = get_global_id(1);
|
||||
@ -68,8 +65,8 @@ __kernel void arithm_pow(__global T * src, int src_step, int src_offset,
|
||||
int src_index = mad24(y, src_step, x + src_offset);
|
||||
int dst_index = mad24(y, dst_step, x + dst_offset);
|
||||
|
||||
T src_data = src[src_index];
|
||||
T tmp = src_data > 0 ? exp(p * log(src_data)) : (src_data == 0 ? 0 : exp(p * log(fabs(src_data))));
|
||||
VT src_data = src[src_index];
|
||||
VT tmp = src_data > 0 ? exp(p * log(src_data)) : (src_data == 0 ? 0 : exp(p * log(fabs(src_data))));
|
||||
|
||||
dst[dst_index] = tmp;
|
||||
}
|
||||
|
@ -250,7 +250,8 @@ namespace cv
|
||||
&& devInfo.deviceType == CVCL_DEVICE_TYPE_CPU
|
||||
&& devInfo.platform->platformVendor.find("Intel") != std::string::npos
|
||||
&& (devInfo.deviceVersion.find("Build 56860") != std::string::npos
|
||||
|| devInfo.deviceVersion.find("Build 76921") != std::string::npos))
|
||||
|| devInfo.deviceVersion.find("Build 76921") != std::string::npos
|
||||
|| devInfo.deviceVersion.find("Build 78712") != std::string::npos))
|
||||
build_options += " -D BYPASS_VSTORE=true";
|
||||
|
||||
size_t globalThreads[3] = { divUp(src.cols, VEC_SIZE), src.rows, 1 };
|
||||
|
Loading…
x
Reference in New Issue
Block a user