Merge pull request #1599 from ilya-lavrenov:ocl_mac_kernel_warnings
This commit is contained in:
commit
171e0e62ec
@ -266,7 +266,8 @@ enum {
|
||||
CV_OpenGlNotSupported= -218,
|
||||
CV_OpenGlApiCallError= -219,
|
||||
CV_OpenCLDoubleNotSupported= -220,
|
||||
CV_OpenCLInitError= -221
|
||||
CV_OpenCLInitError= -221,
|
||||
CV_OpenCLNoAMDBlasFft= -222
|
||||
};
|
||||
|
||||
/****************************************************************************************\
|
||||
|
@ -212,13 +212,35 @@ void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThrea
|
||||
static double total_execute_time = 0;
|
||||
static double total_kernel_time = 0;
|
||||
#endif
|
||||
|
||||
/**
 * Normalizes the spacing of an OpenCL build-options string.
 *
 * Leading space characters are dropped and every run of consecutive spaces
 * is collapsed into a single space. Only the space character ' ' is treated
 * as whitespace; tabs and other characters are copied unchanged. If the input
 * ends with one or more spaces, a single trailing space is kept.
 *
 * @param buildOptions NUL-terminated options string; may be NULL.
 * @return the normalized options, or an empty string when buildOptions is NULL.
 *         The returned string's size() is exactly the number of characters kept.
 */
static std::string removeDuplicatedWhiteSpaces(const char * buildOptions)
{
    if (buildOptions == NULL)
        return "";

    // Skip leading spaces. The comparison must be against ' ': comparing against 0
    // can never match before the terminating NUL, so nothing would be skipped.
    const char * src = buildOptions;
    while (*src == ' ')
        ++src;

    std::string opt;
    opt.reserve(strlen(src)); // upper bound; the result only ever gets shorter

    for ( ; *src != '\0'; ++src)
    {
        // Copy the character unless it is a space directly following a space
        // that has already been emitted.
        if (*src != ' ' || opt.empty() || opt[opt.size() - 1] != ' ')
            opt += *src;
    }

    // Building by appending keeps size() in sync with the content, so no
    // trailing '\0' padding is returned to the caller.
    return opt;
}
|
||||
|
||||
void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3],
|
||||
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
|
||||
int depth, const char *build_options)
|
||||
{
|
||||
//construct kernel name
|
||||
//The rule is functionName_Cn_Dn: C represents Channels, D represents DataType Depth, and n represents an integer number
|
||||
//for example split_C2_D2 represents the split kernel with channels = 2 and dataType Depth = 2 (Data type is char)
|
||||
//for example split_C2_D3 represents the split kernel with channels = 2 and dataType Depth = 3 (Data type is short)
|
||||
stringstream idxStr;
|
||||
if(channels != -1)
|
||||
idxStr << "_C" << channels;
|
||||
@ -227,7 +249,8 @@ void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, str
|
||||
kernelName += idxStr.str();
|
||||
|
||||
cl_kernel kernel;
|
||||
kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options);
|
||||
std::string fixedOptions = removeDuplicatedWhiteSpaces(build_options);
|
||||
kernel = openCLGetKernelFromSource(ctx, source, kernelName, fixedOptions.c_str());
|
||||
|
||||
if ( localThreads != NULL)
|
||||
{
|
||||
|
@ -50,7 +50,7 @@ using namespace cv::ocl;
|
||||
#if !defined HAVE_CLAMDFFT
|
||||
void cv::ocl::dft(const oclMat&, oclMat&, Size, int)
|
||||
{
|
||||
CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented");
|
||||
CV_Error(CV_OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented");
|
||||
}
|
||||
namespace cv { namespace ocl {
|
||||
void fft_teardown();
|
||||
|
@ -58,12 +58,12 @@ void clBlasTeardown();
|
||||
void cv::ocl::gemm(const oclMat&, const oclMat&, double,
|
||||
const oclMat&, double, oclMat&, int)
|
||||
{
|
||||
CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
|
||||
CV_Error(CV_OpenCLNoAMDBlasFft, "OpenCL BLAS is not implemented");
|
||||
}
|
||||
|
||||
void cv::ocl::clBlasSetup()
|
||||
{
|
||||
CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
|
||||
CV_Error(CV_OpenCLNoAMDBlasFft, "OpenCL BLAS is not implemented");
|
||||
}
|
||||
|
||||
void cv::ocl::clBlasTeardown()
|
||||
|
@ -1497,7 +1497,7 @@ namespace cv
|
||||
openCLSafeCall(clReleaseKernel(kernel));
|
||||
|
||||
static char opt[20] = {0};
|
||||
sprintf(opt, " -D WAVE_SIZE=%d", (int)wave_size);
|
||||
sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size);
|
||||
openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt);
|
||||
}
|
||||
}
|
||||
|
@ -70,7 +70,7 @@ __kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_st
|
||||
dstT t1 = convertToDstT(src2[src2_index]);
|
||||
dstT t2 = t0 - t1;
|
||||
|
||||
dst[dst_index] = t2 >= 0 ? t2 : -t2;
|
||||
dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2;
|
||||
}
|
||||
}
|
||||
|
||||
@ -88,6 +88,6 @@ __kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int
|
||||
|
||||
dstT t0 = convertToDstT(src1[src1_index]);
|
||||
|
||||
dst[dst_index] = t0 >= 0 ? t0 : -t0;
|
||||
dst[dst_index] = t0 >= (dstT)(0) ? t0 : -t0;
|
||||
}
|
||||
}
|
||||
|
@ -51,14 +51,14 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined (FUNC_SUM)
|
||||
#if FUNC_SUM
|
||||
#define FUNC(a, b) b += a;
|
||||
#endif
|
||||
#if defined (FUNC_ABS_SUM)
|
||||
#define FUNC(a, b) b += a >= 0 ? a : -a;
|
||||
#endif
|
||||
#if defined (FUNC_SQR_SUM)
|
||||
#elif FUNC_ABS_SUM
|
||||
#define FUNC(a, b) b += a >= (dstT)(0) ? a : -a;
|
||||
#elif FUNC_SQR_SUM
|
||||
#define FUNC(a, b) b += a * a;
|
||||
#else
|
||||
#error No sum function
|
||||
#endif
|
||||
|
||||
/**************************************Array buffer SUM**************************************/
|
||||
|
@ -211,7 +211,7 @@ __kernel void filter2D(
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
if(globalRow < rows && globalCol < cols)
|
||||
{
|
||||
T_SUM sum = (T_SUM)SUM_ZERO;
|
||||
T_SUM sum = (T_SUM)(SUM_ZERO);
|
||||
int filterIdx = 0;
|
||||
for(int i = 0; i < FILTER_SIZE; i++)
|
||||
{
|
||||
@ -291,7 +291,7 @@ __kernel void filter2D_3x3(
|
||||
|
||||
T_IMG data = src[mad24(selected_row, src_step, selected_cols)];
|
||||
int con = selected_row >= 0 && selected_row < wholerows && selected_cols >= 0 && selected_cols < wholecols;
|
||||
data = con ? data : 0;
|
||||
data = con ? data : (T_IMG)(0);
|
||||
local_data[mad24(i, LOCAL_MEM_STEP, lX)] = data;
|
||||
|
||||
if(lX < (ANX << 1))
|
||||
@ -300,7 +300,7 @@ __kernel void filter2D_3x3(
|
||||
|
||||
data = src[mad24(selected_row, src_step, selected_cols)];
|
||||
con = selected_row >= 0 && selected_row < wholerows && selected_cols >= 0 && selected_cols < wholecols;
|
||||
data = con ? data : 0;
|
||||
data = con ? data : (T_IMG)(0);
|
||||
local_data[mad24(i, LOCAL_MEM_STEP, lX) + groupX_size] = data;
|
||||
}
|
||||
#else
|
||||
|
@ -290,7 +290,7 @@ void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_,
|
||||
|
||||
minimum += cmax_disc_term;
|
||||
|
||||
float4 sum = 0;
|
||||
float4 sum = (float4)(0);
|
||||
prev = convert_float4(t_dst[CNDISP - 1]);
|
||||
for (int disp = CNDISP - 2; disp >= 0; disp--)
|
||||
{
|
||||
@ -308,7 +308,7 @@ void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_,
|
||||
t_dst[CNDISP - 1] = saturate_cast4(dst_reg);
|
||||
sum += dst_reg;
|
||||
|
||||
sum /= CNDISP;
|
||||
sum /= (float4)(CNDISP);
|
||||
#pragma unroll
|
||||
for(int i = 0, idx = 0; i < CNDISP; ++i, idx+=msg_disp_step)
|
||||
{
|
||||
|
@ -134,7 +134,7 @@ static void lkSparse_run(oclMat &I, oclMat &J,
|
||||
openCLSafeCall(clReleaseKernel(kernel));
|
||||
|
||||
static char opt[32] = {0};
|
||||
sprintf(opt, " -D WAVE_SIZE=%d", wave_size);
|
||||
sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
|
||||
|
||||
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads,
|
||||
args, I.oclchannels(), I.depth(), opt);
|
||||
|
@ -47,8 +47,6 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
#ifdef HAVE_CLAMDFFT
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// Dft
|
||||
|
||||
@ -102,9 +100,6 @@ OCL_TEST_P(Dft, R2CthenC2R)
|
||||
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4);
|
||||
}
|
||||
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine(
|
||||
testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)),
|
||||
testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) ));
|
||||
|
||||
#endif // HAVE_CLAMDFFT
|
||||
|
@ -42,12 +42,13 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
|
||||
using namespace std;
|
||||
#ifdef HAVE_CLAMDBLAS
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
// GEMM
|
||||
|
||||
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
|
||||
{
|
||||
int type;
|
||||
@ -81,4 +82,3 @@ INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
|
||||
testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
|
||||
testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
|
||||
testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T))));
|
||||
#endif
|
||||
|
@ -46,8 +46,6 @@
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
#ifdef HAVE_CLAMDBLAS
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
using namespace cvtest;
|
||||
@ -147,6 +145,4 @@ OCL_TEST_P(Kalman, Accuracy)
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(OCL_Video, Kalman, Combine(Values(3, 7), Values(30)));
|
||||
|
||||
#endif // HAVE_CLAMDBLAS
|
||||
|
||||
#endif // HAVE_OPENCL
|
||||
|
@ -128,8 +128,6 @@ INSTANTIATE_TEST_CASE_P(OCL_ML, KNN, Combine(Values(6, 5), Values(Size(200, 400)
|
||||
|
||||
////////////////////////////////SVM/////////////////////////////////////////////////
|
||||
|
||||
#ifdef HAVE_CLAMDBLAS
|
||||
|
||||
PARAM_TEST_CASE(SVM_OCL, int, int, int)
|
||||
{
|
||||
cv::Size size;
|
||||
@ -307,6 +305,5 @@ INSTANTIATE_TEST_CASE_P(OCL_ML, SVM_OCL, testing::Combine(
|
||||
Values((int)CvSVM::C_SVC, (int)CvSVM::NU_SVC, (int)CvSVM::ONE_CLASS, (int)CvSVM::NU_SVR),
|
||||
Values(2, 3, 4)
|
||||
));
|
||||
#endif // HAVE_CLAMDBLAS
|
||||
|
||||
#endif // HAVE_OPENCL
|
||||
|
@ -291,10 +291,12 @@ CV_FLAGS(DftFlags, DFT_INVERSE, DFT_SCALE, DFT_ROWS, DFT_COMPLEX_OUTPUT, DFT_REA
|
||||
} \
|
||||
catch (const cv::Exception & ex) \
|
||||
{ \
|
||||
if (ex.code != CV_OpenCLDoubleNotSupported) \
|
||||
throw; \
|
||||
else \
|
||||
if (ex.code == CV_OpenCLDoubleNotSupported)\
|
||||
std::cout << "Test skipped (selected device does not support double)" << std::endl; \
|
||||
else if (ex.code == CV_OpenCLNoAMDBlasFft) \
|
||||
std::cout << "Test skipped (AMD Blas / Fft libraries are not available)" << std::endl; \
|
||||
else \
|
||||
throw; \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
|
Loading…
x
Reference in New Issue
Block a user