Merge pull request #1599 from ilya-lavrenov:ocl_mac_kernel_warnings

This commit is contained in:
Andrey Pavlenko 2013-10-11 12:40:12 +04:00 committed by OpenCV Buildbot
commit 171e0e62ec
15 changed files with 53 additions and 39 deletions

View File

@ -266,7 +266,8 @@ enum {
CV_OpenGlNotSupported= -218, CV_OpenGlNotSupported= -218,
CV_OpenGlApiCallError= -219, CV_OpenGlApiCallError= -219,
CV_OpenCLDoubleNotSupported= -220, CV_OpenCLDoubleNotSupported= -220,
CV_OpenCLInitError= -221 CV_OpenCLInitError= -221,
CV_OpenCLNoAMDBlasFft= -222
}; };
/****************************************************************************************\ /****************************************************************************************\

View File

@ -212,13 +212,35 @@ void openCLVerifyKernel(const Context *ctx, cl_kernel kernel, size_t *localThrea
static double total_execute_time = 0; static double total_execute_time = 0;
static double total_kernel_time = 0; static double total_kernel_time = 0;
#endif #endif
// Normalizes a build-options string so that differently spaced but otherwise
// identical option strings produce the same result.
//
// - A NULL pointer yields an empty string.
// - Leading spaces are dropped.
// - Every run of consecutive spaces is collapsed into a single space
//   (a trailing run is therefore reduced to one space, not removed).
// - Only the ' ' character is treated as white space; all other characters
//   are copied unchanged.
//
// The returned string has exactly the length of the normalized text, so it is
// safe to compare it or use its size() directly, not only its c_str().
static std::string removeDuplicatedWhiteSpaces(const char * buildOptions)
{
    if (buildOptions == NULL)
        return "";

    const size_t length = strlen(buildOptions);
    size_t sidx = 0;

    // Skip leading spaces. The comparison must be against ' ': the NUL
    // terminator can never appear at an index below `length`, so testing for
    // 0 here would make this loop a no-op.
    while (sidx < length && buildOptions[sidx] == ' ')
        ++sidx;

    std::string opt;
    // Upper bound for the output; characters are appended, so the final size
    // reflects only what was actually kept (no trailing '\0' padding).
    opt.reserve(length - sidx);

    for ( ; sidx < length; ++sidx)
    {
        const char c = buildOptions[sidx];

        // Drop a space that would directly follow an already emitted space.
        if (c == ' ' && !opt.empty() && opt[opt.size() - 1] == ' ')
            continue;

        opt += c;
    }

    return opt;
}
void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3], void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, string kernelName, size_t globalThreads[3],
size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels, size_t localThreads[3], vector< pair<size_t, const void *> > &args, int channels,
int depth, const char *build_options) int depth, const char *build_options)
{ {
//construct kernel name //construct kernel name
//The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
//for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char) //for example split_C2_D3, represent the split kernel with channels = 2 and dataType Depth = 3(Data type is short)
stringstream idxStr; stringstream idxStr;
if(channels != -1) if(channels != -1)
idxStr << "_C" << channels; idxStr << "_C" << channels;
@ -227,7 +249,8 @@ void openCLExecuteKernel_(Context *ctx, const cv::ocl::ProgramEntry* source, str
kernelName += idxStr.str(); kernelName += idxStr.str();
cl_kernel kernel; cl_kernel kernel;
kernel = openCLGetKernelFromSource(ctx, source, kernelName, build_options); std::string fixedOptions = removeDuplicatedWhiteSpaces(build_options);
kernel = openCLGetKernelFromSource(ctx, source, kernelName, fixedOptions.c_str());
if ( localThreads != NULL) if ( localThreads != NULL)
{ {

View File

@ -50,7 +50,7 @@ using namespace cv::ocl;
#if !defined HAVE_CLAMDFFT #if !defined HAVE_CLAMDFFT
void cv::ocl::dft(const oclMat&, oclMat&, Size, int) void cv::ocl::dft(const oclMat&, oclMat&, Size, int)
{ {
CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented"); CV_Error(CV_OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented");
} }
namespace cv { namespace ocl { namespace cv { namespace ocl {
void fft_teardown(); void fft_teardown();

View File

@ -58,12 +58,12 @@ void clBlasTeardown();
void cv::ocl::gemm(const oclMat&, const oclMat&, double, void cv::ocl::gemm(const oclMat&, const oclMat&, double,
const oclMat&, double, oclMat&, int) const oclMat&, double, oclMat&, int)
{ {
CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented"); CV_Error(CV_OpenCLNoAMDBlasFft, "OpenCL BLAS is not implemented");
} }
void cv::ocl::clBlasSetup() void cv::ocl::clBlasSetup()
{ {
CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented"); CV_Error(CV_OpenCLNoAMDBlasFft, "OpenCL BLAS is not implemented");
} }
void cv::ocl::clBlasTeardown() void cv::ocl::clBlasTeardown()

View File

@ -1497,7 +1497,7 @@ namespace cv
openCLSafeCall(clReleaseKernel(kernel)); openCLSafeCall(clReleaseKernel(kernel));
static char opt[20] = {0}; static char opt[20] = {0};
sprintf(opt, " -D WAVE_SIZE=%d", (int)wave_size); sprintf(opt, "-D WAVE_SIZE=%d", (int)wave_size);
openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt); openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt);
} }
} }

View File

@ -70,7 +70,7 @@ __kernel void arithm_absdiff_nonsaturate_binary(__global srcT *src1, int src1_st
dstT t1 = convertToDstT(src2[src2_index]); dstT t1 = convertToDstT(src2[src2_index]);
dstT t2 = t0 - t1; dstT t2 = t0 - t1;
dst[dst_index] = t2 >= 0 ? t2 : -t2; dst[dst_index] = t2 >= (dstT)(0) ? t2 : -t2;
} }
} }
@ -88,6 +88,6 @@ __kernel void arithm_absdiff_nonsaturate(__global srcT *src1, int src1_step, int
dstT t0 = convertToDstT(src1[src1_index]); dstT t0 = convertToDstT(src1[src1_index]);
dst[dst_index] = t0 >= 0 ? t0 : -t0; dst[dst_index] = t0 >= (dstT)(0) ? t0 : -t0;
} }
} }

View File

@ -51,14 +51,14 @@
#endif #endif
#endif #endif
#if defined (FUNC_SUM) #if FUNC_SUM
#define FUNC(a, b) b += a; #define FUNC(a, b) b += a;
#endif #elif FUNC_ABS_SUM
#if defined (FUNC_ABS_SUM) #define FUNC(a, b) b += a >= (dstT)(0) ? a : -a;
#define FUNC(a, b) b += a >= 0 ? a : -a; #elif FUNC_SQR_SUM
#endif
#if defined (FUNC_SQR_SUM)
#define FUNC(a, b) b += a * a; #define FUNC(a, b) b += a * a;
#else
#error No sum function
#endif #endif
/**************************************Array buffer SUM**************************************/ /**************************************Array buffer SUM**************************************/

View File

@ -211,7 +211,7 @@ __kernel void filter2D(
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if(globalRow < rows && globalCol < cols) if(globalRow < rows && globalCol < cols)
{ {
T_SUM sum = (T_SUM)SUM_ZERO; T_SUM sum = (T_SUM)(SUM_ZERO);
int filterIdx = 0; int filterIdx = 0;
for(int i = 0; i < FILTER_SIZE; i++) for(int i = 0; i < FILTER_SIZE; i++)
{ {
@ -291,7 +291,7 @@ __kernel void filter2D_3x3(
T_IMG data = src[mad24(selected_row, src_step, selected_cols)]; T_IMG data = src[mad24(selected_row, src_step, selected_cols)];
int con = selected_row >= 0 && selected_row < wholerows && selected_cols >= 0 && selected_cols < wholecols; int con = selected_row >= 0 && selected_row < wholerows && selected_cols >= 0 && selected_cols < wholecols;
data = con ? data : 0; data = con ? data : (T_IMG)(0);
local_data[mad24(i, LOCAL_MEM_STEP, lX)] = data; local_data[mad24(i, LOCAL_MEM_STEP, lX)] = data;
if(lX < (ANX << 1)) if(lX < (ANX << 1))
@ -300,7 +300,7 @@ __kernel void filter2D_3x3(
data = src[mad24(selected_row, src_step, selected_cols)]; data = src[mad24(selected_row, src_step, selected_cols)];
con = selected_row >= 0 && selected_row < wholerows && selected_cols >= 0 && selected_cols < wholecols; con = selected_row >= 0 && selected_row < wholerows && selected_cols >= 0 && selected_cols < wholecols;
data = con ? data : 0; data = con ? data : (T_IMG)(0);
local_data[mad24(i, LOCAL_MEM_STEP, lX) + groupX_size] = data; local_data[mad24(i, LOCAL_MEM_STEP, lX) + groupX_size] = data;
} }
#else #else

View File

@ -290,7 +290,7 @@ void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_,
minimum += cmax_disc_term; minimum += cmax_disc_term;
float4 sum = 0; float4 sum = (float4)(0);
prev = convert_float4(t_dst[CNDISP - 1]); prev = convert_float4(t_dst[CNDISP - 1]);
for (int disp = CNDISP - 2; disp >= 0; disp--) for (int disp = CNDISP - 2; disp >= 0; disp--)
{ {
@ -308,7 +308,7 @@ void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_,
t_dst[CNDISP - 1] = saturate_cast4(dst_reg); t_dst[CNDISP - 1] = saturate_cast4(dst_reg);
sum += dst_reg; sum += dst_reg;
sum /= CNDISP; sum /= (float4)(CNDISP);
#pragma unroll #pragma unroll
for(int i = 0, idx = 0; i < CNDISP; ++i, idx+=msg_disp_step) for(int i = 0, idx = 0; i < CNDISP; ++i, idx+=msg_disp_step)
{ {

View File

@ -134,7 +134,7 @@ static void lkSparse_run(oclMat &I, oclMat &J,
openCLSafeCall(clReleaseKernel(kernel)); openCLSafeCall(clReleaseKernel(kernel));
static char opt[32] = {0}; static char opt[32] = {0};
sprintf(opt, " -D WAVE_SIZE=%d", wave_size); sprintf(opt, "-D WAVE_SIZE=%d", wave_size);
openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads, openCLExecuteKernel(clCxt, &pyrlk, kernelName, globalThreads, localThreads,
args, I.oclchannels(), I.depth(), opt); args, I.oclchannels(), I.depth(), opt);

View File

@ -47,8 +47,6 @@
using namespace std; using namespace std;
#ifdef HAVE_CLAMDFFT
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// Dft // Dft
@ -102,9 +100,6 @@ OCL_TEST_P(Dft, R2CthenC2R)
EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4); EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4);
} }
INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine( INSTANTIATE_TEST_CASE_P(OCL_ImgProc, Dft, testing::Combine(
testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)), testing::Values(cv::Size(2, 3), cv::Size(5, 4), cv::Size(25, 20), cv::Size(512, 1), cv::Size(1024, 768)),
testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) )); testing::Values(0, (int)cv::DFT_ROWS, (int)cv::DFT_SCALE) ));
#endif // HAVE_CLAMDFFT

View File

@ -42,12 +42,13 @@
// //
//M*/ //M*/
#include "test_precomp.hpp" #include "test_precomp.hpp"
using namespace std; using namespace std;
#ifdef HAVE_CLAMDBLAS
//////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////
// GEMM // GEMM
PARAM_TEST_CASE(Gemm, int, cv::Size, int) PARAM_TEST_CASE(Gemm, int, cv::Size, int)
{ {
int type; int type;
@ -81,4 +82,3 @@ INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/), testing::Values(CV_32FC1, CV_32FC2/*, CV_64FC1, CV_64FC2*/),
testing::Values(cv::Size(20, 20), cv::Size(300, 300)), testing::Values(cv::Size(20, 20), cv::Size(300, 300)),
testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T)))); testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T))));
#endif

View File

@ -46,8 +46,6 @@
#ifdef HAVE_OPENCL #ifdef HAVE_OPENCL
#ifdef HAVE_CLAMDBLAS
using namespace cv; using namespace cv;
using namespace cv::ocl; using namespace cv::ocl;
using namespace cvtest; using namespace cvtest;
@ -147,6 +145,4 @@ OCL_TEST_P(Kalman, Accuracy)
INSTANTIATE_TEST_CASE_P(OCL_Video, Kalman, Combine(Values(3, 7), Values(30))); INSTANTIATE_TEST_CASE_P(OCL_Video, Kalman, Combine(Values(3, 7), Values(30)));
#endif // HAVE_CLAMDBLAS
#endif // HAVE_OPENCL #endif // HAVE_OPENCL

View File

@ -128,8 +128,6 @@ INSTANTIATE_TEST_CASE_P(OCL_ML, KNN, Combine(Values(6, 5), Values(Size(200, 400)
////////////////////////////////SVM///////////////////////////////////////////////// ////////////////////////////////SVM/////////////////////////////////////////////////
#ifdef HAVE_CLAMDBLAS
PARAM_TEST_CASE(SVM_OCL, int, int, int) PARAM_TEST_CASE(SVM_OCL, int, int, int)
{ {
cv::Size size; cv::Size size;
@ -307,6 +305,5 @@ INSTANTIATE_TEST_CASE_P(OCL_ML, SVM_OCL, testing::Combine(
Values((int)CvSVM::C_SVC, (int)CvSVM::NU_SVC, (int)CvSVM::ONE_CLASS, (int)CvSVM::NU_SVR), Values((int)CvSVM::C_SVC, (int)CvSVM::NU_SVC, (int)CvSVM::ONE_CLASS, (int)CvSVM::NU_SVR),
Values(2, 3, 4) Values(2, 3, 4)
)); ));
#endif // HAVE_CLAMDBLAS
#endif // HAVE_OPENCL #endif // HAVE_OPENCL

View File

@ -291,10 +291,12 @@ CV_FLAGS(DftFlags, DFT_INVERSE, DFT_SCALE, DFT_ROWS, DFT_COMPLEX_OUTPUT, DFT_REA
} \ } \
catch (const cv::Exception & ex) \ catch (const cv::Exception & ex) \
{ \ { \
if (ex.code != CV_OpenCLDoubleNotSupported) \ if (ex.code == CV_OpenCLDoubleNotSupported)\
throw; \
else \
std::cout << "Test skipped (selected device does not support double)" << std::endl; \ std::cout << "Test skipped (selected device does not support double)" << std::endl; \
else if (ex.code == CV_OpenCLNoAMDBlasFft) \
std::cout << "Test skipped (AMD Blas / Fft libraries are not available)" << std::endl; \
else \
throw; \
} \ } \
} \ } \
\ \