Enabled accuracy tests for the functions that use AMD BLAS/FFT internally

This commit is contained in:
Ilya Lavrenov
2013-10-11 00:00:01 +04:00
parent 1f51e6c0de
commit 1be77dd2f3
11 changed files with 19 additions and 28 deletions

View File

@@ -50,7 +50,7 @@ using namespace cv::ocl;
#if !defined HAVE_CLAMDFFT
void cv::ocl::dft(const oclMat&, oclMat&, Size, int)
{
CV_Error(CV_StsNotImplemented, "OpenCL DFT is not implemented");
CV_Error(CV_OpenCLNoAMDBlasFft, "OpenCL DFT is not implemented");
}
namespace cv { namespace ocl {
void fft_teardown();

View File

@@ -58,12 +58,12 @@ void clBlasTeardown();
void cv::ocl::gemm(const oclMat&, const oclMat&, double,
const oclMat&, double, oclMat&, int)
{
CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
CV_Error(CV_OpenCLNoAMDBlasFft, "OpenCL BLAS is not implemented");
}
void cv::ocl::clBlasSetup()
{
CV_Error(CV_StsNotImplemented, "OpenCL BLAS is not implemented");
CV_Error(CV_OpenCLNoAMDBlasFft, "OpenCL BLAS is not implemented");
}
void cv::ocl::clBlasTeardown()

View File

@@ -211,7 +211,7 @@ __kernel void filter2D(
barrier(CLK_LOCAL_MEM_FENCE);
if(globalRow < rows && globalCol < cols)
{
T_SUM sum = (T_SUM)SUM_ZERO;
T_SUM sum = (T_SUM)(SUM_ZERO);
int filterIdx = 0;
for(int i = 0; i < FILTER_SIZE; i++)
{
@@ -291,7 +291,7 @@ __kernel void filter2D_3x3(
T_IMG data = src[mad24(selected_row, src_step, selected_cols)];
int con = selected_row >= 0 && selected_row < wholerows && selected_cols >= 0 && selected_cols < wholecols;
data = con ? data : 0;
data = con ? data : (T_IMG)(0);
local_data[mad24(i, LOCAL_MEM_STEP, lX)] = data;
if(lX < (ANX << 1))
@@ -300,7 +300,7 @@ __kernel void filter2D_3x3(
data = src[mad24(selected_row, src_step, selected_cols)];
con = selected_row >= 0 && selected_row < wholerows && selected_cols >= 0 && selected_cols < wholecols;
data = con ? data : 0;
data = con ? data : (T_IMG)(0);
local_data[mad24(i, LOCAL_MEM_STEP, lX) + groupX_size] = data;
}
#else

View File

@@ -290,7 +290,7 @@ void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_,
minimum += cmax_disc_term;
float4 sum = 0;
float4 sum = (float4)(0);
prev = convert_float4(t_dst[CNDISP - 1]);
for (int disp = CNDISP - 2; disp >= 0; disp--)
{
@@ -308,7 +308,7 @@ void message(__global T *us_, __global T *ds_, __global T *ls_, __global T *rs_,
t_dst[CNDISP - 1] = saturate_cast4(dst_reg);
sum += dst_reg;
sum /= CNDISP;
sum /= (float4)(CNDISP);
#pragma unroll
for(int i = 0, idx = 0; i < CNDISP; ++i, idx+=msg_disp_step)
{