format files to ANSI C style with coolformat

change the download channels to oclchannles()
fix bugs of arithm functions
perf fix of bilateral
bug fix of split test case
add build_warps functions
This commit is contained in:
niko
2012-10-11 16:22:47 +08:00
parent 69fbc6102c
commit 97156897b2
78 changed files with 15433 additions and 12118 deletions

View File

@@ -48,66 +48,66 @@ using namespace std;
#ifdef HAVE_CLAMDBLAS
////////////////////////////////////////////////////////////////////////////
// GEMM
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
PARAM_TEST_CASE(Gemm, int, cv::Size, int)
{
int type;
cv::Size mat_size;
int flags;
vector<cv::ocl::Info> info;
virtual void SetUp()
{
type = GET_PARAM(0);
mat_size = GET_PARAM(1);
flags = GET_PARAM(2);
int type;
cv::Size mat_size;
int flags;
vector<cv::ocl::Info> info;
virtual void SetUp()
{
type = GET_PARAM(0);
mat_size = GET_PARAM(1);
flags = GET_PARAM(2);
cv::ocl::getDevice(info);
}
cv::ocl::getDevice(info);
}
};
TEST_P(Gemm, Performance)
{
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
cv::ocl::oclMat ocl_dst;
cv::Mat a = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat b = randomMat(mat_size, type, 0.0, 10.0);
cv::Mat c = randomMat(mat_size, type, 0.0, 10.0);
cv::ocl::oclMat ocl_dst;
double totalgputick=0;
double totalgputick_kernel=0;
double t1=0;
double t2=0;
double totalgputick = 0;
double totalgputick_kernel = 0;
double t1 = 0;
double t2 = 0;
for(int j = 0; j < LOOP_TIMES+1; j ++)
{
for(int j = 0; j < LOOP_TIMES + 1; j ++)
{
t1 = (double)cvGetTickCount();//gpu start1
t1 = (double)cvGetTickCount();//gpu start1
cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload
cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload
cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload
cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload
cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload
cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload
t2=(double)cvGetTickCount();//kernel
cv::ocl::gemm(ga, gb, 1.0,gc, 1.0, ocl_dst, flags);
t2 = (double)cvGetTickCount() - t2;//kernel
t2 = (double)cvGetTickCount(); //kernel
cv::ocl::gemm(ga, gb, 1.0, gc, 1.0, ocl_dst, flags);
t2 = (double)cvGetTickCount() - t2;//kernel
cv::Mat cpu_dst;
ocl_dst.download (cpu_dst);//download
cv::Mat cpu_dst;
ocl_dst.download (cpu_dst);//download
t1 = (double)cvGetTickCount() - t1;//gpu end
t1 = (double)cvGetTickCount() - t1;//gpu end
if(j == 0)
continue;
if(j == 0)
continue;
totalgputick=t1+totalgputick;
totalgputick_kernel=t2+totalgputick_kernel;
totalgputick = t1 + totalgputick;
totalgputick_kernel = t2 + totalgputick_kernel;
}
cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl;
}
cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl;
}
INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(
testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/),
testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)),
testing::Values(0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_1_T + cv::GEMM_2_T)));
#endif