Added more GPU performance tests and refactored
This commit is contained in:
parent
11579324d8
commit
79ba160c1c
@ -9,11 +9,14 @@ void TestSystem::run()
|
||||
// Run initializers
|
||||
vector<Runnable*>::iterator it = inits_.begin();
|
||||
for (; it != inits_.end(); ++it)
|
||||
{
|
||||
(*it)->run();
|
||||
}
|
||||
|
||||
cout << setiosflags(ios_base::left);
|
||||
cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms"
|
||||
<< setw(10) << "SPEEDUP" << "DESCRIPTION\n";
|
||||
<< setw(10) << "SPEEDUP"
|
||||
<< "DESCRIPTION\n";
|
||||
cout << resetiosflags(ios_base::left);
|
||||
|
||||
// Run tests
|
||||
@ -24,30 +27,23 @@ void TestSystem::run()
|
||||
try
|
||||
{
|
||||
(*it)->run();
|
||||
flush_subtest_data();
|
||||
flushSubtestData();
|
||||
}
|
||||
catch (const cv::Exception& e)
|
||||
catch (const cv::Exception&)
|
||||
{
|
||||
cout << TAB << "error";
|
||||
switch (e.code)
|
||||
{
|
||||
case CV_StsNoMem: cout << ": out of memory"; break;
|
||||
}
|
||||
if (!description_.str().empty())
|
||||
cout << " [" << description_.str() << "]";
|
||||
cout << endl;
|
||||
reset_subtest_data();
|
||||
resetSubtestData();
|
||||
}
|
||||
}
|
||||
|
||||
cout << setiosflags(ios_base::fixed | ios_base::left);
|
||||
cout << "\naverage GPU speedup: x" << setprecision(3)
|
||||
<< speedup_total_ / num_subtests_called_ << endl;
|
||||
cout << resetiosflags(ios_base::fixed | ios_base::left);
|
||||
cout << setiosflags(ios_base::fixed);
|
||||
cout << "\naverage GPU speedup: x"
|
||||
<< setprecision(3) << speedup_total_ / num_subtests_called_
|
||||
<< endl;
|
||||
cout << resetiosflags(ios_base::fixed);
|
||||
}
|
||||
|
||||
|
||||
void TestSystem::flush_subtest_data()
|
||||
void TestSystem::flushSubtestData()
|
||||
{
|
||||
if (!can_flush_)
|
||||
return;
|
||||
@ -58,9 +54,10 @@ void TestSystem::flush_subtest_data()
|
||||
double speedup = static_cast<double>(cpu_time) / std::max(1, gpu_time);
|
||||
speedup_total_ += speedup;
|
||||
|
||||
cout << TAB << setiosflags(ios_base::fixed | ios_base::left);
|
||||
cout << TAB << setiosflags(ios_base::left);
|
||||
|
||||
stringstream stream;
|
||||
|
||||
stream << cpu_time;
|
||||
cout << setw(10) << stream.str();
|
||||
|
||||
@ -73,11 +70,10 @@ void TestSystem::flush_subtest_data()
|
||||
cout << setw(10) << stream.str();
|
||||
|
||||
cout << description_.str();
|
||||
|
||||
cout << resetiosflags(ios_base::fixed | ios_base::left) << endl;
|
||||
cout << resetiosflags(ios_base::left) << endl;
|
||||
|
||||
num_subtests_called_++;
|
||||
reset_subtest_data();
|
||||
resetSubtestData();
|
||||
}
|
||||
|
||||
|
||||
|
@ -38,6 +38,13 @@ public:
|
||||
|
||||
void run();
|
||||
|
||||
// Ends current subtest and starts new one
|
||||
std::stringstream& subtest()
|
||||
{
|
||||
flushSubtestData();
|
||||
return description_;
|
||||
}
|
||||
|
||||
void cpuOn() { cpu_started_ = cv::getTickCount(); }
|
||||
|
||||
void cpuOff()
|
||||
@ -56,20 +63,13 @@ public:
|
||||
can_flush_ = true;
|
||||
}
|
||||
|
||||
// Ends current subtest and starts new one
|
||||
std::stringstream& subtest()
|
||||
{
|
||||
flush_subtest_data();
|
||||
return description_;
|
||||
}
|
||||
|
||||
private:
|
||||
TestSystem(): can_flush_(false), cpu_elapsed_(0), gpu_elapsed_(0),
|
||||
speedup_total_(0.0), num_subtests_called_(0) {};
|
||||
|
||||
void flush_subtest_data();
|
||||
void flushSubtestData();
|
||||
|
||||
void reset_subtest_data()
|
||||
void resetSubtestData()
|
||||
{
|
||||
cpu_elapsed_ = 0;
|
||||
gpu_elapsed_ = 0;
|
||||
@ -93,17 +93,6 @@ private:
|
||||
};
|
||||
|
||||
|
||||
#define TEST(name) \
|
||||
struct name##_test: Runnable \
|
||||
{ \
|
||||
name##_test(): Runnable(#name) { \
|
||||
TestSystem::instance()->addTest(this); \
|
||||
} \
|
||||
void run(); \
|
||||
} name##_test_instance; \
|
||||
void name##_test::run()
|
||||
|
||||
|
||||
#define INIT(name) \
|
||||
struct name##_init: Runnable \
|
||||
{ \
|
||||
@ -115,12 +104,22 @@ private:
|
||||
void name##_init::run()
|
||||
|
||||
|
||||
// TEST(name) { ...body... }
//
// Declares a struct name##_test derived from Runnable, whose constructor
// passes the stringified name to Runnable and registers the object with
// TestSystem::instance()->addTest(this). A single object of that struct,
// name##_test_instance, is defined right after the struct, so registration
// happens when that object is constructed. The macro ends with the header
// of name##_test::run(), which means the braces written after TEST(name)
// become the body of run().
#define TEST(name) \
|
||||
struct name##_test: Runnable \
|
||||
{ \
|
||||
name##_test(): Runnable(#name) { \
|
||||
TestSystem::instance()->addTest(this); \
|
||||
} \
|
||||
void run(); \
|
||||
} name##_test_instance; \
|
||||
void name##_test::run()
|
||||
|
||||
#define SUBTEST TestSystem::instance()->subtest()
|
||||
#define DESCRIPTION TestSystem::instance()->subtest()
|
||||
#define CPU_ON TestSystem::instance()->cpuOn()
|
||||
#define GPU_ON TestSystem::instance()->gpuOn()
|
||||
#define CPU_OFF TestSystem::instance()->cpuOff()
|
||||
#define GPU_OFF TestSystem::instance()->gpuOff()
|
||||
#define SUBTEST TestSystem::instance()->subtest()
|
||||
#define DESCRIPTION TestSystem::instance()->subtest()
|
||||
|
||||
// Test-data helper used by the tests to prepare their input matrices.
// Only the declaration is visible here; the definition lives elsewhere.
// NOTE(review): presumably (re)allocates `mat` as rows x cols of `type` and
// fills it with values between `low` and `high` -- confirm against the
// definition, including whether `high` is inclusive.
void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low,
|
||||
cv::Scalar high);
|
||||
|
@ -6,42 +6,40 @@
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
|
||||
// This code calls CUFFT DFT and initializes that lib
|
||||
INIT(CUFFT_library)
|
||||
INIT(matchTemplate)
|
||||
{
|
||||
Mat src, templ;
|
||||
gen(src, 500, 500, CV_32F, 0, 1);
|
||||
gen(templ, 500, 500, CV_32F, 0, 1);
|
||||
Mat src; gen(src, 500, 500, CV_32F, 0, 1);
|
||||
Mat templ; gen(templ, 500, 500, CV_32F, 0, 1);
|
||||
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_templ(templ);
|
||||
gpu::GpuMat d_result;
|
||||
gpu::GpuMat d_src(src), d_templ(templ), d_dst;
|
||||
|
||||
gpu::matchTemplate(d_src, d_templ, d_result, CV_TM_CCORR);
|
||||
gpu::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
|
||||
}
|
||||
|
||||
|
||||
TEST(matchTemplate)
|
||||
{
|
||||
Mat src, templ, result;
|
||||
Mat src, templ, dst;
|
||||
gen(src, 3000, 3000, CV_32F, 0, 1);
|
||||
|
||||
gpu::GpuMat d_image(src), d_templ, d_result;
|
||||
gpu::GpuMat d_src(src), d_templ, d_dst;
|
||||
|
||||
for (int templ_size = 5; templ_size <= 1000; templ_size *= 2)
|
||||
for (int templ_size = 5; templ_size < 200; templ_size *= 5)
|
||||
{
|
||||
SUBTEST << "src " << src.rows << ", templ " << templ_size << ", 32F, CCORR";
|
||||
|
||||
gen(templ, templ_size, templ_size, CV_32F, 0, 1);
|
||||
dst.create(src.rows - templ.rows + 1, src.cols - templ.cols + 1, CV_32F);
|
||||
|
||||
CPU_ON;
|
||||
matchTemplate(src, templ, result, CV_TM_CCORR);
|
||||
matchTemplate(src, templ, dst, CV_TM_CCORR);
|
||||
CPU_OFF;
|
||||
|
||||
d_templ = templ;
|
||||
d_dst.create(d_src.rows - d_templ.rows + 1, d_src.cols - d_templ.cols + 1, CV_32F);
|
||||
|
||||
GPU_ON;
|
||||
gpu::matchTemplate(d_image, d_templ, d_result, CV_TM_CCORR);
|
||||
gpu::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
@ -86,6 +84,7 @@ TEST(remap)
|
||||
gen(src, size, size, CV_8UC1, 0, 256);
|
||||
gen(xmap, size, size, CV_32F, 0, size);
|
||||
gen(ymap, size, size, CV_32F, 0, size);
|
||||
dst.create(xmap.size(), src.type());
|
||||
|
||||
CPU_ON;
|
||||
remap(src, dst, xmap, ymap, INTER_LINEAR);
|
||||
@ -94,6 +93,7 @@ TEST(remap)
|
||||
d_src = src;
|
||||
d_xmap = xmap;
|
||||
d_ymap = ymap;
|
||||
d_dst.create(d_xmap.size(), d_src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::remap(d_src, d_dst, d_xmap, d_ymap);
|
||||
@ -107,17 +107,19 @@ TEST(dft)
|
||||
Mat src, dst;
|
||||
gpu::GpuMat d_src, d_dst;
|
||||
|
||||
for (int size = 1000; size <= 8000; size *= 2)
|
||||
for (int size = 1000; size <= 4000; size *= 2)
|
||||
{
|
||||
SUBTEST << "size " << size << ", 32FC2, complex-to-complex";
|
||||
|
||||
gen(src, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1));
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
CPU_ON;
|
||||
dft(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
d_src = src;
|
||||
d_dst.create(d_src.size(), d_src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::dft(d_src, d_dst, Size(size, size));
|
||||
@ -136,12 +138,14 @@ TEST(cornerHarris)
|
||||
SUBTEST << "size " << size << ", 32FC1";
|
||||
|
||||
gen(src, size, size, CV_32F, 0, 1);
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
CPU_ON;
|
||||
cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101);
|
||||
CPU_OFF;
|
||||
|
||||
d_src = src;
|
||||
d_dst.create(src.size(), src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::cornerHarris(d_src, d_dst, 5, 7, 0.1);
|
||||
@ -150,22 +154,51 @@ TEST(cornerHarris)
|
||||
}
|
||||
|
||||
|
||||
TEST(memoryAllocation)
|
||||
TEST(integral)
|
||||
{
|
||||
Mat mat;
|
||||
gpu::GpuMat d_mat;
|
||||
Mat src, sum;
|
||||
gpu::GpuMat d_src, d_sum;
|
||||
|
||||
int begin = 100, end = 8000, step = 100;
|
||||
for (int size = 1000; size <= 8000; size *= 2)
|
||||
{
|
||||
SUBTEST << "size " << size << ", 8U";
|
||||
|
||||
DESCRIPTION << "32F matrices from " << begin << " to " << end;
|
||||
gen(src, size, size, CV_8U, 0, 256);
|
||||
sum.create(size + 1, size + 1, CV_32S);
|
||||
|
||||
CPU_ON;
|
||||
for (int size = begin; size <= end; size += step)
|
||||
mat.create(size, size, CV_32FC1);
|
||||
CPU_OFF;
|
||||
CPU_ON;
|
||||
integral(src, sum);
|
||||
CPU_OFF;
|
||||
|
||||
GPU_ON;
|
||||
for (int size = begin; size <= end; size += step)
|
||||
d_mat.create(size, size, CV_32FC1);
|
||||
GPU_OFF;
|
||||
d_src = src;
|
||||
d_sum.create(size + 1, size + 1, CV_32S);
|
||||
|
||||
GPU_ON;
|
||||
gpu::integral(d_src, d_sum);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Performance test comparing norm() with gpu::norm() on square CV_8U
// matrices whose side is 1000, 2000, 4000 and 8000 (doubling each pass).
TEST(norm)
|
||||
{
|
||||
Mat src;
|
||||
gpu::GpuMat d_src;
|
||||
|
|
||||
for (int size = 1000; size <= 8000; size *= 2)
|
||||
{
|
||||
// Begins a new subtest; the streamed text is its description.
SUBTEST << "size " << size << ", 8U";
|
||||
|
|
||||
// Prepare the input for this size (presumably random values -- see gen()).
gen(src, size, size, CV_8U, 0, 256);
|
||||
|
|
||||
CPU_ON;
|
||||
// Only the elapsed time is of interest; the returned value is discarded.
norm(src);
|
||||
CPU_OFF;
|
||||
|
|
||||
// Performed outside GPU_ON/GPU_OFF, so this assignment is not included
// in the GPU timing. NOTE(review): presumably a host-to-device upload.
d_src = src;
|
||||
|
|
||||
GPU_ON;
|
||||
// Return value discarded here as well.
gpu::norm(d_src);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user