Added more GPU performance tests and refactored the test system
This commit is contained in:
@@ -9,11 +9,14 @@ void TestSystem::run()
|
|||||||
// Run initializers
|
// Run initializers
|
||||||
vector<Runnable*>::iterator it = inits_.begin();
|
vector<Runnable*>::iterator it = inits_.begin();
|
||||||
for (; it != inits_.end(); ++it)
|
for (; it != inits_.end(); ++it)
|
||||||
|
{
|
||||||
(*it)->run();
|
(*it)->run();
|
||||||
|
}
|
||||||
|
|
||||||
cout << setiosflags(ios_base::left);
|
cout << setiosflags(ios_base::left);
|
||||||
cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms"
|
cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms"
|
||||||
<< setw(10) << "SPEEDUP" << "DESCRIPTION\n";
|
<< setw(10) << "SPEEDUP"
|
||||||
|
<< "DESCRIPTION\n";
|
||||||
cout << resetiosflags(ios_base::left);
|
cout << resetiosflags(ios_base::left);
|
||||||
|
|
||||||
// Run tests
|
// Run tests
|
||||||
@@ -24,30 +27,23 @@ void TestSystem::run()
|
|||||||
try
|
try
|
||||||
{
|
{
|
||||||
(*it)->run();
|
(*it)->run();
|
||||||
flush_subtest_data();
|
flushSubtestData();
|
||||||
}
|
}
|
||||||
catch (const cv::Exception& e)
|
catch (const cv::Exception&)
|
||||||
{
|
{
|
||||||
cout << TAB << "error";
|
resetSubtestData();
|
||||||
switch (e.code)
|
|
||||||
{
|
|
||||||
case CV_StsNoMem: cout << ": out of memory"; break;
|
|
||||||
}
|
|
||||||
if (!description_.str().empty())
|
|
||||||
cout << " [" << description_.str() << "]";
|
|
||||||
cout << endl;
|
|
||||||
reset_subtest_data();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cout << setiosflags(ios_base::fixed | ios_base::left);
|
cout << setiosflags(ios_base::fixed);
|
||||||
cout << "\naverage GPU speedup: x" << setprecision(3)
|
cout << "\naverage GPU speedup: x"
|
||||||
<< speedup_total_ / num_subtests_called_ << endl;
|
<< setprecision(3) << speedup_total_ / num_subtests_called_
|
||||||
cout << resetiosflags(ios_base::fixed | ios_base::left);
|
<< endl;
|
||||||
|
cout << resetiosflags(ios_base::fixed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void TestSystem::flush_subtest_data()
|
void TestSystem::flushSubtestData()
|
||||||
{
|
{
|
||||||
if (!can_flush_)
|
if (!can_flush_)
|
||||||
return;
|
return;
|
||||||
@@ -58,9 +54,10 @@ void TestSystem::flush_subtest_data()
|
|||||||
double speedup = static_cast<double>(cpu_time) / std::max(1, gpu_time);
|
double speedup = static_cast<double>(cpu_time) / std::max(1, gpu_time);
|
||||||
speedup_total_ += speedup;
|
speedup_total_ += speedup;
|
||||||
|
|
||||||
cout << TAB << setiosflags(ios_base::fixed | ios_base::left);
|
cout << TAB << setiosflags(ios_base::left);
|
||||||
|
|
||||||
stringstream stream;
|
stringstream stream;
|
||||||
|
|
||||||
stream << cpu_time;
|
stream << cpu_time;
|
||||||
cout << setw(10) << stream.str();
|
cout << setw(10) << stream.str();
|
||||||
|
|
||||||
@@ -73,11 +70,10 @@ void TestSystem::flush_subtest_data()
|
|||||||
cout << setw(10) << stream.str();
|
cout << setw(10) << stream.str();
|
||||||
|
|
||||||
cout << description_.str();
|
cout << description_.str();
|
||||||
|
cout << resetiosflags(ios_base::left) << endl;
|
||||||
cout << resetiosflags(ios_base::fixed | ios_base::left) << endl;
|
|
||||||
|
|
||||||
num_subtests_called_++;
|
num_subtests_called_++;
|
||||||
reset_subtest_data();
|
resetSubtestData();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -38,6 +38,13 @@ public:
|
|||||||
|
|
||||||
void run();
|
void run();
|
||||||
|
|
||||||
|
// Ends the current subtest and starts a new one: flushes whatever the
// current subtest has accumulated (flushSubtestData()) and returns the
// description_ stream so the caller can write the next subtest's
// description into it, e.g. SUBTEST << "size " << size.
|
||||||
|
std::stringstream& subtest()
|
||||||
|
{
|
||||||
|
flushSubtestData();
|
||||||
|
return description_;
|
||||||
|
}
|
||||||
|
|
||||||
void cpuOn() { cpu_started_ = cv::getTickCount(); }
|
void cpuOn() { cpu_started_ = cv::getTickCount(); }
|
||||||
|
|
||||||
void cpuOff()
|
void cpuOff()
|
||||||
@@ -56,20 +63,13 @@ public:
|
|||||||
can_flush_ = true;
|
can_flush_ = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ends the current subtest and starts a new one: flushes whatever the
// current subtest has accumulated (flush_subtest_data()) and returns the
// description_ stream so the caller can write the next subtest's
// description into it.
|
|
||||||
std::stringstream& subtest()
|
|
||||||
{
|
|
||||||
flush_subtest_data();
|
|
||||||
return description_;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TestSystem(): can_flush_(false), cpu_elapsed_(0), gpu_elapsed_(0),
|
TestSystem(): can_flush_(false), cpu_elapsed_(0), gpu_elapsed_(0),
|
||||||
speedup_total_(0.0), num_subtests_called_(0) {};
|
speedup_total_(0.0), num_subtests_called_(0) {};
|
||||||
|
|
||||||
void flush_subtest_data();
|
void flushSubtestData();
|
||||||
|
|
||||||
void reset_subtest_data()
|
void resetSubtestData()
|
||||||
{
|
{
|
||||||
cpu_elapsed_ = 0;
|
cpu_elapsed_ = 0;
|
||||||
gpu_elapsed_ = 0;
|
gpu_elapsed_ = 0;
|
||||||
@@ -93,17 +93,6 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// TEST(name): declares struct name##_test derived from Runnable, whose
// constructor passes the stringified name to Runnable and registers the
// object via TestSystem::instance()->addTest(this); defines one global
// instance (name##_test_instance); and ends with the header of
// name##_test::run(), so the braces written after TEST(name) become the
// body of that test.
#define TEST(name) \
|
|
||||||
struct name##_test: Runnable \
|
|
||||||
{ \
|
|
||||||
name##_test(): Runnable(#name) { \
|
|
||||||
TestSystem::instance()->addTest(this); \
|
|
||||||
} \
|
|
||||||
void run(); \
|
|
||||||
} name##_test_instance; \
|
|
||||||
void name##_test::run()
|
|
||||||
|
|
||||||
|
|
||||||
#define INIT(name) \
|
#define INIT(name) \
|
||||||
struct name##_init: Runnable \
|
struct name##_init: Runnable \
|
||||||
{ \
|
{ \
|
||||||
@@ -115,12 +104,22 @@ private:
|
|||||||
void name##_init::run()
|
void name##_init::run()
|
||||||
|
|
||||||
|
|
||||||
|
// TEST(name): declares struct name##_test derived from Runnable, whose
// constructor passes the stringified name to Runnable and registers the
// object via TestSystem::instance()->addTest(this); defines one global
// instance (name##_test_instance); and ends with the header of
// name##_test::run(), so the braces written after TEST(name) become the
// body of that test.
#define TEST(name) \
|
||||||
|
struct name##_test: Runnable \
|
||||||
|
{ \
|
||||||
|
name##_test(): Runnable(#name) { \
|
||||||
|
TestSystem::instance()->addTest(this); \
|
||||||
|
} \
|
||||||
|
void run(); \
|
||||||
|
} name##_test_instance; \
|
||||||
|
void name##_test::run()
|
||||||
|
|
||||||
|
// Shorthand macros that forward to the TestSystem singleton.
// SUBTEST and DESCRIPTION are aliases: both expand to subtest(), which
// returns a std::stringstream& that the test streams its description into.
// CPU_ON / CPU_OFF call cpuOn() / cpuOff() and GPU_ON / GPU_OFF call
// gpuOn() / gpuOff(); cpuOn() stores cv::getTickCount() in cpu_started_.
#define SUBTEST TestSystem::instance()->subtest()
|
||||||
|
#define DESCRIPTION TestSystem::instance()->subtest()
|
||||||
#define CPU_ON TestSystem::instance()->cpuOn()
|
#define CPU_ON TestSystem::instance()->cpuOn()
|
||||||
#define GPU_ON TestSystem::instance()->gpuOn()
|
#define GPU_ON TestSystem::instance()->gpuOn()
|
||||||
#define CPU_OFF TestSystem::instance()->cpuOff()
|
#define CPU_OFF TestSystem::instance()->cpuOff()
|
||||||
#define GPU_OFF TestSystem::instance()->gpuOff()
|
#define GPU_OFF TestSystem::instance()->gpuOff()
|
||||||
#define SUBTEST TestSystem::instance()->subtest()
|
|
||||||
#define DESCRIPTION TestSystem::instance()->subtest()
|
|
||||||
|
|
||||||
void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low,
|
void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low,
|
||||||
cv::Scalar high);
|
cv::Scalar high);
|
||||||
|
@@ -6,42 +6,40 @@
|
|||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace cv;
|
using namespace cv;
|
||||||
|
|
||||||
// Initializer (TestSystem::run() executes initializers before any test):
// performs a single gpu::matchTemplate call with CV_TM_CCORR on 500x500
// CV_32F inputs. This code calls the CUFFT DFT and thereby initializes that
// library -- presumably so the one-time setup cost does not land inside a
// timed subtest (TODO confirm).
|
INIT(matchTemplate)
|
||||||
INIT(CUFFT_library)
|
|
||||||
{
|
{
|
||||||
Mat src, templ;
|
Mat src; gen(src, 500, 500, CV_32F, 0, 1);
|
||||||
gen(src, 500, 500, CV_32F, 0, 1);
|
Mat templ; gen(templ, 500, 500, CV_32F, 0, 1);
|
||||||
gen(templ, 500, 500, CV_32F, 0, 1);
|
|
||||||
|
|
||||||
gpu::GpuMat d_src(src);
|
gpu::GpuMat d_src(src), d_templ(templ), d_dst;
|
||||||
gpu::GpuMat d_templ(templ);
|
|
||||||
gpu::GpuMat d_result;
|
|
||||||
|
|
||||||
gpu::matchTemplate(d_src, d_templ, d_result, CV_TM_CCORR);
|
gpu::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Benchmark: matchTemplate vs gpu::matchTemplate with CV_TM_CCORR on a
// 3000x3000 CV_32F source and square CV_32F templates of growing side
// length; each template size is reported as its own subtest.
TEST(matchTemplate)
|
TEST(matchTemplate)
|
||||||
{
|
{
|
||||||
Mat src, templ, result;
|
Mat src, templ, dst;
|
||||||
gen(src, 3000, 3000, CV_32F, 0, 1);
|
gen(src, 3000, 3000, CV_32F, 0, 1);
|
||||||
|
|
||||||
gpu::GpuMat d_image(src), d_templ, d_result;
|
gpu::GpuMat d_src(src), d_templ, d_dst;
|
||||||
|
|
||||||
for (int templ_size = 5; templ_size <= 1000; templ_size *= 2)
|
for (int templ_size = 5; templ_size < 200; templ_size *= 5)
|
||||||
{
|
{
|
||||||
SUBTEST << "src " << src.rows << ", templ " << templ_size << ", 32F, CCORR";
|
SUBTEST << "src " << src.rows << ", templ " << templ_size << ", 32F, CCORR";
|
||||||
|
|
||||||
gen(templ, templ_size, templ_size, CV_32F, 0, 1);
|
gen(templ, templ_size, templ_size, CV_32F, 0, 1);
|
||||||
|
// Result buffer is created before CPU_ON, i.e. outside the CPU-timed
// section; its size is (rows - templ.rows + 1) x (cols - templ.cols + 1).
dst.create(src.rows - templ.rows + 1, src.cols - templ.cols + 1, CV_32F);
|
||||||
|
|
||||||
CPU_ON;
|
CPU_ON;
|
||||||
matchTemplate(src, templ, result, CV_TM_CCORR);
|
matchTemplate(src, templ, dst, CV_TM_CCORR);
|
||||||
CPU_OFF;
|
CPU_OFF;
|
||||||
|
|
||||||
d_templ = templ;
|
d_templ = templ;
|
||||||
|
// Same for the GPU side: d_dst is created before GPU_ON, so only the
// gpu::matchTemplate call sits between GPU_ON and GPU_OFF.
d_dst.create(d_src.rows - d_templ.rows + 1, d_src.cols - d_templ.cols + 1, CV_32F);
|
||||||
|
|
||||||
GPU_ON;
|
GPU_ON;
|
||||||
gpu::matchTemplate(d_image, d_templ, d_result, CV_TM_CCORR);
|
gpu::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
|
||||||
GPU_OFF;
|
GPU_OFF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -86,6 +84,7 @@ TEST(remap)
|
|||||||
gen(src, size, size, CV_8UC1, 0, 256);
|
gen(src, size, size, CV_8UC1, 0, 256);
|
||||||
gen(xmap, size, size, CV_32F, 0, size);
|
gen(xmap, size, size, CV_32F, 0, size);
|
||||||
gen(ymap, size, size, CV_32F, 0, size);
|
gen(ymap, size, size, CV_32F, 0, size);
|
||||||
|
dst.create(xmap.size(), src.type());
|
||||||
|
|
||||||
CPU_ON;
|
CPU_ON;
|
||||||
remap(src, dst, xmap, ymap, INTER_LINEAR);
|
remap(src, dst, xmap, ymap, INTER_LINEAR);
|
||||||
@@ -94,6 +93,7 @@ TEST(remap)
|
|||||||
d_src = src;
|
d_src = src;
|
||||||
d_xmap = xmap;
|
d_xmap = xmap;
|
||||||
d_ymap = ymap;
|
d_ymap = ymap;
|
||||||
|
d_dst.create(d_xmap.size(), d_src.type());
|
||||||
|
|
||||||
GPU_ON;
|
GPU_ON;
|
||||||
gpu::remap(d_src, d_dst, d_xmap, d_ymap);
|
gpu::remap(d_src, d_dst, d_xmap, d_ymap);
|
||||||
@@ -107,17 +107,19 @@ TEST(dft)
|
|||||||
Mat src, dst;
|
Mat src, dst;
|
||||||
gpu::GpuMat d_src, d_dst;
|
gpu::GpuMat d_src, d_dst;
|
||||||
|
|
||||||
for (int size = 1000; size <= 8000; size *= 2)
|
for (int size = 1000; size <= 4000; size *= 2)
|
||||||
{
|
{
|
||||||
SUBTEST << "size " << size << ", 32FC2, complex-to-complex";
|
SUBTEST << "size " << size << ", 32FC2, complex-to-complex";
|
||||||
|
|
||||||
gen(src, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1));
|
gen(src, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1));
|
||||||
|
dst.create(src.size(), src.type());
|
||||||
|
|
||||||
CPU_ON;
|
CPU_ON;
|
||||||
dft(src, dst);
|
dft(src, dst);
|
||||||
CPU_OFF;
|
CPU_OFF;
|
||||||
|
|
||||||
d_src = src;
|
d_src = src;
|
||||||
|
d_dst.create(d_src.size(), d_src.type());
|
||||||
|
|
||||||
GPU_ON;
|
GPU_ON;
|
||||||
gpu::dft(d_src, d_dst, Size(size, size));
|
gpu::dft(d_src, d_dst, Size(size, size));
|
||||||
@@ -136,12 +138,14 @@ TEST(cornerHarris)
|
|||||||
SUBTEST << "size " << size << ", 32FC1";
|
SUBTEST << "size " << size << ", 32FC1";
|
||||||
|
|
||||||
gen(src, size, size, CV_32F, 0, 1);
|
gen(src, size, size, CV_32F, 0, 1);
|
||||||
|
dst.create(src.size(), src.type());
|
||||||
|
|
||||||
CPU_ON;
|
CPU_ON;
|
||||||
cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101);
|
cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101);
|
||||||
CPU_OFF;
|
CPU_OFF;
|
||||||
|
|
||||||
d_src = src;
|
d_src = src;
|
||||||
|
d_dst.create(src.size(), src.type());
|
||||||
|
|
||||||
GPU_ON;
|
GPU_ON;
|
||||||
gpu::cornerHarris(d_src, d_dst, 5, 7, 0.1);
|
gpu::cornerHarris(d_src, d_dst, 5, 7, 0.1);
|
||||||
@@ -150,22 +154,51 @@ TEST(cornerHarris)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// TEST(integral): benchmarks integral() against gpu::integral() on square
// CV_8U inputs whose side doubles from 1000 to 8000, one subtest per size.
// In this diff it takes the place of the older TEST(memoryAllocation),
// which timed repeated Mat::create / GpuMat::create calls of CV_32FC1
// matrices for sizes begin..end in steps of step.
TEST(memoryAllocation)
|
TEST(integral)
|
||||||
{
|
{
|
||||||
Mat mat;
|
Mat src, sum;
|
||||||
gpu::GpuMat d_mat;
|
gpu::GpuMat d_src, d_sum;
|
||||||
|
|
||||||
int begin = 100, end = 8000, step = 100;
|
for (int size = 1000; size <= 8000; size *= 2)
|
||||||
|
{
|
||||||
|
SUBTEST << "size " << size << ", 8U";
|
||||||
|
|
||||||
DESCRIPTION << "32F matrices from " << begin << " to " << end;
|
gen(src, size, size, CV_8U, 0, 256);
|
||||||
|
// The sum image is created before CPU_ON, outside the CPU-timed section;
// it is one row and one column larger than the source and of type CV_32S.
sum.create(size + 1, size + 1, CV_32S);
|
||||||
|
|
||||||
CPU_ON;
|
CPU_ON;
|
||||||
for (int size = begin; size <= end; size += step)
|
integral(src, sum);
|
||||||
mat.create(size, size, CV_32FC1);
|
CPU_OFF;
|
||||||
CPU_OFF;
|
|
||||||
|
|
||||||
GPU_ON;
|
// Assigned before GPU_ON, so this statement is not part of the GPU-timed section.
d_src = src;
|
||||||
for (int size = begin; size <= end; size += step)
|
d_sum.create(size + 1, size + 1, CV_32S);
|
||||||
d_mat.create(size, size, CV_32FC1);
|
|
||||||
GPU_OFF;
|
GPU_ON;
|
||||||
|
gpu::integral(d_src, d_sum);
|
||||||
|
GPU_OFF;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// TEST(norm): benchmarks norm() against gpu::norm() on square CV_8U
// matrices whose side doubles from 1000 to 8000, one subtest per size.
// The values returned by both calls are discarded; only the elapsed time
// between the *_ON / *_OFF markers is of interest.
TEST(norm)
|
||||||
|
{
|
||||||
|
Mat src;
|
||||||
|
gpu::GpuMat d_src;
|
||||||
|
|
||||||
|
for (int size = 1000; size <= 8000; size *= 2)
|
||||||
|
{
|
||||||
|
SUBTEST << "size " << size << ", 8U";
|
||||||
|
|
||||||
|
gen(src, size, size, CV_8U, 0, 256);
|
||||||
|
|
||||||
|
CPU_ON;
|
||||||
|
norm(src);
|
||||||
|
CPU_OFF;
|
||||||
|
|
||||||
|
// Assigned before GPU_ON, so this statement is not part of the GPU-timed section.
d_src = src;
|
||||||
|
|
||||||
|
GPU_ON;
|
||||||
|
gpu::norm(d_src);
|
||||||
|
GPU_OFF;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user