Updated the GPU performance sample
This commit is contained in:
parent
c908c50131
commit
630d874e03
@ -60,14 +60,10 @@ void TestSystem::finishCurrentSubtest()
|
|||||||
// There is no need to print subtest statistics
|
// There is no need to print subtest statistics
|
||||||
return;
|
return;
|
||||||
|
|
||||||
//int cpu_time = static_cast<int>(cpu_elapsed_ / getTickFrequency() * 1000.0);
|
|
||||||
//int gpu_time = static_cast<int>(gpu_elapsed_ / getTickFrequency() * 1000.0);
|
|
||||||
|
|
||||||
double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
|
double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
|
||||||
double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
|
double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
|
||||||
|
|
||||||
double speedup = static_cast<double>(cpu_elapsed_) /
|
double speedup = static_cast<double>(cpu_elapsed_) / std::max((int64)1, gpu_elapsed_);
|
||||||
std::max((int64)1, gpu_elapsed_);
|
|
||||||
speedup_total_ += speedup;
|
speedup_total_ += speedup;
|
||||||
|
|
||||||
printMetrics(cpu_time, gpu_time, speedup);
|
printMetrics(cpu_time, gpu_time, speedup);
|
||||||
@ -77,6 +73,15 @@ void TestSystem::finishCurrentSubtest()
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
double TestSystem::meanTime(const vector<int64> &samples)
|
||||||
|
{
|
||||||
|
double sum = accumulate(samples.begin(), samples.end(), 0.);
|
||||||
|
if (samples.size() > 1)
|
||||||
|
return (sum - samples[0]) / (samples.size() - 1);
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void TestSystem::printHeading()
|
void TestSystem::printHeading()
|
||||||
{
|
{
|
||||||
cout << endl;
|
cout << endl;
|
||||||
@ -210,7 +215,7 @@ int main(int argc, const char* argv[])
|
|||||||
if (list)
|
if (list)
|
||||||
TestSystem::instance().setListMode(true);
|
TestSystem::instance().setListMode(true);
|
||||||
|
|
||||||
TestSystem::instance().setIters(iters);
|
TestSystem::instance().setNumIters(iters);
|
||||||
|
|
||||||
TestSystem::instance().run();
|
TestSystem::instance().run();
|
||||||
|
|
||||||
|
@ -41,7 +41,7 @@ public:
|
|||||||
void setTestFilter(const std::string& val) { test_filter_ = val; }
|
void setTestFilter(const std::string& val) { test_filter_ = val; }
|
||||||
const std::string& testFilter() const { return test_filter_; }
|
const std::string& testFilter() const { return test_filter_; }
|
||||||
|
|
||||||
void setIters(int iters) { iters_ = iters; }
|
void setNumIters(int num_iters) { num_iters_ = num_iters; }
|
||||||
|
|
||||||
void addInit(Runnable* init) { inits_.push_back(init); }
|
void addInit(Runnable* init) { inits_.push_back(init); }
|
||||||
void addTest(Runnable* test) { tests_.push_back(test); }
|
void addTest(Runnable* test) { tests_.push_back(test); }
|
||||||
@ -56,21 +56,20 @@ public:
|
|||||||
return cur_subtest_description_;
|
return cur_subtest_description_;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool stop() const { return it_ >= iters_; }
|
bool stop() const { return cur_iter_idx_ >= num_iters_; }
|
||||||
|
|
||||||
void cpuOn() { cpu_started_ = cv::getTickCount(); }
|
void cpuOn() { cpu_started_ = cv::getTickCount(); }
|
||||||
void cpuOff()
|
void cpuOff()
|
||||||
{
|
{
|
||||||
int64 delta = cv::getTickCount() - cpu_started_;
|
int64 delta = cv::getTickCount() - cpu_started_;
|
||||||
cpu_times_.push_back(delta);
|
cpu_times_.push_back(delta);
|
||||||
++it_;
|
++cur_iter_idx_;
|
||||||
}
|
}
|
||||||
void cpuComplete()
|
void cpuComplete()
|
||||||
{
|
{
|
||||||
double delta_mean = std::accumulate(cpu_times_.begin(), cpu_times_.end(), 0.0) / iters_;
|
cpu_elapsed_ += meanTime(cpu_times_);
|
||||||
cpu_elapsed_ += delta_mean;
|
|
||||||
cur_subtest_is_empty_ = false;
|
cur_subtest_is_empty_ = false;
|
||||||
it_ = 0;
|
cur_iter_idx_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void gpuOn() { gpu_started_ = cv::getTickCount(); }
|
void gpuOn() { gpu_started_ = cv::getTickCount(); }
|
||||||
@ -78,30 +77,28 @@ public:
|
|||||||
{
|
{
|
||||||
int64 delta = cv::getTickCount() - gpu_started_;
|
int64 delta = cv::getTickCount() - gpu_started_;
|
||||||
gpu_times_.push_back(delta);
|
gpu_times_.push_back(delta);
|
||||||
++it_;
|
++cur_iter_idx_;
|
||||||
}
|
}
|
||||||
void gpuComplete()
|
void gpuComplete()
|
||||||
{
|
{
|
||||||
double delta_mean = std::accumulate(gpu_times_.begin(), gpu_times_.end(), 0.0) / iters_;
|
gpu_elapsed_ += meanTime(gpu_times_);
|
||||||
gpu_elapsed_ += delta_mean;
|
|
||||||
cur_subtest_is_empty_ = false;
|
cur_subtest_is_empty_ = false;
|
||||||
it_ = 0;
|
cur_iter_idx_ = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isListMode() const { return is_list_mode_; }
|
bool isListMode() const { return is_list_mode_; }
|
||||||
void setListMode(bool value) { is_list_mode_ = value; }
|
void setListMode(bool value) { is_list_mode_ = value; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0),
|
TestSystem():
|
||||||
gpu_elapsed_(0), speedup_total_(0.0),
|
cur_subtest_is_empty_(true), cpu_elapsed_(0),
|
||||||
num_subtests_called_(0),
|
gpu_elapsed_(0), speedup_total_(0.0),
|
||||||
is_list_mode_(false)
|
num_subtests_called_(0), is_list_mode_(false),
|
||||||
|
num_iters_(10), cur_iter_idx_(0)
|
||||||
{
|
{
|
||||||
iters_ = 10;
|
cpu_times_.reserve(num_iters_);
|
||||||
it_ = 0;
|
gpu_times_.reserve(num_iters_);
|
||||||
cpu_times_.reserve(iters_);
|
}
|
||||||
gpu_times_.reserve(iters_);
|
|
||||||
}
|
|
||||||
|
|
||||||
void finishCurrentSubtest();
|
void finishCurrentSubtest();
|
||||||
void resetCurrentSubtest()
|
void resetCurrentSubtest()
|
||||||
@ -110,11 +107,13 @@ private:
|
|||||||
gpu_elapsed_ = 0;
|
gpu_elapsed_ = 0;
|
||||||
cur_subtest_description_.str("");
|
cur_subtest_description_.str("");
|
||||||
cur_subtest_is_empty_ = true;
|
cur_subtest_is_empty_ = true;
|
||||||
it_ = 0;
|
cur_iter_idx_ = 0;
|
||||||
cpu_times_.clear();
|
cpu_times_.clear();
|
||||||
gpu_times_.clear();
|
gpu_times_.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double meanTime(const std::vector<int64> &samples);
|
||||||
|
|
||||||
void printHeading();
|
void printHeading();
|
||||||
void printSummary();
|
void printSummary();
|
||||||
void printMetrics(double cpu_time, double gpu_time, double speedup);
|
void printMetrics(double cpu_time, double gpu_time, double speedup);
|
||||||
@ -136,8 +135,8 @@ private:
|
|||||||
|
|
||||||
bool is_list_mode_;
|
bool is_list_mode_;
|
||||||
|
|
||||||
int iters_;
|
int num_iters_;
|
||||||
int it_;
|
int cur_iter_idx_;
|
||||||
std::vector<int64> cpu_times_;
|
std::vector<int64> cpu_times_;
|
||||||
std::vector<int64> gpu_times_;
|
std::vector<int64> gpu_times_;
|
||||||
};
|
};
|
||||||
@ -164,13 +163,21 @@ private:
|
|||||||
|
|
||||||
#define SUBTEST TestSystem::instance().startNewSubtest()
|
#define SUBTEST TestSystem::instance().startNewSubtest()
|
||||||
|
|
||||||
#define CPU_ON while (!TestSystem::instance().stop()) { TestSystem::instance().cpuOn()
|
#define CPU_ON \
|
||||||
#define CPU_OFF TestSystem::instance().cpuOff(); } TestSystem::instance().cpuComplete()
|
while (!TestSystem::instance().stop()) { \
|
||||||
|
TestSystem::instance().cpuOn()
|
||||||
|
#define CPU_OFF \
|
||||||
|
TestSystem::instance().cpuOff(); \
|
||||||
|
} TestSystem::instance().cpuComplete()
|
||||||
|
|
||||||
#define GPU_ON while (!TestSystem::instance().stop()) { TestSystem::instance().gpuOn()
|
#define GPU_ON \
|
||||||
#define GPU_OFF TestSystem::instance().gpuOff(); } TestSystem::instance().gpuComplete()
|
while (!TestSystem::instance().stop()) { \
|
||||||
|
TestSystem::instance().gpuOn()
|
||||||
|
#define GPU_OFF \
|
||||||
|
TestSystem::instance().gpuOff(); \
|
||||||
|
} TestSystem::instance().gpuComplete()
|
||||||
|
|
||||||
// Generates matrix
|
// Generates a matrix
|
||||||
void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low,
|
void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low,
|
||||||
cv::Scalar high);
|
cv::Scalar high);
|
||||||
|
|
||||||
|
@ -1203,13 +1203,11 @@ TEST(FarnebackOpticalFlow)
|
|||||||
calc.flags |= useGaussianBlur ? OPTFLOW_FARNEBACK_GAUSSIAN : 0;
|
calc.flags |= useGaussianBlur ? OPTFLOW_FARNEBACK_GAUSSIAN : 0;
|
||||||
|
|
||||||
gpu::GpuMat d_frame0(frame0), d_frame1(frame1), d_flowx, d_flowy;
|
gpu::GpuMat d_frame0(frame0), d_frame1(frame1), d_flowx, d_flowy;
|
||||||
calc(d_frame0, d_frame1, d_flowx, d_flowy);
|
|
||||||
GPU_ON;
|
GPU_ON;
|
||||||
calc(d_frame0, d_frame1, d_flowx, d_flowy);
|
calc(d_frame0, d_frame1, d_flowx, d_flowy);
|
||||||
GPU_OFF;
|
GPU_OFF;
|
||||||
|
|
||||||
Mat flow;
|
Mat flow;
|
||||||
calcOpticalFlowFarneback(frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, calc.numIters, calc.polyN, calc.polySigma, calc.flags);
|
|
||||||
CPU_ON;
|
CPU_ON;
|
||||||
calcOpticalFlowFarneback(frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, calc.numIters, calc.polyN, calc.polySigma, calc.flags);
|
calcOpticalFlowFarneback(frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, calc.numIters, calc.polyN, calc.polySigma, calc.flags);
|
||||||
CPU_OFF;
|
CPU_OFF;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user