Updated the GPU performance sample

This commit is contained in:
Alexey Spizhevoy 2012-02-16 13:08:22 +00:00
parent c908c50131
commit 630d874e03
3 changed files with 45 additions and 35 deletions

View File

@ -60,14 +60,10 @@ void TestSystem::finishCurrentSubtest()
// There is no need to print subtest statistics // There is no need to print subtest statistics
return; return;
//int cpu_time = static_cast<int>(cpu_elapsed_ / getTickFrequency() * 1000.0);
//int gpu_time = static_cast<int>(gpu_elapsed_ / getTickFrequency() * 1000.0);
double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0; double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0; double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
double speedup = static_cast<double>(cpu_elapsed_) / double speedup = static_cast<double>(cpu_elapsed_) / std::max((int64)1, gpu_elapsed_);
std::max((int64)1, gpu_elapsed_);
speedup_total_ += speedup; speedup_total_ += speedup;
printMetrics(cpu_time, gpu_time, speedup); printMetrics(cpu_time, gpu_time, speedup);
@ -77,6 +73,15 @@ void TestSystem::finishCurrentSubtest()
} }
// Reduces a list of per-iteration tick counts to a single representative value.
//
// samples: tick deltas recorded for the iterations of one measurement.
// Returns: with two or more samples, the average of all samples except the
//          first one; with zero or one sample, the plain sum (0 for an empty
//          list, the sample itself for a single entry).
double TestSystem::meanTime(const vector<int64> &samples)
{
    const double total = accumulate(samples.begin(), samples.end(), 0.);

    // Nothing can be excluded when there are fewer than two samples.
    if (samples.size() <= 1)
        return total;

    // Take the first sample back out of the total before averaging.
    // NOTE(review): presumably the first iteration acts as a warm-up run - confirm.
    const double without_first = total - samples[0];
    return without_first / (samples.size() - 1);
}
void TestSystem::printHeading() void TestSystem::printHeading()
{ {
cout << endl; cout << endl;
@ -210,7 +215,7 @@ int main(int argc, const char* argv[])
if (list) if (list)
TestSystem::instance().setListMode(true); TestSystem::instance().setListMode(true);
TestSystem::instance().setIters(iters); TestSystem::instance().setNumIters(iters);
TestSystem::instance().run(); TestSystem::instance().run();

View File

@ -41,7 +41,7 @@ public:
void setTestFilter(const std::string& val) { test_filter_ = val; } void setTestFilter(const std::string& val) { test_filter_ = val; }
const std::string& testFilter() const { return test_filter_; } const std::string& testFilter() const { return test_filter_; }
void setIters(int iters) { iters_ = iters; } void setNumIters(int num_iters) { num_iters_ = num_iters; }
void addInit(Runnable* init) { inits_.push_back(init); } void addInit(Runnable* init) { inits_.push_back(init); }
void addTest(Runnable* test) { tests_.push_back(test); } void addTest(Runnable* test) { tests_.push_back(test); }
@ -56,21 +56,20 @@ public:
return cur_subtest_description_; return cur_subtest_description_;
} }
bool stop() const { return it_ >= iters_; } bool stop() const { return cur_iter_idx_ >= num_iters_; }
void cpuOn() { cpu_started_ = cv::getTickCount(); } void cpuOn() { cpu_started_ = cv::getTickCount(); }
void cpuOff() void cpuOff()
{ {
int64 delta = cv::getTickCount() - cpu_started_; int64 delta = cv::getTickCount() - cpu_started_;
cpu_times_.push_back(delta); cpu_times_.push_back(delta);
++it_; ++cur_iter_idx_;
} }
void cpuComplete() void cpuComplete()
{ {
double delta_mean = std::accumulate(cpu_times_.begin(), cpu_times_.end(), 0.0) / iters_; cpu_elapsed_ += meanTime(cpu_times_);
cpu_elapsed_ += delta_mean;
cur_subtest_is_empty_ = false; cur_subtest_is_empty_ = false;
it_ = 0; cur_iter_idx_ = 0;
} }
void gpuOn() { gpu_started_ = cv::getTickCount(); } void gpuOn() { gpu_started_ = cv::getTickCount(); }
@ -78,30 +77,28 @@ public:
{ {
int64 delta = cv::getTickCount() - gpu_started_; int64 delta = cv::getTickCount() - gpu_started_;
gpu_times_.push_back(delta); gpu_times_.push_back(delta);
++it_; ++cur_iter_idx_;
} }
void gpuComplete() void gpuComplete()
{ {
double delta_mean = std::accumulate(gpu_times_.begin(), gpu_times_.end(), 0.0) / iters_; gpu_elapsed_ += meanTime(gpu_times_);
gpu_elapsed_ += delta_mean;
cur_subtest_is_empty_ = false; cur_subtest_is_empty_ = false;
it_ = 0; cur_iter_idx_ = 0;
} }
bool isListMode() const { return is_list_mode_; } bool isListMode() const { return is_list_mode_; }
void setListMode(bool value) { is_list_mode_ = value; } void setListMode(bool value) { is_list_mode_ = value; }
private: private:
TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0), TestSystem():
gpu_elapsed_(0), speedup_total_(0.0), cur_subtest_is_empty_(true), cpu_elapsed_(0),
num_subtests_called_(0), gpu_elapsed_(0), speedup_total_(0.0),
is_list_mode_(false) num_subtests_called_(0), is_list_mode_(false),
num_iters_(10), cur_iter_idx_(0)
{ {
iters_ = 10; cpu_times_.reserve(num_iters_);
it_ = 0; gpu_times_.reserve(num_iters_);
cpu_times_.reserve(iters_); }
gpu_times_.reserve(iters_);
}
void finishCurrentSubtest(); void finishCurrentSubtest();
void resetCurrentSubtest() void resetCurrentSubtest()
@ -110,11 +107,13 @@ private:
gpu_elapsed_ = 0; gpu_elapsed_ = 0;
cur_subtest_description_.str(""); cur_subtest_description_.str("");
cur_subtest_is_empty_ = true; cur_subtest_is_empty_ = true;
it_ = 0; cur_iter_idx_ = 0;
cpu_times_.clear(); cpu_times_.clear();
gpu_times_.clear(); gpu_times_.clear();
} }
// Returns the mean of the given tick samples. When more than one sample is
// present the first one is left out of the average; otherwise the plain sum
// of the samples is returned.
double meanTime(const std::vector<int64> &samples);
void printHeading(); void printHeading();
void printSummary(); void printSummary();
void printMetrics(double cpu_time, double gpu_time, double speedup); void printMetrics(double cpu_time, double gpu_time, double speedup);
@ -136,8 +135,8 @@ private:
bool is_list_mode_; bool is_list_mode_;
int iters_; int num_iters_;
int it_; int cur_iter_idx_;
std::vector<int64> cpu_times_; std::vector<int64> cpu_times_;
std::vector<int64> gpu_times_; std::vector<int64> gpu_times_;
}; };
@ -164,13 +163,21 @@ private:
#define SUBTEST TestSystem::instance().startNewSubtest() #define SUBTEST TestSystem::instance().startNewSubtest()
#define CPU_ON while (!TestSystem::instance().stop()) { TestSystem::instance().cpuOn() #define CPU_ON \
#define CPU_OFF TestSystem::instance().cpuOff(); } TestSystem::instance().cpuComplete() while (!TestSystem::instance().stop()) { \
TestSystem::instance().cpuOn()
#define CPU_OFF \
TestSystem::instance().cpuOff(); \
} TestSystem::instance().cpuComplete()
#define GPU_ON while (!TestSystem::instance().stop()) { TestSystem::instance().gpuOn() #define GPU_ON \
#define GPU_OFF TestSystem::instance().gpuOff(); } TestSystem::instance().gpuComplete() while (!TestSystem::instance().stop()) { \
TestSystem::instance().gpuOn()
#define GPU_OFF \
TestSystem::instance().gpuOff(); \
} TestSystem::instance().gpuComplete()
// Generates matrix // Generates a matrix
void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low, void gen(cv::Mat& mat, int rows, int cols, int type, cv::Scalar low,
cv::Scalar high); cv::Scalar high);

View File

@ -1203,13 +1203,11 @@ TEST(FarnebackOpticalFlow)
calc.flags |= useGaussianBlur ? OPTFLOW_FARNEBACK_GAUSSIAN : 0; calc.flags |= useGaussianBlur ? OPTFLOW_FARNEBACK_GAUSSIAN : 0;
gpu::GpuMat d_frame0(frame0), d_frame1(frame1), d_flowx, d_flowy; gpu::GpuMat d_frame0(frame0), d_frame1(frame1), d_flowx, d_flowy;
calc(d_frame0, d_frame1, d_flowx, d_flowy);
GPU_ON; GPU_ON;
calc(d_frame0, d_frame1, d_flowx, d_flowy); calc(d_frame0, d_frame1, d_flowx, d_flowy);
GPU_OFF; GPU_OFF;
Mat flow; Mat flow;
calcOpticalFlowFarneback(frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, calc.numIters, calc.polyN, calc.polySigma, calc.flags);
CPU_ON; CPU_ON;
calcOpticalFlowFarneback(frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, calc.numIters, calc.polyN, calc.polySigma, calc.flags); calcOpticalFlowFarneback(frame0, frame1, flow, calc.pyrScale, calc.numLevels, calc.winSize, calc.numIters, calc.polyN, calc.polySigma, calc.flags);
CPU_OFF; CPU_OFF;