Updated optimal block size estimation for the convolve() function
This commit is contained in:
parent
bee68e519a
commit
8a799aa89a
@ -735,16 +735,18 @@ PERF_TEST_P(DevInfo_Size, dft, testing::Combine(testing::ValuesIn(devices()),
|
|||||||
SANITY_CHECK(dst_host);
|
SANITY_CHECK(dst_host);
|
||||||
}
|
}
|
||||||
|
|
||||||
PERF_TEST_P(DevInfo_Size, convolve, testing::Combine(testing::ValuesIn(devices()),
|
PERF_TEST_P(DevInfo_Int_Int, convolve, testing::Combine(testing::ValuesIn(devices()),
|
||||||
testing::Values(GPU_TYPICAL_MAT_SIZES)))
|
testing::Values(512, 1024, 1536, 2048, 2560, 3072, 3584),
|
||||||
|
testing::Values(27, 32, 64)))
|
||||||
{
|
{
|
||||||
DeviceInfo devInfo = std::tr1::get<0>(GetParam());
|
DeviceInfo devInfo = std::tr1::get<0>(GetParam());
|
||||||
Size size = std::tr1::get<1>(GetParam());
|
int image_size = std::tr1::get<1>(GetParam());
|
||||||
|
int templ_size = std::tr1::get<2>(GetParam());
|
||||||
|
|
||||||
setDevice(devInfo.deviceID());
|
setDevice(devInfo.deviceID());
|
||||||
|
|
||||||
Mat image_host(size, CV_32FC1);
|
Mat image_host(image_size, image_size, CV_32FC1);
|
||||||
Mat templ_host(size, CV_32FC1);
|
Mat templ_host(templ_size, templ_size, CV_32FC1);
|
||||||
|
|
||||||
declare.in(image_host, templ_host, WARMUP_RNG);
|
declare.in(image_host, templ_host, WARMUP_RNG);
|
||||||
|
|
||||||
|
@ -32,6 +32,7 @@ struct CvtColorInfo
|
|||||||
|
|
||||||
typedef TestBaseWithParam<DeviceInfo> DevInfo;
|
typedef TestBaseWithParam<DeviceInfo> DevInfo;
|
||||||
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size> > DevInfo_Size;
|
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size> > DevInfo_Size;
|
||||||
|
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, int, int> > DevInfo_Int_Int;
|
||||||
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, MatType> > DevInfo_MatType;
|
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, MatType> > DevInfo_MatType;
|
||||||
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType> > DevInfo_Size_MatType;
|
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType> > DevInfo_Size_MatType;
|
||||||
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType, MatType> > DevInfo_Size_MatType_MatType;
|
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType, MatType> > DevInfo_Size_MatType_MatType;
|
||||||
|
@ -1546,18 +1546,23 @@ void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
|
|||||||
Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size)
|
Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size)
|
||||||
{
|
{
|
||||||
int scale = 40;
|
int scale = 40;
|
||||||
Size bsize_min(1024, 1024);
|
Size bsize_min(512, 512);
|
||||||
|
|
||||||
// Check whether we use Fermi generation or newer GPU
|
// Check whether we use Fermi generation or newer GPU
|
||||||
if (DeviceInfo().majorVersion() >= 2)
|
if (DeviceInfo().majorVersion() >= 2)
|
||||||
{
|
{
|
||||||
bsize_min.width = 2048;
|
bsize_min.width = 1024;
|
||||||
bsize_min.height = 2048;
|
bsize_min.height = 1024;
|
||||||
}
|
}
|
||||||
|
|
||||||
Size bsize(std::max(templ_size.width * scale, bsize_min.width),
|
Size bsize(std::max(templ_size.width * scale, bsize_min.width),
|
||||||
std::max(templ_size.height * scale, bsize_min.height));
|
std::max(templ_size.height * scale, bsize_min.height));
|
||||||
|
|
||||||
|
int blocks_per_row = (result_size.width + bsize.width - 1) / bsize.width;
|
||||||
|
int blocks_per_col = (result_size.height + bsize.height - 1) / bsize.height;
|
||||||
|
bsize.width = (result_size.width + blocks_per_row - 1) / blocks_per_row;
|
||||||
|
bsize.height = (result_size.height + blocks_per_col - 1) / blocks_per_col;
|
||||||
|
|
||||||
bsize.width = std::min(bsize.width, result_size.width);
|
bsize.width = std::min(bsize.width, result_size.width);
|
||||||
bsize.height = std::min(bsize.height, result_size.height);
|
bsize.height = std::min(bsize.height, result_size.height);
|
||||||
return bsize;
|
return bsize;
|
||||||
|
@ -8,9 +8,15 @@ using namespace cv;
|
|||||||
|
|
||||||
void TestSystem::run()
|
void TestSystem::run()
|
||||||
{
|
{
|
||||||
// Run test initializers
|
if (is_list_mode_)
|
||||||
vector<Runnable*>::iterator it = inits_.begin();
|
{
|
||||||
for (; it != inits_.end(); ++it)
|
for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
|
||||||
|
cout << (*it)->name() << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run test initializers
|
||||||
|
for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it)
|
||||||
{
|
{
|
||||||
if ((*it)->name().find(test_filter_, 0) != string::npos)
|
if ((*it)->name().find(test_filter_, 0) != string::npos)
|
||||||
(*it)->run();
|
(*it)->run();
|
||||||
@ -19,8 +25,7 @@ void TestSystem::run()
|
|||||||
printHeading();
|
printHeading();
|
||||||
|
|
||||||
// Run tests
|
// Run tests
|
||||||
it = tests_.begin();
|
for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
|
||||||
for (; it != tests_.end(); ++it)
|
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
@ -145,13 +150,15 @@ int main(int argc, char** argv)
|
|||||||
string key = argv[i];
|
string key = argv[i];
|
||||||
if (key == "--help")
|
if (key == "--help")
|
||||||
{
|
{
|
||||||
cout << "Usage: performance_gpu [--filter <test_filter>] [--working-dir <working_dir_with_slash>]\n";
|
cout << "Usage: performance_gpu [--ls] [--filter <test_filter>] [--workdir <working_dir_with_slash>]\n";
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (key == "--filter" && i + 1 < argc)
|
if (key == "--filter" && i + 1 < argc)
|
||||||
TestSystem::instance().setTestFilter(argv[++i]);
|
TestSystem::instance().setTestFilter(argv[++i]);
|
||||||
else if (key == "--working-dir" && i + 1 < argc)
|
else if (key == "--workdir" && i + 1 < argc)
|
||||||
TestSystem::instance().setWorkingDir(argv[++i]);
|
TestSystem::instance().setWorkingDir(argv[++i]);
|
||||||
|
else if (key == "--ls")
|
||||||
|
TestSystem::instance().setListMode(true);
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cout << "Unknown parameter: '" << key << "'" << endl;
|
cout << "Unknown parameter: '" << key << "'" << endl;
|
||||||
|
@ -68,10 +68,14 @@ public:
|
|||||||
cur_subtest_is_empty_ = false;
|
cur_subtest_is_empty_ = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool isListMode() const { return is_list_mode_; }
|
||||||
|
void setListMode(bool value) { is_list_mode_ = value; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0),
|
TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0),
|
||||||
gpu_elapsed_(0), speedup_total_(0.0),
|
gpu_elapsed_(0), speedup_total_(0.0),
|
||||||
num_subtests_called_(0) {}
|
num_subtests_called_(0),
|
||||||
|
is_list_mode_(false) {}
|
||||||
|
|
||||||
void finishCurrentSubtest();
|
void finishCurrentSubtest();
|
||||||
void resetCurrentSubtest()
|
void resetCurrentSubtest()
|
||||||
@ -100,6 +104,8 @@ private:
|
|||||||
|
|
||||||
double speedup_total_;
|
double speedup_total_;
|
||||||
int num_subtests_called_;
|
int num_subtests_called_;
|
||||||
|
|
||||||
|
bool is_list_mode_;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user