Updated optimal block size estimation for the convolve() function

This commit is contained in:
Alexey Spizhevoy 2011-10-03 14:05:52 +00:00
parent bee68e519a
commit 8a799aa89a
5 changed files with 37 additions and 16 deletions

View File

@@ -735,16 +735,18 @@ PERF_TEST_P(DevInfo_Size, dft, testing::Combine(testing::ValuesIn(devices()),
SANITY_CHECK(dst_host);
}
PERF_TEST_P(DevInfo_Size, convolve, testing::Combine(testing::ValuesIn(devices()),
testing::Values(GPU_TYPICAL_MAT_SIZES)))
PERF_TEST_P(DevInfo_Int_Int, convolve, testing::Combine(testing::ValuesIn(devices()),
testing::Values(512, 1024, 1536, 2048, 2560, 3072, 3584),
testing::Values(27, 32, 64)))
{
DeviceInfo devInfo = std::tr1::get<0>(GetParam());
Size size = std::tr1::get<1>(GetParam());
int image_size = std::tr1::get<1>(GetParam());
int templ_size = std::tr1::get<2>(GetParam());
setDevice(devInfo.deviceID());
Mat image_host(size, CV_32FC1);
Mat templ_host(size, CV_32FC1);
Mat image_host(image_size, image_size, CV_32FC1);
Mat templ_host(templ_size, templ_size, CV_32FC1);
declare.in(image_host, templ_host, WARMUP_RNG);

View File

@@ -32,6 +32,7 @@ struct CvtColorInfo
// Fixture aliases for value-parameterized tests. Each alias is
// TestBaseWithParam instantiated with the parameter type (a DeviceInfo, or a
// std::tr1::tuple starting with a DeviceInfo) that a test body unpacks from
// GetParam().
typedef TestBaseWithParam<DeviceInfo> DevInfo;
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size> > DevInfo_Size;
// Device plus two plain ints; the convolve perf test reads them as the image
// side length and the template side length.
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, int, int> > DevInfo_Int_Int;
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, MatType> > DevInfo_MatType;
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType> > DevInfo_Size_MatType;
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType, MatType> > DevInfo_Size_MatType_MatType;

View File

@@ -1546,18 +1546,23 @@ void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size)
{
int scale = 40;
Size bsize_min(1024, 1024);
Size bsize_min(512, 512);
// Check whether we use Fermi generation or newer GPU
if (DeviceInfo().majorVersion() >= 2)
{
bsize_min.width = 2048;
bsize_min.height = 2048;
bsize_min.width = 1024;
bsize_min.height = 1024;
}
Size bsize(std::max(templ_size.width * scale, bsize_min.width),
std::max(templ_size.height * scale, bsize_min.height));
int blocks_per_row = (result_size.width + bsize.width - 1) / bsize.width;
int blocks_per_col = (result_size.height + bsize.height - 1) / bsize.height;
bsize.width = (result_size.width + blocks_per_row - 1) / blocks_per_row;
bsize.height = (result_size.height + blocks_per_col - 1) / blocks_per_col;
bsize.width = std::min(bsize.width, result_size.width);
bsize.height = std::min(bsize.height, result_size.height);
return bsize;

View File

@@ -8,9 +8,15 @@ using namespace cv;
void TestSystem::run()
{
// Run test initializers
vector<Runnable*>::iterator it = inits_.begin();
for (; it != inits_.end(); ++it)
if (is_list_mode_)
{
for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
cout << (*it)->name() << endl;
return;
}
// Run test initializers
for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it)
{
if ((*it)->name().find(test_filter_, 0) != string::npos)
(*it)->run();
@@ -19,8 +25,7 @@ void TestSystem::run()
printHeading();
// Run tests
it = tests_.begin();
for (; it != tests_.end(); ++it)
for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
{
try
{
@@ -145,13 +150,15 @@ int main(int argc, char** argv)
string key = argv[i];
if (key == "--help")
{
cout << "Usage: performance_gpu [--filter <test_filter>] [--working-dir <working_dir_with_slash>]\n";
cout << "Usage: performance_gpu [--ls] [--filter <test_filter>] [--workdir <working_dir_with_slash>]\n";
return 0;
}
if (key == "--filter" && i + 1 < argc)
TestSystem::instance().setTestFilter(argv[++i]);
else if (key == "--working-dir" && i + 1 < argc)
else if (key == "--workdir" && i + 1 < argc)
TestSystem::instance().setWorkingDir(argv[++i]);
else if (key == "--ls")
TestSystem::instance().setListMode(true);
else
{
cout << "Unknown parameter: '" << key << "'" << endl;

View File

@@ -68,10 +68,14 @@ public:
cur_subtest_is_empty_ = false;
}
// Reports the current value of the list-mode flag.
bool isListMode() const
{
    return is_list_mode_;
}
// Stores `value` as the list-mode flag.
void setListMode(bool value)
{
    is_list_mode_ = value;
}
private:
TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0),
gpu_elapsed_(0), speedup_total_(0.0),
num_subtests_called_(0) {}
num_subtests_called_(0),
is_list_mode_(false) {}
void finishCurrentSubtest();
void resetCurrentSubtest()
@@ -100,6 +104,8 @@ private:
double speedup_total_;
int num_subtests_called_;
bool is_list_mode_;
};