Updated optimal block size estimation for the convolve() function
This commit is contained in:
parent
bee68e519a
commit
8a799aa89a
@ -735,16 +735,18 @@ PERF_TEST_P(DevInfo_Size, dft, testing::Combine(testing::ValuesIn(devices()),
|
||||
SANITY_CHECK(dst_host);
|
||||
}
|
||||
|
||||
PERF_TEST_P(DevInfo_Size, convolve, testing::Combine(testing::ValuesIn(devices()),
|
||||
testing::Values(GPU_TYPICAL_MAT_SIZES)))
|
||||
PERF_TEST_P(DevInfo_Int_Int, convolve, testing::Combine(testing::ValuesIn(devices()),
|
||||
testing::Values(512, 1024, 1536, 2048, 2560, 3072, 3584),
|
||||
testing::Values(27, 32, 64)))
|
||||
{
|
||||
DeviceInfo devInfo = std::tr1::get<0>(GetParam());
|
||||
Size size = std::tr1::get<1>(GetParam());
|
||||
int image_size = std::tr1::get<1>(GetParam());
|
||||
int templ_size = std::tr1::get<2>(GetParam());
|
||||
|
||||
setDevice(devInfo.deviceID());
|
||||
|
||||
Mat image_host(size, CV_32FC1);
|
||||
Mat templ_host(size, CV_32FC1);
|
||||
Mat image_host(image_size, image_size, CV_32FC1);
|
||||
Mat templ_host(templ_size, templ_size, CV_32FC1);
|
||||
|
||||
declare.in(image_host, templ_host, WARMUP_RNG);
|
||||
|
||||
|
@ -32,6 +32,7 @@ struct CvtColorInfo
|
||||
|
||||
typedef TestBaseWithParam<DeviceInfo> DevInfo;
|
||||
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size> > DevInfo_Size;
|
||||
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, int, int> > DevInfo_Int_Int;
|
||||
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, MatType> > DevInfo_MatType;
|
||||
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType> > DevInfo_Size_MatType;
|
||||
typedef TestBaseWithParam< std::tr1::tuple<DeviceInfo, Size, MatType, MatType> > DevInfo_Size_MatType_MatType;
|
||||
|
@ -1546,18 +1546,23 @@ void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size)
|
||||
Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size templ_size)
|
||||
{
|
||||
int scale = 40;
|
||||
Size bsize_min(1024, 1024);
|
||||
Size bsize_min(512, 512);
|
||||
|
||||
// Check whether we use Fermi generation or newer GPU
|
||||
if (DeviceInfo().majorVersion() >= 2)
|
||||
{
|
||||
bsize_min.width = 2048;
|
||||
bsize_min.height = 2048;
|
||||
bsize_min.width = 1024;
|
||||
bsize_min.height = 1024;
|
||||
}
|
||||
|
||||
Size bsize(std::max(templ_size.width * scale, bsize_min.width),
|
||||
std::max(templ_size.height * scale, bsize_min.height));
|
||||
|
||||
int blocks_per_row = (result_size.width + bsize.width - 1) / bsize.width;
|
||||
int blocks_per_col = (result_size.height + bsize.height - 1) / bsize.height;
|
||||
bsize.width = (result_size.width + blocks_per_row - 1) / blocks_per_row;
|
||||
bsize.height = (result_size.height + blocks_per_col - 1) / blocks_per_col;
|
||||
|
||||
bsize.width = std::min(bsize.width, result_size.width);
|
||||
bsize.height = std::min(bsize.height, result_size.height);
|
||||
return bsize;
|
||||
|
@ -8,9 +8,15 @@ using namespace cv;
|
||||
|
||||
void TestSystem::run()
|
||||
{
|
||||
// Run test initializers
|
||||
vector<Runnable*>::iterator it = inits_.begin();
|
||||
for (; it != inits_.end(); ++it)
|
||||
if (is_list_mode_)
|
||||
{
|
||||
for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
|
||||
cout << (*it)->name() << endl;
|
||||
return;
|
||||
}
|
||||
|
||||
// Run test initializers
|
||||
for (vector<Runnable*>::iterator it = inits_.begin(); it != inits_.end(); ++it)
|
||||
{
|
||||
if ((*it)->name().find(test_filter_, 0) != string::npos)
|
||||
(*it)->run();
|
||||
@ -19,8 +25,7 @@ void TestSystem::run()
|
||||
printHeading();
|
||||
|
||||
// Run tests
|
||||
it = tests_.begin();
|
||||
for (; it != tests_.end(); ++it)
|
||||
for (vector<Runnable*>::iterator it = tests_.begin(); it != tests_.end(); ++it)
|
||||
{
|
||||
try
|
||||
{
|
||||
@ -145,13 +150,15 @@ int main(int argc, char** argv)
|
||||
string key = argv[i];
|
||||
if (key == "--help")
|
||||
{
|
||||
cout << "Usage: performance_gpu [--filter <test_filter>] [--working-dir <working_dir_with_slash>]\n";
|
||||
cout << "Usage: performance_gpu [--ls] [--filter <test_filter>] [--workdir <working_dir_with_slash>]\n";
|
||||
return 0;
|
||||
}
|
||||
if (key == "--filter" && i + 1 < argc)
|
||||
TestSystem::instance().setTestFilter(argv[++i]);
|
||||
else if (key == "--working-dir" && i + 1 < argc)
|
||||
else if (key == "--workdir" && i + 1 < argc)
|
||||
TestSystem::instance().setWorkingDir(argv[++i]);
|
||||
else if (key == "--ls")
|
||||
TestSystem::instance().setListMode(true);
|
||||
else
|
||||
{
|
||||
cout << "Unknown parameter: '" << key << "'" << endl;
|
||||
|
@ -68,10 +68,14 @@ public:
|
||||
cur_subtest_is_empty_ = false;
|
||||
}
|
||||
|
||||
bool isListMode() const { return is_list_mode_; }
|
||||
void setListMode(bool value) { is_list_mode_ = value; }
|
||||
|
||||
private:
|
||||
TestSystem(): cur_subtest_is_empty_(true), cpu_elapsed_(0),
|
||||
gpu_elapsed_(0), speedup_total_(0.0),
|
||||
num_subtests_called_(0) {}
|
||||
num_subtests_called_(0),
|
||||
is_list_mode_(false) {}
|
||||
|
||||
void finishCurrentSubtest();
|
||||
void resetCurrentSubtest()
|
||||
@ -100,6 +104,8 @@ private:
|
||||
|
||||
double speedup_total_;
|
||||
int num_subtests_called_;
|
||||
|
||||
bool is_list_mode_;
|
||||
};
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user