Merge pull request #1355 from jet47:gpu-stereo-multi-gpu-sample
This commit is contained in:
commit
ff2b12334d
@ -1,149 +1,496 @@
|
||||
/* This sample demonstrates working on one piece of data using two GPUs.
|
||||
It splits input into two parts and processes them separately on different
|
||||
GPUs. */
|
||||
// This sample demonstrates working on one piece of data using two GPUs.
|
||||
// It splits input into two parts and processes them separately on different GPUs.
|
||||
|
||||
// Disable some warnings which are caused with CUDA headers
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable: 4201 4408 4100)
|
||||
#ifdef WIN32
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <pthread.h>
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include "cvconfig.h"
|
||||
#include <iomanip>
|
||||
|
||||
#include "opencv2/core/core.hpp"
|
||||
#include "opencv2/highgui/highgui.hpp"
|
||||
#include "opencv2/imgproc/imgproc.hpp"
|
||||
#include "opencv2/contrib/contrib.hpp"
|
||||
#include "opencv2/gpu/gpu.hpp"
|
||||
|
||||
#if !defined(HAVE_CUDA) || !defined(HAVE_TBB)
|
||||
|
||||
int main()
|
||||
{
|
||||
#if !defined(HAVE_CUDA)
|
||||
std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n";
|
||||
#endif
|
||||
|
||||
#if !defined(HAVE_TBB)
|
||||
std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include "opencv2/core/internal.hpp" // For TBB wrappers
|
||||
|
||||
using namespace std;
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
struct Worker { void operator()(int device_id) const; };
|
||||
///////////////////////////////////////////////////////////
|
||||
// Thread
|
||||
// OS-specific wrappers for multi-threading
|
||||
|
||||
// GPUs data
|
||||
GpuMat d_left[2];
|
||||
GpuMat d_right[2];
|
||||
StereoBM_GPU* bm[2];
|
||||
GpuMat d_result[2];
|
||||
|
||||
static void printHelp()
|
||||
#ifdef WIN32
|
||||
class Thread
|
||||
{
|
||||
std::cout << "Usage: stereo_multi_gpu --left <image> --right <image>\n";
|
||||
struct UserData
|
||||
{
|
||||
void (*func)(void* userData);
|
||||
void* param;
|
||||
};
|
||||
|
||||
static DWORD WINAPI WinThreadFunction(LPVOID lpParam)
|
||||
{
|
||||
UserData* userData = static_cast<UserData*>(lpParam);
|
||||
|
||||
userData->func(userData->param);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
UserData userData_;
|
||||
HANDLE thread_;
|
||||
DWORD threadId_;
|
||||
|
||||
public:
|
||||
Thread(void (*func)(void* userData), void* userData)
|
||||
{
|
||||
userData_.func = func;
|
||||
userData_.param = userData;
|
||||
|
||||
thread_ = CreateThread(
|
||||
NULL, // default security attributes
|
||||
0, // use default stack size
|
||||
WinThreadFunction, // thread function name
|
||||
&userData_, // argument to thread function
|
||||
0, // use default creation flags
|
||||
&threadId_); // returns the thread identifier
|
||||
}
|
||||
|
||||
~Thread()
|
||||
{
|
||||
CloseHandle(thread_);
|
||||
}
|
||||
|
||||
void wait()
|
||||
{
|
||||
WaitForSingleObject(thread_, INFINITE);
|
||||
}
|
||||
};
|
||||
#else
|
||||
class Thread
|
||||
{
|
||||
struct UserData
|
||||
{
|
||||
void (*func)(void* userData);
|
||||
void* param;
|
||||
};
|
||||
|
||||
static void* PThreadFunction(void* lpParam)
|
||||
{
|
||||
UserData* userData = static_cast<UserData*>(lpParam);
|
||||
|
||||
userData->func(userData->param);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
pthread_t thread_;
|
||||
UserData userData_;
|
||||
|
||||
public:
|
||||
Thread(void (*func)(void* userData), void* userData)
|
||||
{
|
||||
userData_.func = func;
|
||||
userData_.param = userData;
|
||||
|
||||
pthread_create(&thread_, NULL, PThreadFunction, &userData_);
|
||||
}
|
||||
|
||||
~Thread()
|
||||
{
|
||||
pthread_detach(thread_);
|
||||
}
|
||||
|
||||
void wait()
|
||||
{
|
||||
pthread_join(thread_, NULL);
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// StereoSingleGpu
|
||||
// Run Stereo algorithm on single GPU
|
||||
|
||||
class StereoSingleGpu
|
||||
{
|
||||
public:
|
||||
explicit StereoSingleGpu(int deviceId = 0);
|
||||
~StereoSingleGpu();
|
||||
|
||||
void compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity);
|
||||
|
||||
private:
|
||||
int deviceId_;
|
||||
GpuMat d_leftFrame;
|
||||
GpuMat d_rightFrame;
|
||||
GpuMat d_disparity;
|
||||
Ptr<StereoBM_GPU> d_alg;
|
||||
};
|
||||
|
||||
StereoSingleGpu::StereoSingleGpu(int deviceId) : deviceId_(deviceId)
|
||||
{
|
||||
gpu::setDevice(deviceId_);
|
||||
d_alg = new StereoBM_GPU(StereoBM_GPU::BASIC_PRESET, 256);
|
||||
}
|
||||
|
||||
StereoSingleGpu::~StereoSingleGpu()
|
||||
{
|
||||
gpu::setDevice(deviceId_);
|
||||
d_leftFrame.release();
|
||||
d_rightFrame.release();
|
||||
d_disparity.release();
|
||||
d_alg.release();
|
||||
}
|
||||
|
||||
void StereoSingleGpu::compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity)
|
||||
{
|
||||
gpu::setDevice(deviceId_);
|
||||
d_leftFrame.upload(leftFrame);
|
||||
d_rightFrame.upload(rightFrame);
|
||||
(*d_alg)(d_leftFrame, d_rightFrame, d_disparity);
|
||||
d_disparity.download(disparity);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// StereoMultiGpuThread
|
||||
// Run Stereo algorithm on two GPUs using different host threads
|
||||
|
||||
class StereoMultiGpuThread
|
||||
{
|
||||
public:
|
||||
StereoMultiGpuThread();
|
||||
~StereoMultiGpuThread();
|
||||
|
||||
void compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity);
|
||||
|
||||
private:
|
||||
GpuMat d_leftFrames[2];
|
||||
GpuMat d_rightFrames[2];
|
||||
GpuMat d_disparities[2];
|
||||
Ptr<StereoBM_GPU> d_algs[2];
|
||||
|
||||
struct StereoLaunchData
|
||||
{
|
||||
int deviceId;
|
||||
Mat leftFrame;
|
||||
Mat rightFrame;
|
||||
Mat disparity;
|
||||
GpuMat* d_leftFrame;
|
||||
GpuMat* d_rightFrame;
|
||||
GpuMat* d_disparity;
|
||||
Ptr<StereoBM_GPU> d_alg;
|
||||
};
|
||||
|
||||
static void launchGpuStereoAlg(void* userData);
|
||||
};
|
||||
|
||||
StereoMultiGpuThread::StereoMultiGpuThread()
|
||||
{
|
||||
gpu::setDevice(0);
|
||||
d_algs[0] = new StereoBM_GPU(StereoBM_GPU::BASIC_PRESET, 256);
|
||||
|
||||
gpu::setDevice(1);
|
||||
d_algs[1] = new StereoBM_GPU(StereoBM_GPU::BASIC_PRESET, 256);
|
||||
}
|
||||
|
||||
StereoMultiGpuThread::~StereoMultiGpuThread()
|
||||
{
|
||||
gpu::setDevice(0);
|
||||
d_leftFrames[0].release();
|
||||
d_rightFrames[0].release();
|
||||
d_disparities[0].release();
|
||||
d_algs[0].release();
|
||||
|
||||
gpu::setDevice(1);
|
||||
d_leftFrames[1].release();
|
||||
d_rightFrames[1].release();
|
||||
d_disparities[1].release();
|
||||
d_algs[1].release();
|
||||
}
|
||||
|
||||
void StereoMultiGpuThread::compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity)
|
||||
{
|
||||
disparity.create(leftFrame.size(), CV_8UC1);
|
||||
|
||||
// Split input data onto two parts for each GPUs.
|
||||
// We add small border for each part,
|
||||
// because original algorithm doesn't calculate disparity on image borders.
|
||||
// With such padding we will get output in the middle of final result.
|
||||
|
||||
StereoLaunchData launchDatas[2];
|
||||
|
||||
launchDatas[0].deviceId = 0;
|
||||
launchDatas[0].leftFrame = leftFrame.rowRange(0, leftFrame.rows / 2 + 32);
|
||||
launchDatas[0].rightFrame = rightFrame.rowRange(0, rightFrame.rows / 2 + 32);
|
||||
launchDatas[0].disparity = disparity.rowRange(0, leftFrame.rows / 2);
|
||||
launchDatas[0].d_leftFrame = &d_leftFrames[0];
|
||||
launchDatas[0].d_rightFrame = &d_rightFrames[0];
|
||||
launchDatas[0].d_disparity = &d_disparities[0];
|
||||
launchDatas[0].d_alg = d_algs[0];
|
||||
|
||||
launchDatas[1].deviceId = 1;
|
||||
launchDatas[1].leftFrame = leftFrame.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows);
|
||||
launchDatas[1].rightFrame = rightFrame.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows);
|
||||
launchDatas[1].disparity = disparity.rowRange(leftFrame.rows / 2, leftFrame.rows);
|
||||
launchDatas[1].d_leftFrame = &d_leftFrames[1];
|
||||
launchDatas[1].d_rightFrame = &d_rightFrames[1];
|
||||
launchDatas[1].d_disparity = &d_disparities[1];
|
||||
launchDatas[1].d_alg = d_algs[1];
|
||||
|
||||
Thread thread0(launchGpuStereoAlg, &launchDatas[0]);
|
||||
Thread thread1(launchGpuStereoAlg, &launchDatas[1]);
|
||||
|
||||
thread0.wait();
|
||||
thread1.wait();
|
||||
}
|
||||
|
||||
void StereoMultiGpuThread::launchGpuStereoAlg(void* userData)
|
||||
{
|
||||
StereoLaunchData* data = static_cast<StereoLaunchData*>(userData);
|
||||
|
||||
gpu::setDevice(data->deviceId);
|
||||
data->d_leftFrame->upload(data->leftFrame);
|
||||
data->d_rightFrame->upload(data->rightFrame);
|
||||
(*data->d_alg)(*data->d_leftFrame, *data->d_rightFrame, *data->d_disparity);
|
||||
|
||||
if (data->deviceId == 0)
|
||||
data->d_disparity->rowRange(0, data->d_disparity->rows - 32).download(data->disparity);
|
||||
else
|
||||
data->d_disparity->rowRange(32, data->d_disparity->rows).download(data->disparity);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// StereoMultiGpuStream
|
||||
// Run Stereo algorithm on two GPUs from single host thread using async API
|
||||
|
||||
class StereoMultiGpuStream
|
||||
{
|
||||
public:
|
||||
StereoMultiGpuStream();
|
||||
~StereoMultiGpuStream();
|
||||
|
||||
void compute(const CudaMem& leftFrame, const CudaMem& rightFrame, CudaMem& disparity);
|
||||
|
||||
private:
|
||||
GpuMat d_leftFrames[2];
|
||||
GpuMat d_rightFrames[2];
|
||||
GpuMat d_disparities[2];
|
||||
Ptr<StereoBM_GPU> d_algs[2];
|
||||
Ptr<Stream> streams[2];
|
||||
};
|
||||
|
||||
StereoMultiGpuStream::StereoMultiGpuStream()
|
||||
{
|
||||
gpu::setDevice(0);
|
||||
d_algs[0] = new StereoBM_GPU(StereoBM_GPU::BASIC_PRESET, 256);
|
||||
streams[0] = new Stream;
|
||||
|
||||
gpu::setDevice(1);
|
||||
d_algs[1] = new StereoBM_GPU(StereoBM_GPU::BASIC_PRESET, 256);
|
||||
streams[1] = new Stream;
|
||||
}
|
||||
|
||||
StereoMultiGpuStream::~StereoMultiGpuStream()
|
||||
{
|
||||
gpu::setDevice(0);
|
||||
d_leftFrames[0].release();
|
||||
d_rightFrames[0].release();
|
||||
d_disparities[0].release();
|
||||
d_algs[0].release();
|
||||
streams[0].release();
|
||||
|
||||
gpu::setDevice(1);
|
||||
d_leftFrames[1].release();
|
||||
d_rightFrames[1].release();
|
||||
d_disparities[1].release();
|
||||
d_algs[1].release();
|
||||
streams[1].release();
|
||||
}
|
||||
|
||||
void StereoMultiGpuStream::compute(const CudaMem& leftFrame, const CudaMem& rightFrame, CudaMem& disparity)
|
||||
{
|
||||
disparity.create(leftFrame.size(), CV_8UC1);
|
||||
|
||||
// Split input data onto two parts for each GPUs.
|
||||
// We add small border for each part,
|
||||
// because original algorithm doesn't calculate disparity on image borders.
|
||||
// With such padding we will get output in the middle of final result.
|
||||
|
||||
Mat leftFrameHdr = leftFrame.createMatHeader();
|
||||
Mat rightFrameHdr = rightFrame.createMatHeader();
|
||||
Mat disparityHdr = disparity.createMatHeader();
|
||||
Mat disparityPart0 = disparityHdr.rowRange(0, leftFrame.rows / 2);
|
||||
Mat disparityPart1 = disparityHdr.rowRange(leftFrame.rows / 2, leftFrame.rows);
|
||||
|
||||
gpu::setDevice(0);
|
||||
streams[0]->enqueueUpload(leftFrameHdr.rowRange(0, leftFrame.rows / 2 + 32), d_leftFrames[0]);
|
||||
streams[0]->enqueueUpload(rightFrameHdr.rowRange(0, leftFrame.rows / 2 + 32), d_rightFrames[0]);
|
||||
(*d_algs[0])(d_leftFrames[0], d_rightFrames[0], d_disparities[0], *streams[0]);
|
||||
streams[0]->enqueueDownload(d_disparities[0].rowRange(0, leftFrame.rows / 2), disparityPart0);
|
||||
|
||||
gpu::setDevice(1);
|
||||
streams[1]->enqueueUpload(leftFrameHdr.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows), d_leftFrames[1]);
|
||||
streams[1]->enqueueUpload(rightFrameHdr.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows), d_rightFrames[1]);
|
||||
(*d_algs[1])(d_leftFrames[1], d_rightFrames[1], d_disparities[1], *streams[1]);
|
||||
streams[1]->enqueueDownload(d_disparities[1].rowRange(32, d_disparities[1].rows), disparityPart1);
|
||||
|
||||
gpu::setDevice(0);
|
||||
streams[0]->waitForCompletion();
|
||||
|
||||
gpu::setDevice(1);
|
||||
streams[1]->waitForCompletion();
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// main
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc < 5)
|
||||
if (argc != 3)
|
||||
{
|
||||
printHelp();
|
||||
cerr << "Usage: stereo_multi_gpu <left_video> <right_video>" << endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
int num_devices = getCudaEnabledDeviceCount();
|
||||
if (num_devices < 2)
|
||||
const int numDevices = getCudaEnabledDeviceCount();
|
||||
if (numDevices != 2)
|
||||
{
|
||||
std::cout << "Two or more GPUs are required\n";
|
||||
cerr << "Two GPUs are required" << endl;
|
||||
return -1;
|
||||
}
|
||||
for (int i = 0; i < num_devices; ++i)
|
||||
{
|
||||
cv::gpu::printShortCudaDeviceInfo(i);
|
||||
|
||||
DeviceInfo dev_info(i);
|
||||
if (!dev_info.isCompatible())
|
||||
for (int i = 0; i < numDevices; ++i)
|
||||
{
|
||||
DeviceInfo devInfo(i);
|
||||
if (!devInfo.isCompatible())
|
||||
{
|
||||
std::cout << "GPU module isn't built for GPU #" << i << " ("
|
||||
<< dev_info.name() << ", CC " << dev_info.majorVersion()
|
||||
<< dev_info.minorVersion() << "\n";
|
||||
cerr << "GPU module was't built for GPU #" << i << " ("
|
||||
<< devInfo.name() << ", CC " << devInfo.majorVersion()
|
||||
<< devInfo.minorVersion() << endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
printShortCudaDeviceInfo(i);
|
||||
}
|
||||
|
||||
// Load input data
|
||||
Mat left, right;
|
||||
for (int i = 1; i < argc; ++i)
|
||||
VideoCapture leftVideo(argv[1]);
|
||||
VideoCapture rightVideo(argv[2]);
|
||||
|
||||
if (!leftVideo.isOpened())
|
||||
{
|
||||
if (string(argv[i]) == "--left")
|
||||
cerr << "Can't open " << argv[1] << " video file" << endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!rightVideo.isOpened())
|
||||
{
|
||||
cerr << "Can't open " << argv[2] << " video file" << endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
cout << endl;
|
||||
cout << "This sample demonstrates working on one piece of data using two GPUs." << endl;
|
||||
cout << "It splits input into two parts and processes them separately on different GPUs." << endl;
|
||||
cout << endl;
|
||||
|
||||
Mat leftFrame, rightFrame;
|
||||
CudaMem leftGrayFrame, rightGrayFrame;
|
||||
|
||||
StereoSingleGpu gpu0Alg(0);
|
||||
StereoSingleGpu gpu1Alg(1);
|
||||
StereoMultiGpuThread multiThreadAlg;
|
||||
StereoMultiGpuStream multiStreamAlg;
|
||||
|
||||
Mat disparityGpu0;
|
||||
Mat disparityGpu1;
|
||||
Mat disparityMultiThread;
|
||||
CudaMem disparityMultiStream;
|
||||
|
||||
Mat disparityGpu0Show;
|
||||
Mat disparityGpu1Show;
|
||||
Mat disparityMultiThreadShow;
|
||||
Mat disparityMultiStreamShow;
|
||||
|
||||
TickMeter tm;
|
||||
|
||||
cout << "-------------------------------------------------------------------" << endl;
|
||||
cout << "| Frame | GPU 0 ms | GPU 1 ms | Multi Thread ms | Multi Stream ms |" << endl;
|
||||
cout << "-------------------------------------------------------------------" << endl;
|
||||
|
||||
for (int i = 0;; ++i)
|
||||
{
|
||||
leftVideo >> leftFrame;
|
||||
rightVideo >> rightFrame;
|
||||
|
||||
if (leftFrame.empty() || rightFrame.empty())
|
||||
break;
|
||||
|
||||
if (leftFrame.size() != rightFrame.size())
|
||||
{
|
||||
left = imread(argv[++i], CV_LOAD_IMAGE_GRAYSCALE);
|
||||
CV_Assert(!left.empty());
|
||||
}
|
||||
else if (string(argv[i]) == "--right")
|
||||
{
|
||||
right = imread(argv[++i], CV_LOAD_IMAGE_GRAYSCALE);
|
||||
CV_Assert(!right.empty());
|
||||
}
|
||||
else if (string(argv[i]) == "--help")
|
||||
{
|
||||
printHelp();
|
||||
cerr << "Frames have different sizes" << endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
leftGrayFrame.create(leftFrame.size(), CV_8UC1);
|
||||
rightGrayFrame.create(leftFrame.size(), CV_8UC1);
|
||||
|
||||
cvtColor(leftFrame, leftGrayFrame.createMatHeader(), COLOR_BGR2GRAY);
|
||||
cvtColor(rightFrame, rightGrayFrame.createMatHeader(), COLOR_BGR2GRAY);
|
||||
|
||||
tm.reset(); tm.start();
|
||||
gpu0Alg.compute(leftGrayFrame, rightGrayFrame, disparityGpu0);
|
||||
tm.stop();
|
||||
|
||||
const double gpu0Time = tm.getTimeMilli();
|
||||
|
||||
tm.reset(); tm.start();
|
||||
gpu1Alg.compute(leftGrayFrame, rightGrayFrame, disparityGpu1);
|
||||
tm.stop();
|
||||
|
||||
const double gpu1Time = tm.getTimeMilli();
|
||||
|
||||
tm.reset(); tm.start();
|
||||
multiThreadAlg.compute(leftGrayFrame, rightGrayFrame, disparityMultiThread);
|
||||
tm.stop();
|
||||
|
||||
const double multiThreadTime = tm.getTimeMilli();
|
||||
|
||||
tm.reset(); tm.start();
|
||||
multiStreamAlg.compute(leftGrayFrame, rightGrayFrame, disparityMultiStream);
|
||||
tm.stop();
|
||||
|
||||
const double multiStreamTime = tm.getTimeMilli();
|
||||
|
||||
cout << "| " << setw(5) << i << " | "
|
||||
<< setw(8) << setprecision(1) << fixed << gpu0Time << " | "
|
||||
<< setw(8) << setprecision(1) << fixed << gpu1Time << " | "
|
||||
<< setw(15) << setprecision(1) << fixed << multiThreadTime << " | "
|
||||
<< setw(15) << setprecision(1) << fixed << multiStreamTime << " |" << endl;
|
||||
|
||||
resize(disparityGpu0, disparityGpu0Show, Size(1024, 768), 0, 0, INTER_AREA);
|
||||
resize(disparityGpu1, disparityGpu1Show, Size(1024, 768), 0, 0, INTER_AREA);
|
||||
resize(disparityMultiThread, disparityMultiThreadShow, Size(1024, 768), 0, 0, INTER_AREA);
|
||||
resize(disparityMultiStream.createMatHeader(), disparityMultiStreamShow, Size(1024, 768), 0, 0, INTER_AREA);
|
||||
|
||||
imshow("disparityGpu0", disparityGpu0Show);
|
||||
imshow("disparityGpu1", disparityGpu1Show);
|
||||
imshow("disparityMultiThread", disparityMultiThreadShow);
|
||||
imshow("disparityMultiStream", disparityMultiStreamShow);
|
||||
|
||||
const int key = waitKey(30) & 0xff;
|
||||
if (key == 27)
|
||||
break;
|
||||
}
|
||||
|
||||
// Split source images for processing on the GPU #0
|
||||
setDevice(0);
|
||||
d_left[0].upload(left.rowRange(0, left.rows / 2));
|
||||
d_right[0].upload(right.rowRange(0, right.rows / 2));
|
||||
bm[0] = new StereoBM_GPU();
|
||||
cout << "-------------------------------------------------------------------" << endl;
|
||||
|
||||
// Split source images for processing on the GPU #1
|
||||
setDevice(1);
|
||||
d_left[1].upload(left.rowRange(left.rows / 2, left.rows));
|
||||
d_right[1].upload(right.rowRange(right.rows / 2, right.rows));
|
||||
bm[1] = new StereoBM_GPU();
|
||||
|
||||
// Execute calculation in two threads using two GPUs
|
||||
int devices[] = {0, 1};
|
||||
parallel_do(devices, devices + 2, Worker());
|
||||
|
||||
// Release the first GPU resources
|
||||
setDevice(0);
|
||||
imshow("GPU #0 result", Mat(d_result[0]));
|
||||
d_left[0].release();
|
||||
d_right[0].release();
|
||||
d_result[0].release();
|
||||
delete bm[0];
|
||||
|
||||
// Release the second GPU resources
|
||||
setDevice(1);
|
||||
imshow("GPU #1 result", Mat(d_result[1]));
|
||||
d_left[1].release();
|
||||
d_right[1].release();
|
||||
d_result[1].release();
|
||||
delete bm[1];
|
||||
|
||||
waitKey();
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void Worker::operator()(int device_id) const
|
||||
{
|
||||
setDevice(device_id);
|
||||
|
||||
bm[device_id]->operator()(d_left[device_id], d_right[device_id],
|
||||
d_result[device_id]);
|
||||
|
||||
std::cout << "GPU #" << device_id << " (" << DeviceInfo().name()
|
||||
<< "): finished\n";
|
||||
}
|
||||
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user