finished multi_gpu sample
This commit is contained in:
@@ -31,18 +31,20 @@ using namespace std;
|
|||||||
using namespace cv;
|
using namespace cv;
|
||||||
using namespace cv::gpu;
|
using namespace cv::gpu;
|
||||||
|
|
||||||
|
|
||||||
void cuSafeCall(int code);
|
|
||||||
struct Worker { void operator()(int device_id) const; };
|
struct Worker { void operator()(int device_id) const; };
|
||||||
void destroy();
|
void destroyContexts();
|
||||||
|
|
||||||
|
#define cuSafeCall(code) if (code != CUDA_SUCCESS) { \
|
||||||
|
cout << "CUDA driver API error: code " << code \
|
||||||
|
<< ", file " << __FILE__ << ", line " << __LINE__ << endl; \
|
||||||
|
destroyContexts(); \
|
||||||
|
exit(-1); \
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Each GPU is associated with its own context
|
// Each GPU is associated with its own context
|
||||||
CUcontext contexts[2];
|
CUcontext contexts[2];
|
||||||
|
|
||||||
// Auxiliary variable, stores previusly used context
|
|
||||||
CUcontext prev_context;
|
|
||||||
|
|
||||||
|
|
||||||
int main()
|
int main()
|
||||||
{
|
{
|
||||||
@@ -52,50 +54,61 @@ int main()
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Save the default context
|
cuSafeCall(cuInit(0));
|
||||||
cuSafeCall(cuCtxAttach(&contexts[0], 0));
|
|
||||||
cuSafeCall(cuCtxDetach(contexts[0]));
|
|
||||||
|
|
||||||
// Create new context for the second GPU
|
// Create context for the first GPU
|
||||||
CUdevice device;
|
CUdevice device;
|
||||||
|
cuSafeCall(cuDeviceGet(&device, 0));
|
||||||
|
cuSafeCall(cuCtxCreate(&contexts[0], 0, device));
|
||||||
|
|
||||||
|
CUcontext prev_context;
|
||||||
|
cuCtxPopCurrent(&prev_context);
|
||||||
|
|
||||||
|
// Create context for the second GPU
|
||||||
cuSafeCall(cuDeviceGet(&device, 1));
|
cuSafeCall(cuDeviceGet(&device, 1));
|
||||||
cuSafeCall(cuCtxCreate(&contexts[1], 0, device));
|
cuSafeCall(cuCtxCreate(&contexts[1], 1, device));
|
||||||
|
|
||||||
// Restore the first GPU context
|
cuCtxPopCurrent(&prev_context);
|
||||||
cuSafeCall(cuCtxPopCurrent(&prev_context));
|
|
||||||
|
|
||||||
// Run
|
// Execute calculation in two threads using two GPUs
|
||||||
int devices[] = {0, 1};
|
int devices[] = {0, 1};
|
||||||
parallel_do(devices, devices + 2, Worker());
|
parallel_do(devices, devices + 2, Worker());
|
||||||
|
|
||||||
// Destroy context of the second GPU
|
destroyContexts();
|
||||||
destroy();
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Worker::operator()(int device_id) const
|
void Worker::operator()(int device_id) const
|
||||||
{
|
{
|
||||||
cout << device_id << endl;
|
cuCtxPushCurrent(contexts[device_id]);
|
||||||
|
|
||||||
|
// Generate random matrix
|
||||||
|
Mat src(1000, 1000, CV_32F);
|
||||||
|
RNG rng(0);
|
||||||
|
rng.fill(src, RNG::UNIFORM, 0, 1);
|
||||||
|
|
||||||
|
// Upload data on GPU
|
||||||
|
GpuMat d_src(src);
|
||||||
|
GpuMat d_dst;
|
||||||
|
|
||||||
|
transpose(d_src, d_dst);
|
||||||
|
|
||||||
|
// Deallocate here, otherwise deallocation will be performed
|
||||||
|
// after context is extracted from the stack
|
||||||
|
d_src.release();
|
||||||
|
d_dst.release();
|
||||||
|
|
||||||
|
CUcontext prev_context;
|
||||||
|
cuCtxPopCurrent(&prev_context);
|
||||||
|
|
||||||
|
cout << "Device " << device_id << " finished\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void cuSafeCall(int code)
|
void destroyContexts()
|
||||||
{
|
|
||||||
if (code != CUDA_SUCCESS)
|
|
||||||
{
|
|
||||||
cout << "CUDA driver API error: code " << code
|
|
||||||
<< ", file " << __FILE__
|
|
||||||
<< ", line " << __LINE__ << endl;
|
|
||||||
destroy();
|
|
||||||
exit(-1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void destroy()
|
|
||||||
{
|
{
|
||||||
|
cuCtxDestroy(contexts[0]);
|
||||||
cuCtxDestroy(contexts[1]);
|
cuCtxDestroy(contexts[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user