parallel_do replaced with parallel_for_ in the driver_api_multi sample.

2015-01-05 13:48:54 +03:00
parent 091c7a3821
commit 72063bf136
1 changed file with 70 additions and 82 deletions
--- a/samples/gpu/driver_api_multi.cpp
+++ b/samples/gpu/driver_api_multi.cpp
@@ -7,42 +7,24 @@
 #endif
 #include <iostream>
 #include "cvconfig.h"
 #include "opencv2/core/core.hpp"
 #include "opencv2/gpu/gpu.hpp"
-#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__)
+#if defined(__arm__)
 int main()
 {
 #if !defined(HAVE_CUDA)
    std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n";
 #endif
 #if !defined(HAVE_TBB)
    std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
 #endif
 #if defined(__arm__)
    std::cout << "Unsupported for ARM CUDA library." << std::endl;
 #endif
    return 0;
 }
 #else
 #include <cuda.h>
 #include <cuda_runtime.h>
 #include "opencv2/core/internal.hpp" // For TBB wrappers
 using namespace std;
 using namespace cv;
 using namespace cv::gpu;
 struct Worker { void operator()(int device_id) const; };
 void destroyContexts();
 #define safeCall(expr) safeCall_(expr, #expr, __FILE__, __LINE__)
 inline void safeCall_(int code, const char* expr, const char* file, int line)
 {
@@ -50,65 +32,28 @@ inline void safeCall_(int code, const char* expr, const char* file, int line)
    {
        std::cout << "CUDA driver API error: code " << code << ", expr " << expr
        << ", file " << file << ", line " << line << endl;
        destroyContexts();
        exit(-1);
    }
 }
-// Each GPU is associated with its own context
+struct Worker: public ParallelLoopBody
 CUcontext contexts[2];
 int main()
 {
-    int num_devices = getCudaEnabledDeviceCount();
+    Worker(int num_devices)
    if (num_devices < 2)
    {
-        std::cout << "Two or more GPUs are required\n";
+        count = num_devices;
-        return -1;
+        contexts = new contexts CUcontext[num_devices];
-    }
+        for (int device_id = 0; i < num_devices; device_id++)
    for (int i = 0; i < num_devices; ++i)
        {
        cv::gpu::printShortCudaDeviceInfo(i);
        DeviceInfo dev_info(i);
        if (!dev_info.isCompatible())
        {
            std::cout << "GPU module isn't built for GPU #" << i << " ("
                 << dev_info.name() << ", CC " << dev_info.majorVersion()
                 << dev_info.minorVersion() << "\n";
            return -1;
        }
    }
    // Init CUDA Driver API
    safeCall(cuInit(0));
    // Create context for GPU #0
            CUdevice device;
-    safeCall(cuDeviceGet(&device, 0));
+            safeCall(cuDeviceGet(&device, device_id));
-    safeCall(cuCtxCreate(&contexts[0], 0, device));
+            safeCall(cuCtxCreate(&contexts[device_id], 0, device));
        }
    }
-    CUcontext prev_context;
+    virtual void operator() (const Range& range) const
-    safeCall(cuCtxPopCurrent(&prev_context));
+    {
-
+        for (int device_id = range.start; device_id != range.end; ++device_id)
-    // Create context for GPU #1
+        {
    safeCall(cuDeviceGet(&device, 1));
    safeCall(cuCtxCreate(&contexts[1], 0, device));
    safeCall(cuCtxPopCurrent(&prev_context));
    // Execute calculation in two threads using two GPUs
    int devices[] = {0, 1};
    parallel_do(devices, devices + 2, Worker());
    destroyContexts();
    return 0;
 }
 void Worker::operator()(int device_id) const
 {
            // Set the proper context
            safeCall(cuCtxPushCurrent(contexts[device_id]));
@@ -138,13 +83,56 @@ void Worker::operator()(int device_id) const
            CUcontext prev_context;
            safeCall(cuCtxPopCurrent(&prev_context));
-}
+        }
    }
    ~Worker()
    {
        if ((contexts != NULL) && count != 0)
        {
            for (int device_id = 0; i < num_devices; device_id++)
            {
                safeCall(cuCtxDestroy(contexts[device_id]));
            }
-void destroyContexts()
+            delete[] contexts;
        }
    }
    CUcontext* contexts;
    int count;
 };
 int main()
 {
-    safeCall(cuCtxDestroy(contexts[0]));
+    int num_devices = getCudaEnabledDeviceCount();
-    safeCall(cuCtxDestroy(contexts[1]));
+    if (num_devices < 2)
    {
        std::cout << "Two or more GPUs are required\n";
        return -1;
    }
    for (int i = 0; i < num_devices; ++i)
    {
        cv::gpu::printShortCudaDeviceInfo(i);
        DeviceInfo dev_info(i);
        if (!dev_info.isCompatible())
        {
            std::cout << "GPU module isn't built for GPU #" << i << " ("
                 << dev_info.name() << ", CC " << dev_info.majorVersion()
                 << dev_info.minorVersion() << "\n";
            return -1;
        }
    }
    // Init CUDA Driver API
    safeCall(cuInit(0));
    // Execute calculation
    parallel_for_(cv::Range(0, num_devices, Worker(num_devices));
    return 0;
 }
 #endif