parallel_do replaced with parallel_for_ in driver_api_multi sample.

2015-01-05 13:48:54 +03:00
parent 091c7a3821
commit 72063bf136
1 changed files with 70 additions and 82 deletions
--- a/samples/gpu/driver_api_multi.cpp
+++ b/samples/gpu/driver_api_multi.cpp
@@ -7,42 +7,24 @@
 #endif

 #include <iostream>
-#include "cvconfig.h"
 #include "opencv2/core/core.hpp"
 #include "opencv2/gpu/gpu.hpp"

-#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) || defined(__arm__)
-
+#if defined(__arm__)
 int main()
 {
-#if !defined(HAVE_CUDA)
-    std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n";
-#endif
-
-#if !defined(HAVE_TBB)
-    std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
-#endif
-
-#if defined(__arm__)
    std::cout << "Unsupported for ARM CUDA library." << std::endl;
-#endif
-
    return 0;
 }
-
 #else

 #include <cuda.h>
 #include <cuda_runtime.h>
-#include "opencv2/core/internal.hpp" // For TBB wrappers

 using namespace std;
 using namespace cv;
 using namespace cv::gpu;

-struct Worker { void operator()(int device_id) const; };
-void destroyContexts();
-
 #define safeCall(expr) safeCall_(expr, #expr, __FILE__, __LINE__)
 inline void safeCall_(int code, const char* expr, const char* file, int line)
 {
@@ -50,65 +32,28 @@ inline void safeCall_(int code, const char* expr, const char* file, int line)
    {
        std::cout << "CUDA driver API error: code " << code << ", expr " << expr
        << ", file " << file << ", line " << line << endl;
-        destroyContexts();
        exit(-1);
    }
 }

-// Each GPU is associated with its own context
-CUcontext contexts[2];
-
-int main()
+struct Worker: public ParallelLoopBody
 {
-    int num_devices = getCudaEnabledDeviceCount();
-    if (num_devices < 2)
+    Worker(int num_devices)
    {
-        std::cout << "Two or more GPUs are required\n";
-        return -1;
-    }
-
-    for (int i = 0; i < num_devices; ++i)
+        count = num_devices;
+        contexts = new CUcontext[num_devices]; // one driver-API context per device
+        for (int device_id = 0; device_id < num_devices; device_id++)
        {
-        cv::gpu::printShortCudaDeviceInfo(i);
-
-        DeviceInfo dev_info(i);
-        if (!dev_info.isCompatible())
-        {
-            std::cout << "GPU module isn't built for GPU #" << i << " ("
-                 << dev_info.name() << ", CC " << dev_info.majorVersion()
-                 << dev_info.minorVersion() << "\n";
-            return -1;
-        }
-    }
-
-    // Init CUDA Driver API
-    safeCall(cuInit(0));
-
-    // Create context for GPU #0
            CUdevice device;
-    safeCall(cuDeviceGet(&device, 0));
-    safeCall(cuCtxCreate(&contexts[0], 0, device));
+            safeCall(cuDeviceGet(&device, device_id));
+            safeCall(cuCtxCreate(&contexts[device_id], 0, device));
+        }
+    }

-    CUcontext prev_context;
-    safeCall(cuCtxPopCurrent(&prev_context));
-
-    // Create context for GPU #1
-    safeCall(cuDeviceGet(&device, 1));
-    safeCall(cuCtxCreate(&contexts[1], 0, device));
-
-    safeCall(cuCtxPopCurrent(&prev_context));
-
-    // Execute calculation in two threads using two GPUs
-    int devices[] = {0, 1};
-    parallel_do(devices, devices + 2, Worker());
-
-    destroyContexts();
-    return 0;
-}
-
-
-void Worker::operator()(int device_id) const
-{
+    virtual void operator() (const Range& range) const
+    {
+        for (int device_id = range.start; device_id != range.end; ++device_id)
+        {
            // Set the proper context
            safeCall(cuCtxPushCurrent(contexts[device_id]));

@@ -138,13 +83,56 @@ void Worker::operator()(int device_id) const

            CUcontext prev_context;
            safeCall(cuCtxPopCurrent(&prev_context));
-}
+        }
+    }

+    ~Worker()
+    {
+        if ((contexts != NULL) && count != 0)
+        {
+            for (int device_id = 0; device_id < count; device_id++) // count == number of contexts created in the ctor
+            {
+                safeCall(cuCtxDestroy(contexts[device_id]));
+            }

-void destroyContexts()
+            delete[] contexts;
+        }
+    }
+
+    CUcontext* contexts;
+    int count;
+};
+
+int main()
 {
-    safeCall(cuCtxDestroy(contexts[0]));
-    safeCall(cuCtxDestroy(contexts[1]));
+    int num_devices = getCudaEnabledDeviceCount();
+    if (num_devices < 2)
+    {
+        std::cout << "Two or more GPUs are required\n";
+        return -1;
+    }
+
+    for (int i = 0; i < num_devices; ++i)
+    {
+        cv::gpu::printShortCudaDeviceInfo(i);
+
+        DeviceInfo dev_info(i);
+        if (!dev_info.isCompatible())
+        {
+            std::cout << "GPU module isn't built for GPU #" << i << " ("
+                 << dev_info.name() << ", CC " << dev_info.majorVersion()
+                 << dev_info.minorVersion() << "\n";
+            return -1;
+        }
+    }
+
+    // Init CUDA Driver API
+    safeCall(cuInit(0));
+
+    // Execute calculation
+    parallel_for_(cv::Range(0, num_devices), Worker(num_devices)); // one loop index per GPU
+
+    return 0;
 }

 #endif