ocl: split initialization.cpp into 3 files: context, operations, programcache

Alexander Alekhin committed on 2013-09-20 19:19:52 +04:00
parent 8e75947a7d
commit e8d9ed8955
36 changed files with 1699 additions and 1534 deletions


@@ -57,8 +57,7 @@ namespace cv
{
namespace ocl
{
using std::auto_ptr;
enum
enum DeviceType
{
CVCL_DEVICE_TYPE_DEFAULT = (1 << 0),
CVCL_DEVICE_TYPE_CPU = (1 << 1),
@@ -93,77 +92,113 @@ namespace cv
//return -1 if the target type is unsupported, otherwise return 0
CV_EXPORTS int setDevMemType(DevMemRW rw_type = DEVICE_MEM_R_W, DevMemType mem_type = DEVICE_MEM_DEFAULT);
//this class contains ocl runtime information
class CV_EXPORTS Info
// these classes contain OpenCL runtime information
struct PlatformInfo;
struct DeviceInfo
{
public:
struct Impl;
Impl *impl;
int _id; // reserved, don't use it
Info();
Info(const Info &m);
~Info();
void release();
Info &operator = (const Info &m);
std::vector<string> DeviceName;
DeviceType deviceType;
std::string deviceProfile;
std::string deviceVersion;
std::string deviceName;
std::string deviceVendor;
int deviceVendorId;
std::string deviceDriverVersion;
std::string deviceExtensions;
size_t maxWorkGroupSize;
std::vector<size_t> maxWorkItemSizes;
int maxComputeUnits;
size_t localMemorySize;
int deviceVersionMajor;
int deviceVersionMinor;
bool haveDoubleSupport;
bool isUnifiedMemory; // 1 means integrated GPU, otherwise this value is 0
std::string compilationExtraOptions;
const PlatformInfo* platform;
DeviceInfo();
};
struct PlatformInfo
{
int _id; // reserved, don't use it
std::string platformProfile;
std::string platformVersion;
std::string platformName;
std::string platformVendor;
std::string platformExtensons;
int platformVersionMajor;
int platformVersionMinor;
std::vector<const DeviceInfo*> devices;
PlatformInfo();
};
//////////////////////////////// Initialization & Info ////////////////////////
//this function may be obsoleted
//CV_EXPORTS cl_device_id getDevice();
//this function must be called before any other cv::ocl:: function; it initializes the OpenCL runtime
//each Info relates to an OpenCL platform
//there are one or more devices on each platform, each with its own name
CV_EXPORTS int getDevice(std::vector<Info> &oclinfo, int devicetype = CVCL_DEVICE_TYPE_GPU);
typedef std::vector<const PlatformInfo*> PlatformsInfo;
//set the device you want to use; optional, to be called after getDevice
//devnum is the index of the selected device in the DeviceName vector of Info
CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0);
CV_EXPORTS int getOpenCLPlatforms(PlatformsInfo& platforms);
//The two functions below enable other OpenCL programs to use the ocl module's cl_context and cl_command_queue
//returns cl_context *
CV_EXPORTS void* getoclContext();
//returns cl_command_queue *
CV_EXPORTS void* getoclCommandQueue();
typedef std::vector<const DeviceInfo*> DevicesInfo;
//explicitly calls clFinish. The global command queue will be used.
CV_EXPORTS void finish();
CV_EXPORTS int getOpenCLDevices(DevicesInfo& devices, int deviceType = CVCL_DEVICE_TYPE_GPU,
const PlatformInfo* platform = NULL);
//this function enables the ocl module to use a customized cl_context and cl_command_queue
//getDevice also needs to be called before this function
CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0);
//returns true when global OpenCL context is initialized
CV_EXPORTS bool initialized();
// set device you want to use
CV_EXPORTS void setDevice(const DeviceInfo* info);
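For reference, a minimal sketch of how the new enumeration API is meant to be used (illustrative only; assumes at least one OpenCL-capable GPU is present):

    #include <iostream>
    #include "opencv2/ocl/ocl.hpp"

    int main()
    {
        // enumerate all GPU devices across the available platforms
        cv::ocl::DevicesInfo devices;
        cv::ocl::getOpenCLDevices(devices, cv::ocl::CVCL_DEVICE_TYPE_GPU);

        // inspect the first device and make it the current one
        const cv::ocl::DeviceInfo* dev = devices.at(0);   // assumes a GPU is present
        std::cout << dev->platform->platformName << " / " << dev->deviceName << std::endl;
        cv::ocl::setDevice(dev);
        return 0;
    }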
//////////////////////////////// Error handling ////////////////////////
CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func);
//////////////////////////////// OpenCL context ////////////////////////
//This is a global singleton class used to represent an OpenCL context.
enum FEATURE_TYPE
{
FEATURE_CL_DOUBLE = 1,
FEATURE_CL_UNIFIED_MEM,
FEATURE_CL_VER_1_2
};
// Represents OpenCL context, interface
class CV_EXPORTS Context
{
protected:
Context();
friend class auto_ptr<Context>;
friend bool initialized();
private:
static auto_ptr<Context> clCxt;
static int val;
Context() { }
~Context() { }
public:
~Context();
void release();
Info::Impl* impl;
static Context* getContext();
static void setContext(Info &oclinfo);
enum {CL_DOUBLE, CL_UNIFIED_MEM, CL_VER_1_2};
bool supportsFeature(int ftype) const;
size_t computeUnits() const;
void* oclContext();
void* oclCommandQueue();
bool supportsFeature(FEATURE_TYPE featureType) const;
const DeviceInfo& getDeviceInfo() const;
const void* getOpenCLContextPtr() const;
const void* getOpenCLCommandQueuePtr() const;
const void* getOpenCLDeviceIDPtr() const;
};
inline const void *getClContextPtr()
{
return Context::getContext()->getOpenCLContextPtr();
}
inline const void *getClCommandQueuePtr()
{
return Context::getContext()->getOpenCLCommandQueuePtr();
}
bool CV_EXPORTS supportsFeature(FEATURE_TYPE featureType);
void CV_EXPORTS finish();
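A short illustrative sketch of querying the reworked Context interface; the getClContextPtr()/getClCommandQueuePtr() helpers are the inline wrappers defined just above:

    #include "opencv2/ocl/ocl.hpp"

    void reportContext()
    {
        cv::ocl::Context* ctx = cv::ocl::Context::getContext();

        // feature checks replace the old CL_DOUBLE / CL_UNIFIED_MEM enum values
        bool hasDouble  = ctx->supportsFeature(cv::ocl::FEATURE_CL_DOUBLE);
        bool unifiedMem = ctx->supportsFeature(cv::ocl::FEATURE_CL_UNIFIED_MEM);

        // device properties now come from DeviceInfo instead of Info::Impl
        const cv::ocl::DeviceInfo& dev = ctx->getDeviceInfo();
        size_t wgSize = dev.maxWorkGroupSize;

        // raw handles for interop (pointers to cl_context / cl_command_queue)
        const void* clCtxPtr   = cv::ocl::getClContextPtr();
        const void* clQueuePtr = cv::ocl::getClCommandQueuePtr();
        (void)hasDouble; (void)unifiedMem; (void)wgSize; (void)clCtxPtr; (void)clQueuePtr;
    }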
//! Calls a kernel, by string. Pass globalThreads = NULL, and cleanUp = true, to finally clean-up without executing.
CV_EXPORTS double openCLExecuteKernelInterop(Context *clCxt ,
const char **source, string kernelName,
@@ -384,7 +419,7 @@ namespace cv
uchar *dataend;
//! OpenCL context associated with the oclMat object.
Context *clCxt;
Context *clCxt; // TODO clCtx
//add offset for handle ROI, calculated in byte
int offset;
//add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used
@@ -1879,11 +1914,6 @@ namespace cv
oclMat temp5;
};
static inline size_t divUp(size_t total, size_t grain)
{
return (total + grain - 1) / grain;
}
/*!***************K Nearest Neighbour*************!*/
class CV_EXPORTS KNearestNeighbour: public CvKNearest
{


@@ -52,120 +52,138 @@
namespace cv
{
namespace ocl
{
inline cl_device_id getClDeviceID(const Context *ctx)
{
return *(cl_device_id*)(ctx->getOpenCLDeviceIDPtr());
}
inline cl_context getClContext(const Context *ctx)
{
return *(cl_context*)(ctx->getOpenCLContextPtr());
}
inline cl_command_queue getClCommandQueue(const Context *ctx)
{
return *(cl_command_queue*)(ctx->getOpenCLCommandQueuePtr());
}
enum openCLMemcpyKind
{
clMemcpyHostToDevice = 0,
clMemcpyDeviceToHost,
clMemcpyDeviceToDevice
};
///////////////////////////OpenCL call wrappers////////////////////////////
void CV_EXPORTS openCLMallocPitch(Context *clCxt, void **dev_ptr, size_t *pitch,
size_t widthInBytes, size_t height);
void CV_EXPORTS openCLMallocPitchEx(Context *clCxt, void **dev_ptr, size_t *pitch,
size_t widthInBytes, size_t height, DevMemRW rw_type, DevMemType mem_type);
void CV_EXPORTS openCLMemcpy2D(Context *clCxt, void *dst, size_t dpitch,
const void *src, size_t spitch,
size_t width, size_t height, openCLMemcpyKind kind, int channels = -1);
void CV_EXPORTS openCLCopyBuffer2D(Context *clCxt, void *dst, size_t dpitch, int dst_offset,
const void *src, size_t spitch,
size_t width, size_t height, int src_offset);
void CV_EXPORTS openCLFree(void *devPtr);
cl_mem CV_EXPORTS openCLCreateBuffer(Context *clCxt, size_t flag, size_t size);
void CV_EXPORTS openCLReadBuffer(Context *clCxt, cl_mem dst_buffer, void *host_buffer, size_t size);
cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
const char **source, std::string kernelName);
cl_kernel CV_EXPORTS openCLGetKernelFromSource(const Context *clCxt,
const char **source, std::string kernelName, const char *build_options);
void CV_EXPORTS openCLVerifyKernel(const Context *clCxt, cl_kernel kernel, size_t *localThreads);
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, string kernelName, std::vector< std::pair<size_t, const void *> > &args,
int globalcols , int globalrows, size_t blockSize = 16, int kernel_expand_depth = -1, int kernel_expand_channel = -1);
void CV_EXPORTS openCLExecuteKernel_(Context *clCxt , const char **source, std::string kernelName,
size_t globalThreads[3], size_t localThreads[3],
std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, const char *build_options);
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth);
void CV_EXPORTS openCLExecuteKernel(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, const char *build_options);
cl_mem CV_EXPORTS load_constant(cl_context context, cl_command_queue command_queue, const void *value,
const size_t size);
cl_mem CV_EXPORTS openCLMalloc(cl_context clCxt, size_t size, cl_mem_flags flags, void *host_ptr);
int CV_EXPORTS savetofile(const Context *clcxt, cl_program &program, const char *fileName);
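The execution wrappers above take kernel arguments as (size, pointer) pairs. An illustrative sketch of roughly that calling pattern inside the module; runSampleKernel and the "my_kernel" name are placeholders, src is assumed to be a valid oclMat, and source is assumed to point at an OpenCL C source string:

    static void runSampleKernel(const cv::ocl::oclMat& src, const char** source)
    {
        using namespace cv::ocl;

        // kernel arguments are passed as (size, pointer) pairs
        std::vector< std::pair<size_t, const void *> > args;
        int srcStep = (int)src.step;
        args.push_back(std::make_pair(sizeof(cl_mem), (const void *)&src.data));
        args.push_back(std::make_pair(sizeof(cl_int), (const void *)&srcStep));

        // round the global work size up to whole 16x16 groups
        size_t localThreads[3]  = { 16, 16, 1 };
        size_t globalThreads[3] = { divUp(src.cols, 16) * 16,
                                    divUp(src.rows, 16) * 16, 1 };

        openCLExecuteKernel(Context::getContext(), source, "my_kernel",
                            globalThreads, localThreads, args,
                            src.oclchannels(), src.depth());
    }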
enum FLUSH_MODE
{
CLFINISH = 0,
CLFLUSH,
DISABLE
};
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode = DISABLE);
void CV_EXPORTS openCLExecuteKernel2(Context *clCxt , const char **source, std::string kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels,
int depth, char *build_options, FLUSH_MODE finish_mode = DISABLE);
// bind oclMat to OpenCL image textures
// note:
// 1. there is no memory management. Users need to explicitly release the resource
// 2. for faster clamping, there is no buffer padding for the constructed texture
cl_mem CV_EXPORTS bindTexture(const oclMat &mat);
void CV_EXPORTS releaseTexture(cl_mem& texture);
//Represents an image texture object
class CV_EXPORTS TextureCL
{
public:
TextureCL(cl_mem tex, int r, int c, int t)
: tex_(tex), rows(r), cols(c), type(t) {}
~TextureCL()
{
openCLFree(tex_);
}
operator cl_mem()
{
return tex_;
}
cl_mem const tex_;
const int rows;
const int cols;
const int type;
private:
//disable assignment
void operator=(const TextureCL&);
};
// binds oclMat to an OpenCL image texture and returns a TextureCL object
// note:
// for faster clamping, there is no buffer padding for the constructed texture
Ptr<TextureCL> CV_EXPORTS bindTexturePtr(const oclMat &mat);
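Illustrative sketch of the two binding paths declared above; img stands for an existing oclMat:

    // manual path: the caller owns the image object and must release it
    cl_mem tex = cv::ocl::bindTexture(img);
    // ... pass 'tex' to a kernel ...
    cv::ocl::releaseTexture(tex);

    // RAII path: TextureCL frees the image in its destructor via openCLFree
    cv::Ptr<cv::ocl::TextureCL> texPtr = cv::ocl::bindTexturePtr(img);
    // *texPtr converts implicitly to cl_mem; released when the Ptr goes out of scope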
// returns whether the current context supports image2d_t format or not
bool CV_EXPORTS support_image2d(Context *clCxt = Context::getContext());
bool CV_EXPORTS isCpuDevice();
size_t CV_EXPORTS queryWaveFrontSize(cl_kernel kernel);
inline size_t divUp(size_t total, size_t grain)
{
return (total + grain - 1) / grain;
}
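For instance, with 16-wide work-groups, divUp gives the number of groups needed to cover 100 items (illustrative):

    size_t groups = divUp(100, 16);    // (100 + 15) / 16 == 7 work-groups
    size_t global = groups * 16;       // 112 work-items, enough to cover 100 columns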
// the enums are used to query device information
// currently only support wavefront size queries
enum DEVICE_INFO
{
WAVEFRONT_SIZE, //in AMD speak
IS_CPU_DEVICE //check if the device is CPU
};
template<DEVICE_INFO _it, typename _ty>
_ty queryDeviceInfo(cl_kernel kernel = NULL);
template<>
int CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, int>(cl_kernel kernel);
template<>
size_t CV_EXPORTS queryDeviceInfo<WAVEFRONT_SIZE, size_t>(cl_kernel kernel);
template<>
bool CV_EXPORTS queryDeviceInfo<IS_CPU_DEVICE, bool>(cl_kernel kernel);
unsigned long CV_EXPORTS queryLocalMemInfo();
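The templated queryDeviceInfo interface above is superseded by the dedicated helpers declared earlier in this header. An illustrative before/after sketch, where kernel stands for an existing cl_kernel handle:

    // before: templated query keyed by the DEVICE_INFO enum
    size_t wave  = queryDeviceInfo<WAVEFRONT_SIZE, size_t>(kernel);
    bool   onCpu = queryDeviceInfo<IS_CPU_DEVICE, bool>();

    // after: dedicated helpers declared above
    size_t wave2  = queryWaveFrontSize(kernel);
    bool   onCpu2 = isCpuDevice();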
inline size_t roundUp(size_t sz, size_t n)
{
// we don't assume that n is a power of 2 (see alignSize)
// equal to divUp(sz, n) * n
size_t t = sz + n - 1;
size_t rem = t % n;
size_t result = t - rem;
return result;
}
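A quick numeric check of roundUp (illustrative); unlike alignSize, n does not have to be a power of two:

    size_t a = roundUp(100, 16);   // 112
    size_t b = roundUp(100, 24);   // 120 (24 is not a power of two)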
}//namespace ocl
}//namespace cv
#endif //__OPENCV_OCL_PRIVATE_UTIL__