OpenCV-OpenCL interop (PR #4072):

Commits:
added new function, cv::ocl::attachContext(String& platformName, void* platformID, void* context, void* deviceID) which allow to attach externally created OpenCL context to OpenCV.
add definitions of clRetainDevice, clRetainContext funcs
removed definitions for clRetainContext, clRetainDevice
fixed build issue under Linux
fixed uninitialized vars, replace dbgassert in error handling
remove function which is not ready yet
add new function, cv::ocl::convertFromBuffer(int rows, int cols, int type, void* cl_mem_obj, UMat& dst, UMatUsageFlags usageFlags = cv::USAGE_DEFAULT) which attaches user allocated OpenCL clBuffer to UMat
uncommented clGetMemObjectInfo definition (otherwise prevent opencv build)
fixed build issue on linux and android
add step parameter to cv::ocl::convertFromBuffer func
suppress compile-time warning
added sample opencl-opencv interoperability (showcase for cv::ocl::convertFromBuffer func)
CMakeLists.txt modified to not create sample build script if OpenCL SDK not found in system
fixed build issue (apple opencl include dir and spaces in CMake file)
added call to clRetainContext for attachContext func and call to clRetainMemObject for convertFromBuffer func
uncommented clRetainMemObject definition
added comments and cleanup
add local path to cmake modules search dirs (instead of replacing)
remove REQUIRED for find_package call (sample build together with opencv). need to try standalone sample build
opencl-interop sample moved to standalone build
set minimum version requirement for sample's cmake to 3.1
put cmake_minimum_required under condition, so do not check if samples not builded
remove code dups for setSize, updateContinuityFlag, and finalizeHdr
commented out cmake_minimum_required(VERSION 3.1)
add safety check for cmake version
add convertFromImage func and update opencl-interop sample
uncommented clGetImageInfo defs
uncommented clEnqueueCopyImageToBuffer defs
fixed clEnqueueCopyImageToBuffer defs
add doxygen comments
remove doxygen @fn tag
try to restart buildbot
add doxygen comments to directx interop funcs
remove internal header, use fwd declarations in affected compile units instead
This commit is contained in:
Vladimir Dudnik 2015-05-27 23:22:33 +03:00 committed by Alexander Alekhin
parent 6ab9df5208
commit 217dd63e02
7 changed files with 1423 additions and 18 deletions

View File

@ -71,9 +71,28 @@ using namespace cv::ocl;
//! @{
// TODO static functions in the Context class
//! @brief Creates OpenCL context from D3D11 device
//
//! @param pD3D11Device - pointer to D3D11 device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromD3D11Device(ID3D11Device* pD3D11Device);
//! @brief Creates OpenCL context from D3D10 device
//
//! @param pD3D10Device - pointer to D3D10 device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromD3D10Device(ID3D10Device* pD3D10Device);
//! @brief Creates OpenCL context from Direct3DDevice9Ex device
//
//! @param pDirect3DDevice9Ex - pointer to Direct3DDevice9Ex device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromDirect3DDevice9Ex(IDirect3DDevice9Ex* pDirect3DDevice9Ex);
//! @brief Creates OpenCL context from Direct3DDevice9 device
//
//! @param pDirect3DDevice9 - pointer to Direct3Device9 device
//! @return Returns reference to OpenCL Context
CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDirect3DDevice9);
//! @}
@ -83,19 +102,70 @@ CV_EXPORTS Context& initializeContextFromDirect3DDevice9(IDirect3DDevice9* pDire
//! @addtogroup core_directx
//! @{
//! @brief Converts InputArray to ID3D11Texture2D
//
//! @note Note: function does memory copy from src to
//! pD3D11Texture2D
//
//! @param src - source InputArray
//! @param pD3D11Texture2D - destination D3D11 texture
CV_EXPORTS void convertToD3D11Texture2D(InputArray src, ID3D11Texture2D* pD3D11Texture2D);
//! @brief Converts ID3D11Texture2D to OutputArray
//
//! @note Note: function does memory copy from pD3D11Texture2D
//! to dst
//
//! @param pD3D11Texture2D - source D3D11 texture
//! @param dst - destination OutputArray
CV_EXPORTS void convertFromD3D11Texture2D(ID3D11Texture2D* pD3D11Texture2D, OutputArray dst);
//! @brief Converts InputArray to ID3D10Texture2D
//
//! @note Note: function does memory copy from src to
//! pD3D10Texture2D
//
//! @param src - source InputArray
//! @param pD3D10Texture2D - destination D3D10 texture
CV_EXPORTS void convertToD3D10Texture2D(InputArray src, ID3D10Texture2D* pD3D10Texture2D);
//! @brief Converts ID3D10Texture2D to OutputArray
//
//! @note Note: function does memory copy from pD3D10Texture2D
//! to dst
//
//! @param pD3D10Texture2D - source D3D10 texture
//! @param dst - destination OutputArray
CV_EXPORTS void convertFromD3D10Texture2D(ID3D10Texture2D* pD3D10Texture2D, OutputArray dst);
//! @brief Converts InputArray to IDirect3DSurface9
//
//! @note Note: function does memory copy from src to
//! pDirect3DSurface9
//
//! @param src - source InputArray
//! @param pDirect3DSurface9 - destination D3D10 texture
//! @param surfaceSharedHandle - shared handle
CV_EXPORTS void convertToDirect3DSurface9(InputArray src, IDirect3DSurface9* pDirect3DSurface9, void* surfaceSharedHandle = NULL);
//! @brief Converts IDirect3DSurface9 to OutputArray
//
//! @note Note: function does memory copy from pDirect3DSurface9
//! to dst
//
//! @param pDirect3DSurface9 - source D3D10 texture
//! @param dst - destination OutputArray
//! @param surfaceSharedHandle - shared handle
CV_EXPORTS void convertFromDirect3DSurface9(IDirect3DSurface9* pDirect3DSurface9, OutputArray dst, void* surfaceSharedHandle = NULL);
// Get OpenCV type from DirectX type, return -1 if there is no equivalent
//! @brief Get OpenCV type from DirectX type
//! @param iDXGI_FORMAT - enum DXGI_FORMAT for D3D10/D3D11
//! @return OpenCV type or -1 if there is no equivalent
CV_EXPORTS int getTypeFromDXGI_FORMAT(const int iDXGI_FORMAT); // enum DXGI_FORMAT for D3D10/D3D11
// Get OpenCV type from DirectX type, return -1 if there is no equivalent
//! @brief Get OpenCV type from DirectX type
//! @param iD3DFORMAT - enum D3DTYPE for D3D9
//! @return OpenCV type or -1 if there is no equivalent
CV_EXPORTS int getTypeFromD3DFORMAT(const int iD3DFORMAT); // enum D3DTYPE for D3D9
//! @}

View File

@ -276,6 +276,58 @@ protected:
Impl* p;
};
/*
//! @brief Attaches OpenCL context to OpenCV
//
//! @note Note:
// OpenCV will check if available OpenCL platform has platformName name,
// then assign context to OpenCV and call clRetainContext function.
// The deviceID device will be used as target device and new command queue
// will be created.
//
// Params:
//! @param platformName - name of OpenCL platform to attach,
//! this string is used to check if platform is available
//! to OpenCV at runtime
//! @param platfromID - ID of platform attached context was created for
//! @param context - OpenCL context to be attached to OpenCV
//! @param deviceID - ID of device, must be created from attached context
*/
CV_EXPORTS void attachContext(const String& platformName, void* platformID, void* context, void* deviceID);
/*
//! @brief Convert OpenCL buffer to UMat
//
//! @note Note:
// OpenCL buffer (cl_mem_buffer) should contain 2D image data, compatible with OpenCV.
// Memory content is not copied from clBuffer to UMat. Instead, buffer handle assigned
// to UMat and clRetainMemObject is called.
//
// Params:
//! @param cl_mem_buffer - source clBuffer handle
//! @param step - num of bytes in single row
//! @param rows - number of rows
//! @param cols - number of cols
//! @param type - OpenCV type of image
//! @param dst - destination UMat
*/
CV_EXPORTS void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst);
/*
//! @brief Convert OpenCL image2d_t to UMat
//
//! @note Note:
// OpenCL image2d_t (cl_mem_image), should be compatible with OpenCV
// UMat formats.
// Memory content is copied from image to UMat with
// clEnqueueCopyImageToBuffer function.
//
// Params:
//! @param cl_mem_image - source image2d_t handle
//! @param dst - destination UMat
*/
CV_EXPORTS void convertFromImage(void* cl_mem_image, UMat& dst);
// TODO Move to internal header
void initializeContextFromHandle(Context& ctx, void* platform, void* context, void* device);

View File

@ -858,9 +858,9 @@ OCL_FUNC_P(cl_context, clCreateContext,
OCL_FUNC(cl_int, clReleaseContext, (cl_context context), (context))
/*
OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context))
OCL_FUNC(cl_int, clRetainContext, (cl_context context), (context))
/*
OCL_FUNC_P(cl_context, clCreateContextFromType,
(const cl_context_properties * properties,
cl_device_type device_type,
@ -945,7 +945,6 @@ OCL_FUNC(cl_int, clGetSupportedImageFormats,
(context, flags, image_type, num_entries, image_formats, num_image_formats))
/*
OCL_FUNC(cl_int, clGetMemObjectInfo,
(cl_mem memobj,
cl_mem_info param_name,
@ -962,6 +961,7 @@ OCL_FUNC(cl_int, clGetImageInfo,
size_t * param_value_size_ret),
(image, param_name, param_value_size, param_value, param_value_size_ret))
/*
OCL_FUNC(cl_int, clCreateKernelsInProgram,
(cl_program program,
cl_uint num_kernels,
@ -1038,20 +1038,20 @@ OCL_FUNC(cl_int, clEnqueueCopyImage,
cl_event * event),
(command_queue, src_image, dst_image, src_origin, dst_origin,
region, num_events_in_wait_list, event_wait_list, event))
*/
OCL_FUNC(cl_int, clEnqueueCopyImageToBuffer,
(cl_command_queue command_queue,
cl_mem src_image,
cl_mem dst_buffer,
const size_t * src_origin[3],
const size_t * region[3],
const size_t * src_origin,
const size_t * region,
size_t dst_offset,
cl_uint num_events_in_wait_list,
const cl_event * event_wait_list,
cl_event * event),
(command_queue, src_image, dst_buffer, src_origin, region, dst_offset,
num_events_in_wait_list, event_wait_list, event))
*/
OCL_FUNC(cl_int, clEnqueueCopyBufferToImage,
(cl_command_queue command_queue,
@ -1100,10 +1100,10 @@ OCL_FUNC(cl_int, clGetKernelInfo,
size_t * param_value_size_ret),
(kernel, param_name, param_value_size, param_value, param_value_size_ret))
OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj))
*/
OCL_FUNC(cl_int, clRetainMemObject, (cl_mem memobj), (memobj))
OCL_FUNC(cl_int, clReleaseMemObject, (cl_mem memobj), (memobj))
@ -1348,7 +1348,7 @@ OCL_FUNC(cl_int, clReleaseEvent, (cl_event event), (event))
#define CL_VERSION_1_2
#endif
#endif
#endif // HAVE_OPENCL
#ifdef _DEBUG
#define CV_OclDbgAssert CV_DbgAssert
@ -2925,6 +2925,83 @@ CV_EXPORTS bool useSVM(UMatUsageFlags usageFlags)
#endif // HAVE_OPENCL_SVM
static void get_platform_name(cl_platform_id id, String& name)
{
// get platform name string length
size_t sz = 0;
if (CL_SUCCESS != clGetPlatformInfo(id, CL_PLATFORM_NAME, 0, 0, &sz))
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformInfo failed!");
// get platform name string
AutoBuffer<char> buf(sz + 1);
if (CL_SUCCESS != clGetPlatformInfo(id, CL_PLATFORM_NAME, sz, buf, 0))
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformInfo failed!");
// just in case, ensure trailing zero for ASCIIZ string
buf[sz] = 0;
name = (const char*)buf;
}
/*
// Attaches OpenCL context to OpenCV
*/
void attachContext(const String& platformName, void* platformID, void* context, void* deviceID)
{
cl_uint cnt = 0;
if(CL_SUCCESS != clGetPlatformIDs(0, 0, &cnt))
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformIDs failed!");
if (cnt == 0)
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "no OpenCL platform available!");
std::vector<cl_platform_id> platforms(cnt);
if(CL_SUCCESS != clGetPlatformIDs(cnt, &platforms[0], 0))
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clGetPlatformIDs failed!");
bool platformAvailable = false;
// check if external platformName contained in list of available platforms in OpenCV
for (unsigned int i = 0; i < cnt; i++)
{
String availablePlatformName;
get_platform_name(platforms[i], availablePlatformName);
// external platform is found in the list of available platforms
if (platformName == availablePlatformName)
{
platformAvailable = true;
break;
}
}
if (!platformAvailable)
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "No matched platforms available!");
// check if platformID corresponds to platformName
String actualPlatformName;
get_platform_name((cl_platform_id)platformID, actualPlatformName);
if (platformName != actualPlatformName)
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "No matched platforms available!");
// do not initialize OpenCL context
Context ctx = Context::getDefault(false);
// attach supplied context to OpenCV
initializeContextFromHandle(ctx, platformID, context, deviceID);
if(CL_SUCCESS != clRetainContext((cl_context)context))
CV_ErrorNoReturn(cv::Error::OpenCLApiCallError, "clRetainContext failed!");
// clear command queue, if any
getCoreTlsData().get()->oclQueue.finish();
Queue q;
getCoreTlsData().get()->oclQueue = q;
return;
} // attachContext()
void initializeContextFromHandle(Context& ctx, void* platform, void* _context, void* _device)
{
@ -3150,10 +3227,10 @@ struct Kernel::Impl
bool haveTempDstUMats;
};
}}
}} // namespace cv::ocl
extern "C" {
extern "C"
{
static void CL_CALLBACK oclCleanupCallback(cl_event, cl_int, void *p)
{
((cv::ocl::Kernel::Impl*)p)->finit();
@ -5166,6 +5243,167 @@ MatAllocator* getOpenCLAllocator()
return allocator;
}
}} // namespace cv::ocl
namespace cv {
// three funcs below are implemented in umatrix.cpp
void setSize( UMat& m, int _dims, const int* _sz, const size_t* _steps,
bool autoSteps = false );
void updateContinuityFlag(UMat& m);
void finalizeHdr(UMat& m);
} // namespace cv
namespace cv { namespace ocl {
/*
// Convert OpenCL buffer memory to UMat
*/
void convertFromBuffer(void* cl_mem_buffer, size_t step, int rows, int cols, int type, UMat& dst)
{
int d = 2;
int sizes[] = { rows, cols };
CV_Assert(0 <= d && d <= CV_MAX_DIM);
dst.release();
dst.flags = (type & Mat::TYPE_MASK) | Mat::MAGIC_VAL;
dst.usageFlags = USAGE_DEFAULT;
setSize(dst, d, sizes, 0, true);
dst.offset = 0;
cl_mem memobj = (cl_mem)cl_mem_buffer;
cl_mem_object_type mem_type = 0;
CV_Assert(clGetMemObjectInfo(memobj, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0) == CL_SUCCESS);
CV_Assert(CL_MEM_OBJECT_BUFFER == mem_type);
size_t total = 0;
CV_Assert(clGetMemObjectInfo(memobj, CL_MEM_SIZE, sizeof(size_t), &total, 0) == CL_SUCCESS);
CV_Assert(clRetainMemObject(memobj) == CL_SUCCESS);
CV_Assert((int)step >= cols * CV_ELEM_SIZE(type));
CV_Assert(total >= rows * step);
// attach clBuffer to UMatData
dst.u = new UMatData(getOpenCLAllocator());
dst.u->data = 0;
dst.u->allocatorFlags_ = 0; // not allocated from any OpenCV buffer pool
dst.u->flags = 0;
dst.u->handle = cl_mem_buffer;
dst.u->origdata = 0;
dst.u->prevAllocator = 0;
dst.u->size = total;
finalizeHdr(dst);
dst.addref();
return;
} // convertFromBuffer()
/*
// Convert OpenCL image2d_t memory to UMat
*/
void convertFromImage(void* cl_mem_image, UMat& dst)
{
cl_mem clImage = (cl_mem)cl_mem_image;
cl_mem_object_type mem_type = 0;
CV_Assert(clGetMemObjectInfo(clImage, CL_MEM_TYPE, sizeof(cl_mem_object_type), &mem_type, 0) == CL_SUCCESS);
CV_Assert(CL_MEM_OBJECT_IMAGE2D == mem_type);
cl_image_format fmt = { 0, 0 };
CV_Assert(clGetImageInfo(clImage, CL_IMAGE_FORMAT, sizeof(cl_image_format), &fmt, 0) == CL_SUCCESS);
int depth = CV_8U;
switch (fmt.image_channel_data_type)
{
case CL_UNORM_INT8:
case CL_UNSIGNED_INT8:
depth = CV_8U;
break;
case CL_SNORM_INT8:
case CL_SIGNED_INT8:
depth = CV_8S;
break;
case CL_UNORM_INT16:
case CL_UNSIGNED_INT16:
depth = CV_16U;
break;
case CL_SNORM_INT16:
case CL_SIGNED_INT16:
depth = CV_16S;
break;
case CL_SIGNED_INT32:
depth = CV_32S;
break;
case CL_FLOAT:
depth = CV_32F;
break;
default:
CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_data_type");
}
int type = CV_8UC1;
switch (fmt.image_channel_order)
{
case CL_R:
type = CV_MAKE_TYPE(depth, 1);
break;
case CL_RGBA:
case CL_BGRA:
case CL_ARGB:
type = CV_MAKE_TYPE(depth, 4);
break;
default:
CV_Error(cv::Error::OpenCLApiCallError, "Not supported image_channel_order");
break;
}
size_t step = 0;
CV_Assert(clGetImageInfo(clImage, CL_IMAGE_ROW_PITCH, sizeof(size_t), &step, 0) == CL_SUCCESS);
size_t w = 0;
CV_Assert(clGetImageInfo(clImage, CL_IMAGE_WIDTH, sizeof(size_t), &w, 0) == CL_SUCCESS);
size_t h = 0;
CV_Assert(clGetImageInfo(clImage, CL_IMAGE_HEIGHT, sizeof(size_t), &h, 0) == CL_SUCCESS);
dst.create((int)h, (int)w, type);
cl_mem clBuffer = (cl_mem)dst.handle(ACCESS_READ);
cl_command_queue q = (cl_command_queue)Queue::getDefault().ptr();
size_t offset = 0;
size_t src_origin[3] = { 0, 0, 0 };
size_t region[3] = { w, h, 1 };
CV_Assert(clEnqueueCopyImageToBuffer(q, clImage, clBuffer, src_origin, region, offset, 0, NULL, NULL) == CL_SUCCESS);
CV_Assert(clFinish(q) == CL_SUCCESS);
return;
} // convertFromImage()
///////////////////////////////////////////// Utility functions /////////////////////////////////////////////////
static void getDevices(std::vector<cl_device_id>& devices, cl_platform_id platform)

View File

@ -46,6 +46,13 @@
namespace cv {
// forward decls, implementation is below in this file
void setSize(UMat& m, int _dims, const int* _sz, const size_t* _steps,
bool autoSteps = false);
void updateContinuityFlag(UMat& m);
void finalizeHdr(UMat& m);
// it should be a prime number for the best hash function
enum { UMAT_NLOCKS = 31 };
static Mutex umatLocks[UMAT_NLOCKS];
@ -123,8 +130,8 @@ void swap( UMat& a, UMat& b )
}
static inline void setSize( UMat& m, int _dims, const int* _sz,
const size_t* _steps, bool autoSteps=false )
void setSize( UMat& m, int _dims, const int* _sz,
const size_t* _steps, bool autoSteps )
{
CV_Assert( 0 <= _dims && _dims <= CV_MAX_DIM );
if( m.dims != _dims )
@ -176,7 +183,8 @@ static inline void setSize( UMat& m, int _dims, const int* _sz,
}
}
static void updateContinuityFlag(UMat& m)
void updateContinuityFlag(UMat& m)
{
int i, j;
for( i = 0; i < m.dims; i++ )
@ -199,7 +207,7 @@ static void updateContinuityFlag(UMat& m)
}
static void finalizeHdr(UMat& m)
void finalizeHdr(UMat& m)
{
updateContinuityFlag(m);
int d = m.dims;
@ -207,6 +215,7 @@ static void finalizeHdr(UMat& m)
m.rows = m.cols = -1;
}
UMat Mat::getUMat(int accessFlags, UMatUsageFlags usageFlags) const
{
UMat hdr;

View File

@ -66,6 +66,8 @@ endif()
add_subdirectory(cpp)
# FIXIT: can't use cvconfig.h in samples: add_subdirectory(gpu)
add_subdirectory(opencl)
if(WIN32)
add_subdirectory(directx)
endif()

View File

@ -0,0 +1,68 @@
# cmake 3.1 needed for find_package(OpenCL)
if(CMAKE_VERSION VERSION_LESS "3.1")
message(STATUS "OpenCL samples require CMakes 3.1+")
return()
endif()
set(
OPENCV_OPENCL_SAMPLES_REQUIRED_DEPS
opencv_core
opencv_imgproc
opencv_video
opencv_imgcodecs
opencv_videoio
opencv_highgui)
ocv_check_dependencies(${OPENCV_OPENCL_SAMPLES_REQUIRED_DEPS})
if(BUILD_EXAMPLES AND OCV_DEPENDENCIES_FOUND)
find_package(OpenCL 1.2 REQUIRED)
set(project "opencl")
string(TOUPPER "${project}" project_upper)
project("${project}_samples")
ocv_include_modules_recurse(${OPENCV_OPENCL_SAMPLES_REQUIRED_DEPS})
include_directories(${OpenCL_INCLUDE_DIR})
# ---------------------------------------------
# Define executable targets
# ---------------------------------------------
MACRO(OPENCV_DEFINE_OPENCL_EXAMPLE name srcs)
set(the_target "example_${project}_${name}")
add_executable(${the_target} ${srcs})
ocv_target_link_libraries(
${the_target}
${OPENCV_LINKER_LIBS}
${OPENCV_OPENCL_SAMPLES_REQUIRED_DEPS}
${OpenCL_LIBRARY})
set_target_properties(${the_target} PROPERTIES
OUTPUT_NAME "${project}-example-${name}"
PROJECT_LABEL "(EXAMPLE_${project_upper}) ${name}")
if(ENABLE_SOLUTION_FOLDERS)
set_target_properties(${the_target} PROPERTIES FOLDER "samples//${project}")
endif()
if(WIN32)
if(MSVC AND NOT BUILD_SHARED_LIBS)
set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG")
endif()
install(TARGETS ${the_target} RUNTIME DESTINATION "${OPENCV_SAMPLES_BIN_INSTALL_PATH}/${project}" COMPONENT main)
endif()
ENDMACRO()
file(GLOB all_samples RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp)
foreach(sample_filename ${all_samples})
get_filename_component(sample ${sample_filename} NAME_WE)
file(GLOB sample_srcs RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${sample}.*)
OPENCV_DEFINE_OPENCL_EXAMPLE(${sample} ${sample_srcs})
endforeach()
endif()

View File

@ -0,0 +1,966 @@
/*
// The example of interoperability between OpenCL and OpenCV.
// This will loop through frames of video either from input media file
// or camera device and do processing of these data in OpenCL and then
// in OpenCV. In OpenCL it does inversion of pixels in half of frame and
// in OpenCV it does bluring the whole frame.
*/
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <iomanip>
#include <stdexcept>
#if __APPLE__
#include <OpenCL/cl.h>
#else
#include <CL/cl.h>
#endif
#include <opencv2/core/ocl.hpp>
#include <opencv2/core/utility.hpp>
#include <opencv2/video.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
using namespace std;
using namespace cv;
namespace opencl {
class PlatformInfo
{
public:
PlatformInfo()
{}
~PlatformInfo()
{}
cl_int QueryInfo(cl_platform_id id)
{
query_param(id, CL_PLATFORM_PROFILE, m_profile);
query_param(id, CL_PLATFORM_VERSION, m_version);
query_param(id, CL_PLATFORM_NAME, m_name);
query_param(id, CL_PLATFORM_VENDOR, m_vendor);
query_param(id, CL_PLATFORM_EXTENSIONS, m_extensions);
return CL_SUCCESS;
}
std::string Profile() { return m_profile; }
std::string Version() { return m_version; }
std::string Name() { return m_name; }
std::string Vendor() { return m_vendor; }
std::string Extensions() { return m_extensions; }
private:
cl_int query_param(cl_platform_id id, cl_platform_info param, std::string& paramStr)
{
cl_int res;
size_t psize;
cv::AutoBuffer<char> buf;
res = clGetPlatformInfo(id, param, 0, 0, &psize);
if (CL_SUCCESS != res)
throw std::runtime_error(std::string("clGetPlatformInfo failed"));
buf.resize(psize);
res = clGetPlatformInfo(id, param, psize, buf, 0);
if (CL_SUCCESS != res)
throw std::runtime_error(std::string("clGetPlatformInfo failed"));
// just in case, ensure trailing zero for ASCIIZ string
buf[psize] = 0;
paramStr = buf;
return CL_SUCCESS;
}
private:
std::string m_profile;
std::string m_version;
std::string m_name;
std::string m_vendor;
std::string m_extensions;
};
class DeviceInfo
{
public:
DeviceInfo()
{}
~DeviceInfo()
{}
cl_int QueryInfo(cl_device_id id)
{
query_param(id, CL_DEVICE_TYPE, m_type);
query_param(id, CL_DEVICE_VENDOR_ID, m_vendor_id);
query_param(id, CL_DEVICE_MAX_COMPUTE_UNITS, m_max_compute_units);
query_param(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, m_max_work_item_dimensions);
query_param(id, CL_DEVICE_MAX_WORK_ITEM_SIZES, m_max_work_item_sizes);
query_param(id, CL_DEVICE_MAX_WORK_GROUP_SIZE, m_max_work_group_size);
query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, m_preferred_vector_width_char);
query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, m_preferred_vector_width_short);
query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, m_preferred_vector_width_int);
query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, m_preferred_vector_width_long);
query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, m_preferred_vector_width_float);
query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, m_preferred_vector_width_double);
#if defined(CL_VERSION_1_1)
query_param(id, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, m_preferred_vector_width_half);
query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, m_native_vector_width_char);
query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, m_native_vector_width_short);
query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, m_native_vector_width_int);
query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, m_native_vector_width_long);
query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, m_native_vector_width_float);
query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, m_native_vector_width_double);
query_param(id, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, m_native_vector_width_half);
#endif
query_param(id, CL_DEVICE_MAX_CLOCK_FREQUENCY, m_max_clock_frequency);
query_param(id, CL_DEVICE_ADDRESS_BITS, m_address_bits);
query_param(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE, m_max_mem_alloc_size);
query_param(id, CL_DEVICE_IMAGE_SUPPORT, m_image_support);
query_param(id, CL_DEVICE_MAX_READ_IMAGE_ARGS, m_max_read_image_args);
query_param(id, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, m_max_write_image_args);
#if defined(CL_VERSION_2_0)
query_param(id, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, m_max_read_write_image_args);
#endif
query_param(id, CL_DEVICE_IMAGE2D_MAX_WIDTH, m_image2d_max_width);
query_param(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT, m_image2d_max_height);
query_param(id, CL_DEVICE_IMAGE3D_MAX_WIDTH, m_image3d_max_width);
query_param(id, CL_DEVICE_IMAGE3D_MAX_HEIGHT, m_image3d_max_height);
query_param(id, CL_DEVICE_IMAGE3D_MAX_DEPTH, m_image3d_max_depth);
#if defined(CL_VERSION_1_2)
query_param(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, m_image_max_buffer_size);
query_param(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, m_image_max_array_size);
#endif
query_param(id, CL_DEVICE_MAX_SAMPLERS, m_max_samplers);
#if defined(CL_VERSION_1_2)
query_param(id, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, m_image_pitch_alignment);
query_param(id, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, m_image_base_address_alignment);
#endif
#if defined(CL_VERSION_2_0)
query_param(id, CL_DEVICE_MAX_PIPE_ARGS, m_max_pipe_args);
query_param(id, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, m_pipe_max_active_reservations);
query_param(id, CL_DEVICE_PIPE_MAX_PACKET_SIZE, m_pipe_max_packet_size);
#endif
query_param(id, CL_DEVICE_MAX_PARAMETER_SIZE, m_max_parameter_size);
query_param(id, CL_DEVICE_MEM_BASE_ADDR_ALIGN, m_mem_base_addr_align);
query_param(id, CL_DEVICE_SINGLE_FP_CONFIG, m_single_fp_config);
#if defined(CL_VERSION_1_2)
query_param(id, CL_DEVICE_DOUBLE_FP_CONFIG, m_double_fp_config);
#endif
query_param(id, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, m_global_mem_cache_type);
query_param(id, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, m_global_mem_cacheline_size);
query_param(id, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, m_global_mem_cache_size);
query_param(id, CL_DEVICE_GLOBAL_MEM_SIZE, m_global_mem_size);
query_param(id, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, m_max_constant_buffer_size);
query_param(id, CL_DEVICE_MAX_CONSTANT_ARGS, m_max_constant_args);
#if defined(CL_VERSION_2_0)
query_param(id, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, m_max_global_variable_size);
query_param(id, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, m_global_variable_preferred_total_size);
#endif
query_param(id, CL_DEVICE_LOCAL_MEM_TYPE, m_local_mem_type);
query_param(id, CL_DEVICE_LOCAL_MEM_SIZE, m_local_mem_size);
query_param(id, CL_DEVICE_ERROR_CORRECTION_SUPPORT, m_error_correction_support);
#if defined(CL_VERSION_1_1)
query_param(id, CL_DEVICE_HOST_UNIFIED_MEMORY, m_host_unified_memory);
#endif
query_param(id, CL_DEVICE_PROFILING_TIMER_RESOLUTION, m_profiling_timer_resolution);
query_param(id, CL_DEVICE_ENDIAN_LITTLE, m_endian_little);
query_param(id, CL_DEVICE_AVAILABLE, m_available);
query_param(id, CL_DEVICE_COMPILER_AVAILABLE, m_compiler_available);
#if defined(CL_VERSION_1_2)
query_param(id, CL_DEVICE_LINKER_AVAILABLE, m_linker_available);
#endif
query_param(id, CL_DEVICE_EXECUTION_CAPABILITIES, m_execution_capabilities);
query_param(id, CL_DEVICE_QUEUE_PROPERTIES, m_queue_properties);
#if defined(CL_VERSION_2_0)
query_param(id, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, m_queue_on_host_properties);
query_param(id, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, m_queue_on_device_properties);
query_param(id, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, m_queue_on_device_preferred_size);
query_param(id, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, m_queue_on_device_max_size);
query_param(id, CL_DEVICE_MAX_ON_DEVICE_QUEUES, m_max_on_device_queues);
query_param(id, CL_DEVICE_MAX_ON_DEVICE_EVENTS, m_max_on_device_events);
#endif
#if defined(CL_VERSION_1_2)
query_param(id, CL_DEVICE_BUILT_IN_KERNELS, m_built_in_kernels);
#endif
query_param(id, CL_DEVICE_PLATFORM, m_platform);
query_param(id, CL_DEVICE_NAME, m_name);
query_param(id, CL_DEVICE_VENDOR, m_vendor);
query_param(id, CL_DRIVER_VERSION, m_driver_version);
query_param(id, CL_DEVICE_PROFILE, m_profile);
query_param(id, CL_DEVICE_VERSION, m_version);
#if defined(CL_VERSION_1_1)
query_param(id, CL_DEVICE_OPENCL_C_VERSION, m_opencl_c_version);
#endif
query_param(id, CL_DEVICE_EXTENSIONS, m_extensions);
#if defined(CL_VERSION_1_2)
query_param(id, CL_DEVICE_PRINTF_BUFFER_SIZE, m_printf_buffer_size);
query_param(id, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, m_preferred_interop_user_sync);
query_param(id, CL_DEVICE_PARENT_DEVICE, m_parent_device);
query_param(id, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, m_partition_max_sub_devices);
query_param(id, CL_DEVICE_PARTITION_PROPERTIES, m_partition_properties);
query_param(id, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, m_partition_affinity_domain);
query_param(id, CL_DEVICE_PARTITION_TYPE, m_partition_type);
query_param(id, CL_DEVICE_REFERENCE_COUNT, m_reference_count);
#endif
return CL_SUCCESS;
}
std::string Name() { return m_name; }
private:
template<typename T>
cl_int query_param(cl_device_id id, cl_device_info param, T& value)
{
cl_int res;
size_t size = 0;
res = clGetDeviceInfo(id, param, 0, 0, &size);
if (CL_SUCCESS != res && size != 0)
throw std::runtime_error(std::string("clGetDeviceInfo failed"));
if (0 == size)
return CL_SUCCESS;
if (sizeof(T) != size)
throw std::runtime_error(std::string("clGetDeviceInfo: param size mismatch"));
res = clGetDeviceInfo(id, param, size, &value, 0);
if (CL_SUCCESS != res)
throw std::runtime_error(std::string("clGetDeviceInfo failed"));
return CL_SUCCESS;
}
template<typename T>
cl_int query_param(cl_device_id id, cl_device_info param, std::vector<T>& value)
{
cl_int res;
size_t size;
res = clGetDeviceInfo(id, param, 0, 0, &size);
if (CL_SUCCESS != res)
throw std::runtime_error(std::string("clGetDeviceInfo failed"));
if (0 == size)
return CL_SUCCESS;
value.resize(size / sizeof(T));
res = clGetDeviceInfo(id, param, size, &value[0], 0);
if (CL_SUCCESS != res)
throw std::runtime_error(std::string("clGetDeviceInfo failed"));
return CL_SUCCESS;
}
cl_int query_param(cl_device_id id, cl_device_info param, std::string& value)
{
cl_int res;
size_t size;
res = clGetDeviceInfo(id, param, 0, 0, &size);
if (CL_SUCCESS != res)
throw std::runtime_error(std::string("clGetDeviceInfo failed"));
value.resize(size + 1);
res = clGetDeviceInfo(id, param, size, &value[0], 0);
if (CL_SUCCESS != res)
throw std::runtime_error(std::string("clGetDeviceInfo failed"));
// just in case, ensure trailing zero for ASCIIZ string
value[size] = 0;
return CL_SUCCESS;
}
private:
cl_device_type m_type;
cl_uint m_vendor_id;
cl_uint m_max_compute_units;
cl_uint m_max_work_item_dimensions;
std::vector<size_t> m_max_work_item_sizes;
size_t m_max_work_group_size;
cl_uint m_preferred_vector_width_char;
cl_uint m_preferred_vector_width_short;
cl_uint m_preferred_vector_width_int;
cl_uint m_preferred_vector_width_long;
cl_uint m_preferred_vector_width_float;
cl_uint m_preferred_vector_width_double;
#if defined(CL_VERSION_1_1)
cl_uint m_preferred_vector_width_half;
cl_uint m_native_vector_width_char;
cl_uint m_native_vector_width_short;
cl_uint m_native_vector_width_int;
cl_uint m_native_vector_width_long;
cl_uint m_native_vector_width_float;
cl_uint m_native_vector_width_double;
cl_uint m_native_vector_width_half;
#endif
cl_uint m_max_clock_frequency;
cl_uint m_address_bits;
cl_ulong m_max_mem_alloc_size;
cl_bool m_image_support;
cl_uint m_max_read_image_args;
cl_uint m_max_write_image_args;
#if defined(CL_VERSION_2_0)
cl_uint m_max_read_write_image_args;
#endif
size_t m_image2d_max_width;
size_t m_image2d_max_height;
size_t m_image3d_max_width;
size_t m_image3d_max_height;
size_t m_image3d_max_depth;
#if defined(CL_VERSION_1_2)
size_t m_image_max_buffer_size;
size_t m_image_max_array_size;
#endif
cl_uint m_max_samplers;
#if defined(CL_VERSION_1_2)
cl_uint m_image_pitch_alignment;
cl_uint m_image_base_address_alignment;
#endif
#if defined(CL_VERSION_2_0)
cl_uint m_max_pipe_args;
cl_uint m_pipe_max_active_reservations;
cl_uint m_pipe_max_packet_size;
#endif
size_t m_max_parameter_size;
cl_uint m_mem_base_addr_align;
cl_device_fp_config m_single_fp_config;
#if defined(CL_VERSION_1_2)
cl_device_fp_config m_double_fp_config;
#endif
cl_device_mem_cache_type m_global_mem_cache_type;
cl_uint m_global_mem_cacheline_size;
cl_ulong m_global_mem_cache_size;
cl_ulong m_global_mem_size;
cl_ulong m_max_constant_buffer_size;
cl_uint m_max_constant_args;
#if defined(CL_VERSION_2_0)
size_t m_max_global_variable_size;
size_t m_global_variable_preferred_total_size;
#endif
cl_device_local_mem_type m_local_mem_type;
cl_ulong m_local_mem_size;
cl_bool m_error_correction_support;
#if defined(CL_VERSION_1_1)
cl_bool m_host_unified_memory;
#endif
size_t m_profiling_timer_resolution;
cl_bool m_endian_little;
cl_bool m_available;
cl_bool m_compiler_available;
#if defined(CL_VERSION_1_2)
cl_bool m_linker_available;
#endif
cl_device_exec_capabilities m_execution_capabilities;
cl_command_queue_properties m_queue_properties;
#if defined(CL_VERSION_2_0)
cl_command_queue_properties m_queue_on_host_properties;
cl_command_queue_properties m_queue_on_device_properties;
cl_uint m_queue_on_device_preferred_size;
cl_uint m_queue_on_device_max_size;
cl_uint m_max_on_device_queues;
cl_uint m_max_on_device_events;
#endif
#if defined(CL_VERSION_1_2)
std::string m_built_in_kernels;
#endif
cl_platform_id m_platform;
std::string m_name;
std::string m_vendor;
std::string m_driver_version;
std::string m_profile;
std::string m_version;
#if defined(CL_VERSION_1_1)
std::string m_opencl_c_version;
#endif
std::string m_extensions;
#if defined(CL_VERSION_1_2)
size_t m_printf_buffer_size;
cl_bool m_preferred_interop_user_sync;
cl_device_id m_parent_device;
cl_uint m_partition_max_sub_devices;
std::vector<cl_device_partition_property> m_partition_properties;
cl_device_affinity_domain m_partition_affinity_domain;
std::vector<cl_device_partition_property> m_partition_type;
cl_uint m_reference_count;
#endif
};
} // namespace opencl
class App
{
public:
App(CommandLineParser& cmd);
~App();
int initOpenCL();
int initVideoSource();
int process_frame_with_open_cl(cv::Mat& frame, bool use_buffer, cl_mem* cl_buffer);
int process_cl_buffer_with_opencv(cl_mem buffer, size_t step, int rows, int cols, int type, cv::UMat& u);
int process_cl_image_with_opencv(cl_mem image, cv::UMat& u);
int run();
bool isRunning() { return m_running; }
bool doProcess() { return m_process; }
bool useBuffer() { return m_use_buffer; }
void setRunning(bool running) { m_running = running; }
void setDoProcess(bool process) { m_process = process; }
void setUseBuffer(bool use_buffer) { m_use_buffer = use_buffer; }
protected:
bool nextFrame(cv::Mat& frame) { return m_cap.read(frame); }
void handleKey(char key);
void timerStart();
void timerEnd();
std::string fpsStr() const;
std::string message() const;
private:
bool m_running;
bool m_process;
bool m_use_buffer;
int64 m_t0;
int64 m_t1;
double m_fps;
string m_file_name;
int m_camera_id;
cv::VideoCapture m_cap;
cv::Mat m_frame;
cv::Mat m_frameGray;
opencl::PlatformInfo m_platformInfo;
opencl::DeviceInfo m_deviceInfo;
std::vector<cl_platform_id> m_platform_ids;
cl_context m_context;
cl_device_id m_device_id;
cl_command_queue m_queue;
cl_program m_program;
cl_kernel m_kernelBuf;
cl_kernel m_kernelImg;
cl_mem m_mem_obj;
cl_event m_event;
};
App::App(CommandLineParser& cmd)
{
cout << "\nPress ESC to exit\n" << endl;
cout << "\n 'p' to toggle ON/OFF processing\n" << endl;
cout << "\n SPACE to switch between OpenCL buffer/image\n" << endl;
m_camera_id = cmd.get<int>("camera");
m_file_name = cmd.get<string>("video");
m_running = false;
m_process = false;
m_use_buffer = false;
m_context = 0;
m_device_id = 0;
m_queue = 0;
m_program = 0;
m_kernelBuf = 0;
m_kernelImg = 0;
m_mem_obj = 0;
m_event = 0;
} // ctor
App::~App()
{
if (m_queue)
{
clFinish(m_queue);
clReleaseCommandQueue(m_queue);
m_queue = 0;
}
if (m_program)
{
clReleaseProgram(m_program);
m_program = 0;
}
if (m_mem_obj)
{
clReleaseMemObject(m_mem_obj);
m_mem_obj = 0;
}
if (m_event)
{
clReleaseEvent(m_event);
}
if (m_kernelBuf)
{
clReleaseKernel(m_kernelBuf);
m_kernelBuf = 0;
}
if (m_kernelImg)
{
clReleaseKernel(m_kernelImg);
m_kernelImg = 0;
}
if (m_device_id)
{
clReleaseDevice(m_device_id);
m_device_id = 0;
}
if (m_context)
{
clReleaseContext(m_context);
m_context = 0;
}
} // dtor
int App::initOpenCL()
{
cl_int res = CL_SUCCESS;
cl_uint num_entries = 0;
res = clGetPlatformIDs(0, 0, &num_entries);
if (CL_SUCCESS != res)
return -1;
m_platform_ids.resize(num_entries);
res = clGetPlatformIDs(num_entries, &m_platform_ids[0], 0);
if (CL_SUCCESS != res)
return -1;
unsigned int i;
// create context from first platform with GPU device
for (i = 0; i < m_platform_ids.size(); i++)
{
cl_context_properties props[] =
{
CL_CONTEXT_PLATFORM,
(cl_context_properties)(m_platform_ids[i]),
0
};
m_context = clCreateContextFromType(props, CL_DEVICE_TYPE_GPU, 0, 0, &res);
if (0 == m_context || CL_SUCCESS != res)
continue;
res = clGetContextInfo(m_context, CL_CONTEXT_DEVICES, sizeof(cl_device_id), &m_device_id, 0);
if (CL_SUCCESS != res)
return -1;
m_queue = clCreateCommandQueue(m_context, m_device_id, 0, &res);
if (0 == m_queue || CL_SUCCESS != res)
return -1;
const char* kernelSrc =
"__kernel "
"void bitwise_inv_buf_8uC1("
" __global unsigned char* pSrcDst,"
" int srcDstStep,"
" int rows,"
" int cols)"
"{"
" int x = get_global_id(0);"
" int y = get_global_id(1);"
" int idx = mad24(y, srcDstStep, x);"
" pSrcDst[idx] = ~pSrcDst[idx];"
"}"
"__kernel "
"void bitwise_inv_img_8uC1("
" read_only image2d_t srcImg,"
" write_only image2d_t dstImg)"
"{"
" int x = get_global_id(0);"
" int y = get_global_id(1);"
" int2 coord = (int2)(x, y);"
" uint4 val = read_imageui(srcImg, coord);"
" val.x = (~val.x) & 0x000000FF;"
" write_imageui(dstImg, coord, val);"
"}";
size_t len = strlen(kernelSrc);
m_program = clCreateProgramWithSource(m_context, 1, &kernelSrc, &len, &res);
if (0 == m_program || CL_SUCCESS != res)
return -1;
res = clBuildProgram(m_program, 1, &m_device_id, 0, 0, 0);
if (CL_SUCCESS != res)
return -1;
m_kernelBuf = clCreateKernel(m_program, "bitwise_inv_buf_8uC1", &res);
if (0 == m_kernelBuf || CL_SUCCESS != res)
return -1;
m_kernelImg = clCreateKernel(m_program, "bitwise_inv_img_8uC1", &res);
if (0 == m_kernelImg || CL_SUCCESS != res)
return -1;
m_platformInfo.QueryInfo(m_platform_ids[i]);
m_deviceInfo.QueryInfo(m_device_id);
// attach OpenCL context to OpenCV
cv::ocl::attachContext(m_platformInfo.Name(), m_platform_ids[i], m_context, m_device_id);
break;
}
return m_context != 0 ? CL_SUCCESS : -1;
} // initOpenCL()
int App::initVideoSource()
{
try
{
if (!m_file_name.empty() && m_camera_id == -1)
{
m_cap.open(m_file_name.c_str());
if (!m_cap.isOpened())
throw std::runtime_error(std::string("can't open video file: " + m_file_name));
}
else if (m_camera_id != -1)
{
m_cap.open(m_camera_id);
if (!m_cap.isOpened())
{
std::stringstream msg;
msg << "can't open camera: " << m_camera_id;
throw std::runtime_error(msg.str());
}
}
else
throw std::runtime_error(std::string("specify video source"));
}
catch (std::exception e)
{
cerr << "ERROR: " << e.what() << std::endl;
return -1;
}
return 0;
} // initVideoSource()
// this function is an example of "typical" OpenCL processing pipeline
// It creates OpenCL buffer or image, depending on use_buffer flag,
// from input media frame and process these data
// (inverts each pixel value in half of frame) with OpenCL kernel
int App::process_frame_with_open_cl(cv::Mat& frame, bool use_buffer, cl_mem* mem_obj)
{
cl_int res = CL_SUCCESS;
CV_Assert(mem_obj);
cl_kernel kernel = 0;
cl_mem mem = mem_obj[0];
if (0 == mem)
{
// first time initialization
cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR;
if (use_buffer)
{
// allocate OpenCL memory to keep single frame,
// reuse this memory for subsecuent frames
// memory will be deallocated at dtor
mem = clCreateBuffer(m_context, flags, frame.total(), frame.ptr(), &res);
if (0 == mem || CL_SUCCESS != res)
return -1;
res = clSetKernelArg(m_kernelBuf, 0, sizeof(cl_mem), &mem);
if (CL_SUCCESS != res)
return -1;
res = clSetKernelArg(m_kernelBuf, 1, sizeof(int), &frame.step[0]);
if (CL_SUCCESS != res)
return -1;
res = clSetKernelArg(m_kernelBuf, 2, sizeof(int), &frame.rows);
if (CL_SUCCESS != res)
return -1;
int cols2 = frame.cols / 2;
res = clSetKernelArg(m_kernelBuf, 3, sizeof(int), &cols2);
if (CL_SUCCESS != res)
return -1;
kernel = m_kernelBuf;
}
else
{
cl_image_format fmt;
fmt.image_channel_order = CL_R;
fmt.image_channel_data_type = CL_UNSIGNED_INT8;
cl_image_desc desc;
desc.image_type = CL_MEM_OBJECT_IMAGE2D;
desc.image_width = frame.cols;
desc.image_height = frame.rows;
desc.image_depth = 0;
desc.image_array_size = 0;
desc.image_row_pitch = frame.step[0];
desc.image_slice_pitch = 0;
desc.num_mip_levels = 0;
desc.num_samples = 0;
desc.buffer = 0;
mem = clCreateImage(m_context, flags, &fmt, &desc, frame.ptr(), &res);
if (0 == mem || CL_SUCCESS != res)
return -1;
res = clSetKernelArg(m_kernelImg, 0, sizeof(cl_mem), &mem);
if (CL_SUCCESS != res)
return -1;
res = clSetKernelArg(m_kernelImg, 1, sizeof(cl_mem), &mem);
if (CL_SUCCESS != res)
return -1;
kernel = m_kernelImg;
}
}
m_event = clCreateUserEvent(m_context, &res);
if (0 == m_event || CL_SUCCESS != res)
return -1;
// process left half of frame in OpenCL
size_t size[] = { frame.cols / 2, frame.rows };
res = clEnqueueNDRangeKernel(m_queue, kernel, 2, 0, size, 0, 0, 0, &m_event);
if (CL_SUCCESS != res)
return -1;
res = clWaitForEvents(1, &m_event);
if (CL_SUCCESS != res)
return - 1;
mem_obj[0] = mem;
return 0;
}
// this function is an example of interoperability between OpenCL buffer
// and OpenCV UMat objects. It converts (without copying data) OpenCL buffer
// to OpenCV UMat and then do blur on these data
int App::process_cl_buffer_with_opencv(cl_mem buffer, size_t step, int rows, int cols, int type, cv::UMat& u)
{
cv::ocl::convertFromBuffer(buffer, step, rows, cols, type, u);
// process right half of frame in OpenCV
cv::Point pt(u.cols / 2, 0);
cv::Size sz(u.cols / 2, u.rows);
cv::Rect roi(pt, sz);
cv::UMat uroi(u, roi);
cv::blur(uroi, uroi, cv::Size(7, 7), cv::Point(-3, -3));
if (buffer)
clReleaseMemObject(buffer);
m_mem_obj = 0;
return 0;
}
// this function is an example of interoperability between OpenCL image
// and OpenCV UMat objects. It converts OpenCL image
// to OpenCV UMat and then do blur on these data
int App::process_cl_image_with_opencv(cl_mem image, cv::UMat& u)
{
cv::ocl::convertFromImage(image, u);
// process right half of frame in OpenCV
cv::Point pt(u.cols / 2, 0);
cv::Size sz(u.cols / 2, u.rows);
cv::Rect roi(pt, sz);
cv::UMat uroi(u, roi);
cv::blur(uroi, uroi, cv::Size(7, 7), cv::Point(-3, -3));
if (image)
clReleaseMemObject(image);
m_mem_obj = 0;
return 0;
}
int App::run()
{
if (0 != initOpenCL())
return -1;
if (0 != initVideoSource())
return -1;
Mat img_to_show;
// set running state until ESC pressed
setRunning(true);
// set process flag to show some data processing
// can be toggled on/off by 'p' button
setDoProcess(true);
// set use buffer flag,
// when it is set to true, will demo interop opencl buffer and cv::Umat,
// otherwise demo interop opencl image and cv::UMat
// can be switched on/of by SPACE button
setUseBuffer(true);
// Iterate over all frames
while (isRunning() && nextFrame(m_frame))
{
cv::cvtColor(m_frame, m_frameGray, COLOR_BGR2GRAY);
UMat uframe;
// work
timerStart();
if (doProcess())
{
process_frame_with_open_cl(m_frameGray, useBuffer(), &m_mem_obj);
if (useBuffer())
process_cl_buffer_with_opencv(
m_mem_obj, m_frameGray.step[0], m_frameGray.rows, m_frameGray.cols, m_frameGray.type(), uframe);
else
process_cl_image_with_opencv(m_mem_obj, uframe);
}
else
{
m_frameGray.copyTo(uframe);
}
timerEnd();
uframe.copyTo(img_to_show);
putText(img_to_show, "Version : " + m_platformInfo.Version(), Point(5, 30), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, "Name : " + m_platformInfo.Name(), Point(5, 60), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, "Device : " + m_deviceInfo.Name(), Point(5, 90), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
cv::String memtype = useBuffer() ? "buffer" : "image";
putText(img_to_show, "interop with OpenCL " + memtype, Point(5, 120), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
putText(img_to_show, "FPS : " + fpsStr(), Point(5, 150), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2);
imshow("opencl_interop", img_to_show);
handleKey((char)waitKey(3));
}
return 0;
}
void App::handleKey(char key)
{
switch (key)
{
case 27:
setRunning(false);
break;
case ' ':
setUseBuffer(!useBuffer());
break;
case 'p':
case 'P':
setDoProcess( !doProcess() );
break;
default:
break;
}
}
inline void App::timerStart()
{
m_t0 = getTickCount();
}
inline void App::timerEnd()
{
m_t1 = getTickCount();
int64 delta = m_t1 - m_t0;
double freq = getTickFrequency();
m_fps = freq / delta;
}
inline string App::fpsStr() const
{
stringstream ss;
ss << std::fixed << std::setprecision(1) << m_fps;
return ss.str();
}
int main(int argc, char** argv)
{
const char* keys =
"{ help h ? | | print help message }"
"{ camera c | -1 | use camera as input }"
"{ video v | | use video as input }";
CommandLineParser cmd(argc, argv, keys);
if (cmd.has("help"))
{
cmd.printMessage();
return EXIT_SUCCESS;
}
App app(cmd);
try
{
app.run();
}
catch (const cv::Exception& e)
{
cout << "error: " << e.what() << endl;
return 1;
}
catch (const std::exception& e)
{
cout << "error: " << e.what() << endl;
return 1;
}
catch (...)
{
cout << "unknown exception" << endl;
return 1;
}
return EXIT_SUCCESS;
} // main()