Merge pull request #2045 from SpecLad:merge-2.4

This commit is contained in:
Roman Donchenko 2013-12-25 16:18:00 +04:00 committed by OpenCV Buildbot
commit faddd5b97f
41 changed files with 3627 additions and 952 deletions

View File

@ -156,6 +156,7 @@ OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" ON
OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON IF (NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF WIN32 ) OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF WIN32 )
OCV_OPTION(WITH_INTELPERC "Include Intel Perceptual Computing support" OFF IF WIN32 )
# OpenCV build components # OpenCV build components
@ -207,7 +208,8 @@ OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions"
OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) ) OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) ) OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND ARM) ) OCV_OPTION(ENABLE_NEON "Enable NEON instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND ARM )
OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND ARM )
OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF ) OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF )
OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors" OFF ) OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors" OFF )
OCV_OPTION(ENABLE_WINRT_MODE "Build with Windows Runtime support" OFF IF WIN32 ) OCV_OPTION(ENABLE_WINRT_MODE "Build with Windows Runtime support" OFF IF WIN32 )
@ -226,6 +228,15 @@ include(cmake/OpenCVVersion.cmake)
# Save libs and executables in the same place # Save libs and executables in the same place
set(EXECUTABLE_OUTPUT_PATH "${CMAKE_BINARY_DIR}/bin" CACHE PATH "Output directory for applications" ) set(EXECUTABLE_OUTPUT_PATH "${CMAKE_BINARY_DIR}/bin" CACHE PATH "Output directory for applications" )
if (ANDROID)
if (ANDROID_ABI MATCHES "NEON")
set(ENABLE_NEON ON)
endif()
if (ANDROID_ABI MATCHES "VFPV3")
set(ENABLE_VFPV3 ON)
endif()
endif()
if(ANDROID OR WIN32) if(ANDROID OR WIN32)
set(OPENCV_DOC_INSTALL_PATH doc) set(OPENCV_DOC_INSTALL_PATH doc)
elseif(INSTALL_TO_MANGLED_PATHS) elseif(INSTALL_TO_MANGLED_PATHS)
@ -820,6 +831,11 @@ if(DEFINED WITH_XINE)
status(" Xine:" HAVE_XINE THEN "YES (ver ${ALIASOF_libxine_VERSION})" ELSE NO) status(" Xine:" HAVE_XINE THEN "YES (ver ${ALIASOF_libxine_VERSION})" ELSE NO)
endif(DEFINED WITH_XINE) endif(DEFINED WITH_XINE)
if(DEFINED WITH_INTELPERC)
status(" Intel PerC:" HAVE_INTELPERC THEN "YES" ELSE NO)
endif(DEFINED WITH_INTELPERC)
# ========================== Other third-party libraries ========================== # ========================== Other third-party libraries ==========================
status("") status("")
status(" Other third-party libraries:") status(" Other third-party libraries:")

View File

@ -124,6 +124,12 @@ if(CMAKE_COMPILER_IS_GNUCXX)
if(ENABLE_SSE2) if(ENABLE_SSE2)
add_extra_compiler_option(-msse2) add_extra_compiler_option(-msse2)
endif() endif()
if (ENABLE_NEON)
add_extra_compiler_option("-mfpu=neon")
endif()
if (ENABLE_VFPV3 AND NOT ENABLE_NEON)
add_extra_compiler_option("-mfpu=vfpv3")
endif()
# SSE3 and further should be disabled under MingW because it generates compiler errors # SSE3 and further should be disabled under MingW because it generates compiler errors
if(NOT MINGW) if(NOT MINGW)

View File

@ -0,0 +1,20 @@
# Main variables:
# INTELPERC_LIBRARIES and INTELPERC_INCLUDE to link Intel Perceptial Computing SDK modules
# HAVE_INTELPERC for conditional compilation OpenCV with/without Intel Perceptial Computing SDK
if(X86_64)
find_path(INTELPERC_INCLUDE_DIR "pxcsession.h" PATHS "$ENV{PCSDK_DIR}include" DOC "Path to Intel Perceptual Computing SDK interface headers")
find_file(INTELPERC_LIBRARIES "libpxc.lib" PATHS "$ENV{PCSDK_DIR}lib/x64" DOC "Path to Intel Perceptual Computing SDK interface libraries")
else()
find_path(INTELPERC_INCLUDE_DIR "pxcsession.h" PATHS "$ENV{PCSDK_DIR}include" DOC "Path to Intel Perceptual Computing SDK interface headers")
find_file(INTELPERC_LIBRARIES "libpxc.lib" PATHS "$ENV{PCSDK_DIR}lib/Win32" DOC "Path to Intel Perceptual Computing SDK interface libraries")
endif()
if(INTELPERC_INCLUDE_DIR AND INTELPERC_LIBRARIES)
set(HAVE_INTELPERC TRUE)
else()
set(HAVE_INTELPERC FALSE)
message(WARNING "Intel Perceptual Computing SDK library directory (set by INTELPERC_LIB_DIR variable) is not found or does not have Intel Perceptual Computing SDK libraries.")
endif() #if(INTELPERC_INCLUDE_DIR AND INTELPERC_LIBRARIES)
mark_as_advanced(FORCE INTELPERC_LIBRARIES INTELPERC_INCLUDE_DIR)

View File

@ -277,3 +277,8 @@ if (NOT IOS)
set(HAVE_QTKIT YES) set(HAVE_QTKIT YES)
endif() endif()
endif() endif()
# --- Intel Perceptual Computing SDK ---
if(WITH_INTELPERC)
include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindIntelPerCSDK.cmake")
endif(WITH_INTELPERC)

View File

@ -88,6 +88,9 @@
/* Define to 1 if you have the <inttypes.h> header file. */ /* Define to 1 if you have the <inttypes.h> header file. */
#cmakedefine HAVE_INTTYPES_H 1 #cmakedefine HAVE_INTTYPES_H 1
/* Intel Perceptual Computing SDK library */
#cmakedefine HAVE_INTELPERC
/* Intel Integrated Performance Primitives */ /* Intel Integrated Performance Primitives */
#cmakedefine HAVE_IPP #cmakedefine HAVE_IPP

View File

@ -106,8 +106,8 @@ Enable hardware optimizations
----------------------------- -----------------------------
Depending on target platform architecture different instruction sets can be used. By default Depending on target platform architecture different instruction sets can be used. By default
compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DUSE_VFPV3=ON`` compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DENABLE_VFPV3=ON``
to cmake command line to enable code generation for VFPv3 and ``-DUSE_NEON=ON`` for using to cmake command line to enable code generation for VFPv3 and ``-DENABLE_NEON=ON`` for using
NEON SIMD extensions. NEON SIMD extensions.
TBB is supported on multi core ARM SoCs also. TBB is supported on multi core ARM SoCs also.

View File

@ -0,0 +1,79 @@
*******
HighGUI
*******
.. highlight:: cpp
Using Creative Senz3D and other Intel Perceptual Computing SDK compatible depth sensors
=======================================================================================
Depth sensors compatible with Intel Perceptual Computing SDK are supported through ``VideoCapture`` class. Depth map, RGB image and some other formats of output can be retrieved by using familiar interface of ``VideoCapture``.
In order to use depth sensor with OpenCV you should do the following preliminary steps:
#.
Install Intel Perceptual Computing SDK (from here http://www.intel.com/software/perceptual).
#.
Configure OpenCV with Intel Perceptual Computing SDK support by setting ``WITH_INTELPERC`` flag in CMake. If Intel Perceptual Computing SDK is found in install folders OpenCV will be built with Intel Perceptual Computing SDK library (see a status ``INTELPERC`` in CMake log). If CMake process doesn't find Intel Perceptual Computing SDK installation folder automatically, the user should change corresponding CMake variables ``INTELPERC_LIB_DIR`` and ``INTELPERC_INCLUDE_DIR`` to the proper value.
#.
Build OpenCV.
VideoCapture can retrieve the following data:
#.
data given from depth generator:
* ``CV_CAP_INTELPERC_DEPTH_MAP`` - each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth. (CV_16UC1)
* ``CV_CAP_INTELPERC_UVDEPTH_MAP`` - each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates. (CV_32FC2)
* ``CV_CAP_INTELPERC_IR_MAP`` - each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam. (CV_16UC1)
#.
data given from RGB image generator:
* ``CV_CAP_INTELPERC_IMAGE`` - color image. (CV_8UC3)
In order to get depth map from depth sensor use ``VideoCapture::operator >>``, e. g. ::
VideoCapture capture( CV_CAP_INTELPERC );
for(;;)
{
Mat depthMap;
capture >> depthMap;
if( waitKey( 30 ) >= 0 )
break;
}
For getting several data maps use ``VideoCapture::grab`` and ``VideoCapture::retrieve``, e.g. ::
VideoCapture capture(CV_CAP_INTELPERC);
for(;;)
{
Mat depthMap;
Mat image;
Mat irImage;
capture.grab();
capture.retrieve( depthMap, CV_CAP_INTELPERC_DEPTH_MAP );
capture.retrieve( image, CV_CAP_INTELPERC_IMAGE );
capture.retrieve( irImage, CV_CAP_INTELPERC_IR_MAP);
if( waitKey( 30 ) >= 0 )
break;
}
For setting and getting some property of sensor` data generators use ``VideoCapture::set`` and ``VideoCapture::get`` methods respectively, e.g. ::
VideoCapture capture( CV_CAP_INTELPERC );
capture.set( CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_PROFILE_IDX, 0 );
cout << "FPS " << capture.get( CV_CAP_INTELPERC_DEPTH_GENERATOR+CV_CAP_PROP_FPS ) << endl;
Since two types of sensor's data generators are supported (image generator and depth generator), there are two flags that should be used to set/get property of the needed generator:
* CV_CAP_INTELPERC_IMAGE_GENERATOR -- a flag for access to the image generator properties.
* CV_CAP_INTELPERC_DEPTH_GENERATOR -- a flag for access to the depth generator properties. This flag value is assumed by default if neither of the two possible values of the property is set.
For more information please refer to the example of usage intelperc_capture.cpp_ in ``opencv/samples/cpp`` folder.
.. _intelperc_capture.cpp: https://github.com/Itseez/opencv/tree/master/samples/cpp/intelperc_capture.cpp

View File

@ -9,3 +9,4 @@ OpenCV User Guide
ug_features2d.rst ug_features2d.rst
ug_highgui.rst ug_highgui.rst
ug_traincascade.rst ug_traincascade.rst
ug_intelperc.rst

View File

@ -903,7 +903,7 @@ So, the function chooses an operation mode depending on the flags and size of th
* When ``DFT_COMPLEX_OUTPUT`` is set, the output is a complex matrix of the same size as input. * When ``DFT_COMPLEX_OUTPUT`` is set, the output is a complex matrix of the same size as input.
* When ``DFT_COMPLEX_OUTPUT`` is not set, the output is a real matrix of the same size as input. In case of 2D transform, it uses the packed format as shown above. In case of a single 1D transform, it looks like the first row of the matrix above. In case of multiple 1D transforms (when using the ``DCT_ROWS`` flag), each row of the output matrix looks like the first row of the matrix above. * When ``DFT_COMPLEX_OUTPUT`` is not set, the output is a real matrix of the same size as input. In case of 2D transform, it uses the packed format as shown above. In case of a single 1D transform, it looks like the first row of the matrix above. In case of multiple 1D transforms (when using the ``DFT_ROWS`` flag), each row of the output matrix looks like the first row of the matrix above.
* If the input array is complex and either ``DFT_INVERSE`` or ``DFT_REAL_OUTPUT`` are not set, the output is a complex array of the same size as input. The function performs a forward or inverse 1D or 2D transform of the whole input array or each row of the input array independently, depending on the flags ``DFT_INVERSE`` and ``DFT_ROWS``. * If the input array is complex and either ``DFT_INVERSE`` or ``DFT_REAL_OUTPUT`` are not set, the output is a complex array of the same size as input. The function performs a forward or inverse 1D or 2D transform of the whole input array or each row of the input array independently, depending on the flags ``DFT_INVERSE`` and ``DFT_ROWS``.

View File

@ -2577,7 +2577,7 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2]; DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2];
if( (flags & DFT_ROWS) || src.rows == 1 || if( (flags & DCT_ROWS) || src.rows == 1 ||
(src.cols == 1 && (src.isContinuous() && dst.isContinuous()))) (src.cols == 1 && (src.isContinuous() && dst.isContinuous())))
{ {
stage = end_stage = 0; stage = end_stage = 0;
@ -2597,7 +2597,7 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
{ {
len = src.cols; len = src.cols;
count = src.rows; count = src.rows;
if( len == 1 && !(flags & DFT_ROWS) ) if( len == 1 && !(flags & DCT_ROWS) )
{ {
len = src.rows; len = src.rows;
count = 1; count = 1;

View File

@ -2760,39 +2760,24 @@ void cv::transpose( InputArray _src, OutputArray _dst )
} }
////////////////////////////////////// completeSymm /////////////////////////////////////////
void cv::completeSymm( InputOutputArray _m, bool LtoR ) void cv::completeSymm( InputOutputArray _m, bool LtoR )
{ {
Mat m = _m.getMat(); Mat m = _m.getMat();
CV_Assert( m.dims <= 2 ); size_t step = m.step, esz = m.elemSize();
CV_Assert( m.dims <= 2 && m.rows == m.cols );
int i, j, nrows = m.rows, type = m.type(); int rows = m.rows;
int j0 = 0, j1 = nrows; int j0 = 0, j1 = rows;
CV_Assert( m.rows == m.cols );
if( type == CV_32FC1 || type == CV_32SC1 ) uchar* data = m.data;
for( int i = 0; i < rows; i++ )
{ {
int* data = (int*)m.data; if( !LtoR ) j1 = i; else j0 = i+1;
size_t step = m.step/sizeof(data[0]); for( int j = j0; j < j1; j++ )
for( i = 0; i < nrows; i++ ) memcpy(data + (i*step + j*esz), data + (j*step + i*esz), esz);
{
if( !LtoR ) j1 = i; else j0 = i+1;
for( j = j0; j < j1; j++ )
data[i*step + j] = data[j*step + i];
}
} }
else if( type == CV_64FC1 )
{
double* data = (double*)m.data;
size_t step = m.step/sizeof(data[0]);
for( i = 0; i < nrows; i++ )
{
if( !LtoR ) j1 = i; else j0 = i+1;
for( j = j0; j < j1; j++ )
data[i*step + j] = data[j*step + i];
}
}
else
CV_Error( CV_StsUnsupportedFormat, "" );
} }

View File

@ -222,6 +222,12 @@ elseif(HAVE_QTKIT)
list(APPEND HIGHGUI_LIBRARIES "-framework QTKit" "-framework QuartzCore" "-framework AppKit") list(APPEND HIGHGUI_LIBRARIES "-framework QTKit" "-framework QuartzCore" "-framework AppKit")
endif() endif()
if(HAVE_INTELPERC)
list(APPEND highgui_srcs src/cap_intelperc.cpp)
ocv_include_directories(${INTELPERC_INCLUDE_DIR})
list(APPEND HIGHGUI_LIBRARIES ${INTELPERC_LIBRARIES})
endif(HAVE_INTELPERC)
if(IOS) if(IOS)
add_definitions(-DHAVE_IOS=1) add_definitions(-DHAVE_IOS=1)
list(APPEND highgui_srcs src/ios_conversions.mm src/cap_ios_abstract_camera.mm src/cap_ios_photo_camera.mm src/cap_ios_video_camera.mm) list(APPEND highgui_srcs src/ios_conversions.mm src/cap_ios_abstract_camera.mm src/cap_ios_photo_camera.mm src/cap_ios_video_camera.mm)

View File

@ -271,7 +271,8 @@ enum { CAP_ANY = 0, // autodetect
CAP_XIAPI = 1100, // XIMEA Camera API CAP_XIAPI = 1100, // XIMEA Camera API
CAP_AVFOUNDATION = 1200, // AVFoundation framework for iOS (OS X Lion will have the same API) CAP_AVFOUNDATION = 1200, // AVFoundation framework for iOS (OS X Lion will have the same API)
CAP_GIGANETIX = 1300, // Smartek Giganetix GigEVisionSDK CAP_GIGANETIX = 1300, // Smartek Giganetix GigEVisionSDK
CAP_MSMF = 1400 // Microsoft Media Foundation (via videoInput) CAP_MSMF = 1400, // Microsoft Media Foundation (via videoInput)
CAP_INTELPERC = 1500 // Intel Perceptual Computing SDK
}; };
// generic properties (based on DC1394 properties) // generic properties (based on DC1394 properties)
@ -496,6 +497,26 @@ enum { CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006 CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006
}; };
enum { CAP_PROP_INTELPERC_PROFILE_COUNT = 11001,
CAP_PROP_INTELPERC_PROFILE_IDX = 11002,
CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE = 11003,
CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE = 11004,
CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD = 11005,
CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ = 11006,
CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT = 11007
};
// Intel PerC streams
enum { CAP_INTELPERC_DEPTH_GENERATOR = 1 << 29,
CAP_INTELPERC_IMAGE_GENERATOR = 1 << 28,
CAP_INTELPERC_GENERATORS_MASK = CAP_INTELPERC_DEPTH_GENERATOR + CAP_INTELPERC_IMAGE_GENERATOR
};
enum { CAP_INTELPERC_DEPTH_MAP = 0, // Each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth.
CAP_INTELPERC_UVDEPTH_MAP = 1, // Each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates.
CAP_INTELPERC_IR_MAP = 2, // Each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam.
CAP_INTELPERC_IMAGE = 3
};
class CV_EXPORTS_W VideoCapture class CV_EXPORTS_W VideoCapture
{ {

View File

@ -313,7 +313,9 @@ enum
CV_CAP_AVFOUNDATION = 1200, // AVFoundation framework for iOS (OS X Lion will have the same API) CV_CAP_AVFOUNDATION = 1200, // AVFoundation framework for iOS (OS X Lion will have the same API)
CV_CAP_GIGANETIX = 1300 // Smartek Giganetix GigEVisionSDK CV_CAP_GIGANETIX = 1300, // Smartek Giganetix GigEVisionSDK
CV_CAP_INTELPERC = 1500 // Intel Perceptual Computing SDK
}; };
/* start capturing frames from camera: index = camera_index + domain_offset (CV_CAP_*) */ /* start capturing frames from camera: index = camera_index + domain_offset (CV_CAP_*) */
@ -459,16 +461,29 @@ enum
CV_CAP_PROP_IOS_DEVICE_EXPOSURE = 9002, CV_CAP_PROP_IOS_DEVICE_EXPOSURE = 9002,
CV_CAP_PROP_IOS_DEVICE_FLASH = 9003, CV_CAP_PROP_IOS_DEVICE_FLASH = 9003,
CV_CAP_PROP_IOS_DEVICE_WHITEBALANCE = 9004, CV_CAP_PROP_IOS_DEVICE_WHITEBALANCE = 9004,
CV_CAP_PROP_IOS_DEVICE_TORCH = 9005 CV_CAP_PROP_IOS_DEVICE_TORCH = 9005,
// Properties of cameras available through Smartek Giganetix Ethernet Vision interface // Properties of cameras available through Smartek Giganetix Ethernet Vision interface
/* --- Vladimir Litvinenko (litvinenko.vladimir@gmail.com) --- */ /* --- Vladimir Litvinenko (litvinenko.vladimir@gmail.com) --- */
,CV_CAP_PROP_GIGA_FRAME_OFFSET_X = 10001, CV_CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
CV_CAP_PROP_GIGA_FRAME_OFFSET_Y = 10002, CV_CAP_PROP_GIGA_FRAME_OFFSET_Y = 10002,
CV_CAP_PROP_GIGA_FRAME_WIDTH_MAX = 10003, CV_CAP_PROP_GIGA_FRAME_WIDTH_MAX = 10003,
CV_CAP_PROP_GIGA_FRAME_HEIGH_MAX = 10004, CV_CAP_PROP_GIGA_FRAME_HEIGH_MAX = 10004,
CV_CAP_PROP_GIGA_FRAME_SENS_WIDTH = 10005, CV_CAP_PROP_GIGA_FRAME_SENS_WIDTH = 10005,
CV_CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006 CV_CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006,
CV_CAP_PROP_INTELPERC_PROFILE_COUNT = 11001,
CV_CAP_PROP_INTELPERC_PROFILE_IDX = 11002,
CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE = 11003,
CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE = 11004,
CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD = 11005,
CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ = 11006,
CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT = 11007,
// Intel PerC streams
CV_CAP_INTELPERC_DEPTH_GENERATOR = 1 << 29,
CV_CAP_INTELPERC_IMAGE_GENERATOR = 1 << 28,
CV_CAP_INTELPERC_GENERATORS_MASK = CV_CAP_INTELPERC_DEPTH_GENERATOR + CV_CAP_INTELPERC_IMAGE_GENERATOR
}; };
enum enum
@ -549,6 +564,14 @@ enum
CV_CAP_ANDROID_ANTIBANDING_OFF CV_CAP_ANDROID_ANTIBANDING_OFF
}; };
enum
{
CV_CAP_INTELPERC_DEPTH_MAP = 0, // Each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth.
CV_CAP_INTELPERC_UVDEPTH_MAP = 1, // Each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates.
CV_CAP_INTELPERC_IR_MAP = 2, // Each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam.
CV_CAP_INTELPERC_IMAGE = 3
};
/* retrieve or set capture properties */ /* retrieve or set capture properties */
CVAPI(double) cvGetCaptureProperty( CvCapture* capture, int property_id ); CVAPI(double) cvGetCaptureProperty( CvCapture* capture, int property_id );
CVAPI(int) cvSetCaptureProperty( CvCapture* capture, int property_id, double value ); CVAPI(int) cvSetCaptureProperty( CvCapture* capture, int property_id, double value );

View File

@ -155,6 +155,9 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
#endif #endif
#ifdef HAVE_GIGE_API #ifdef HAVE_GIGE_API
CV_CAP_GIGANETIX, CV_CAP_GIGANETIX,
#endif
#ifdef HAVE_INTELPERC
CV_CAP_INTELPERC,
#endif #endif
-1 -1
}; };
@ -193,6 +196,7 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
defined(HAVE_AVFOUNDATION) || \ defined(HAVE_AVFOUNDATION) || \
defined(HAVE_ANDROID_NATIVE_CAMERA) || \ defined(HAVE_ANDROID_NATIVE_CAMERA) || \
defined(HAVE_GIGE_API) || \ defined(HAVE_GIGE_API) || \
defined(HAVE_INTELPERC) || \
(0) (0)
// local variable to memorize the captured device // local variable to memorize the captured device
CvCapture *capture; CvCapture *capture;
@ -342,6 +346,13 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
break; // CV_CAP_GIGANETIX break; // CV_CAP_GIGANETIX
#endif #endif
#ifdef HAVE_INTELPERC
case CV_CAP_INTELPERC:
capture = cvCreateCameraCapture_IntelPerC(index);
if (capture)
return capture;
break; // CV_CAP_INTEL_PERC
#endif
} }
} }

View File

@ -0,0 +1,714 @@
#include "precomp.hpp"
#ifdef HAVE_INTELPERC
#include "pxcsession.h"
#include "pxcsmartptr.h"
#include "pxccapture.h"
class CvIntelPerCStreamBase
{
protected:
struct FrameInternal
{
IplImage* retrieveFrame()
{
if (m_mat.empty())
return NULL;
m_iplHeader = IplImage(m_mat);
return &m_iplHeader;
}
cv::Mat m_mat;
private:
IplImage m_iplHeader;
};
public:
CvIntelPerCStreamBase()
: m_profileIdx(-1)
, m_frameIdx(0)
, m_timeStampStartNS(0)
{
}
virtual ~CvIntelPerCStreamBase()
{
}
bool isValid()
{
return (m_device.IsValid() && m_stream.IsValid());
}
bool grabFrame()
{
if (!m_stream.IsValid())
return false;
if (-1 == m_profileIdx)
{
if (!setProperty(CV_CAP_PROP_INTELPERC_PROFILE_IDX, 0))
return false;
}
PXCSmartPtr<PXCImage> pxcImage; PXCSmartSP sp;
if (PXC_STATUS_NO_ERROR > m_stream->ReadStreamAsync(&pxcImage, &sp))
return false;
if (PXC_STATUS_NO_ERROR > sp->Synchronize())
return false;
if (0 == m_timeStampStartNS)
m_timeStampStartNS = pxcImage->QueryTimeStamp();
m_timeStamp = (double)((pxcImage->QueryTimeStamp() - m_timeStampStartNS) / 10000);
m_frameIdx++;
return prepareIplImage(pxcImage);
}
int getProfileIDX() const
{
return m_profileIdx;
}
public:
virtual bool initStream(PXCSession *session) = 0;
virtual double getProperty(int propIdx)
{
double ret = 0.0;
switch (propIdx)
{
case CV_CAP_PROP_INTELPERC_PROFILE_COUNT:
ret = (double)m_profiles.size();
break;
case CV_CAP_PROP_FRAME_WIDTH :
if ((0 <= m_profileIdx) && (m_profileIdx < m_profiles.size()))
ret = (double)m_profiles[m_profileIdx].imageInfo.width;
break;
case CV_CAP_PROP_FRAME_HEIGHT :
if ((0 <= m_profileIdx) && (m_profileIdx < m_profiles.size()))
ret = (double)m_profiles[m_profileIdx].imageInfo.height;
break;
case CV_CAP_PROP_FPS :
if ((0 <= m_profileIdx) && (m_profileIdx < m_profiles.size()))
{
ret = ((double)m_profiles[m_profileIdx].frameRateMin.numerator / (double)m_profiles[m_profileIdx].frameRateMin.denominator
+ (double)m_profiles[m_profileIdx].frameRateMax.numerator / (double)m_profiles[m_profileIdx].frameRateMax.denominator) / 2.0;
}
break;
case CV_CAP_PROP_POS_FRAMES:
ret = (double)m_frameIdx;
break;
case CV_CAP_PROP_POS_MSEC:
ret = m_timeStamp;
break;
};
return ret;
}
virtual bool setProperty(int propIdx, double propVal)
{
bool isSet = false;
switch (propIdx)
{
case CV_CAP_PROP_INTELPERC_PROFILE_IDX:
{
int propValInt = (int)propVal;
if ((0 <= propValInt) && (propValInt < m_profiles.size()))
{
if (m_profileIdx != propValInt)
{
m_profileIdx = propValInt;
if (m_stream.IsValid())
m_stream->SetProfile(&m_profiles[m_profileIdx]);
m_frameIdx = 0;
m_timeStampStartNS = 0;
}
isSet = true;
}
}
break;
};
return isSet;
}
protected:
PXCSmartPtr<PXCCapture::Device> m_device;
bool initDevice(PXCSession *session)
{
if (NULL == session)
return false;
pxcStatus sts = PXC_STATUS_NO_ERROR;
PXCSession::ImplDesc templat;
memset(&templat,0,sizeof(templat));
templat.group = PXCSession::IMPL_GROUP_SENSOR;
templat.subgroup= PXCSession::IMPL_SUBGROUP_VIDEO_CAPTURE;
for (int modidx = 0; PXC_STATUS_NO_ERROR <= sts; modidx++)
{
PXCSession::ImplDesc desc;
sts = session->QueryImpl(&templat, modidx, &desc);
if (PXC_STATUS_NO_ERROR > sts)
break;
PXCSmartPtr<PXCCapture> capture;
sts = session->CreateImpl<PXCCapture>(&desc, &capture);
if (!capture.IsValid())
continue;
/* enumerate devices */
for (int devidx = 0; PXC_STATUS_NO_ERROR <= sts; devidx++)
{
PXCSmartPtr<PXCCapture::Device> device;
sts = capture->CreateDevice(devidx, &device);
if (PXC_STATUS_NO_ERROR <= sts)
{
m_device = device.ReleasePtr();
return true;
}
}
}
return false;
}
PXCSmartPtr<PXCCapture::VideoStream> m_stream;
void initStreamImpl(PXCImage::ImageType type)
{
if (!m_device.IsValid())
return;
pxcStatus sts = PXC_STATUS_NO_ERROR;
/* enumerate streams */
for (int streamidx = 0; PXC_STATUS_NO_ERROR <= sts; streamidx++)
{
PXCCapture::Device::StreamInfo sinfo;
sts = m_device->QueryStream(streamidx, &sinfo);
if (PXC_STATUS_NO_ERROR > sts)
break;
if (PXCCapture::VideoStream::CUID != sinfo.cuid)
continue;
if (type != sinfo.imageType)
continue;
sts = m_device->CreateStream<PXCCapture::VideoStream>(streamidx, &m_stream);
if (PXC_STATUS_NO_ERROR == sts)
break;
m_stream.ReleaseRef();
}
}
protected:
std::vector<PXCCapture::VideoStream::ProfileInfo> m_profiles;
int m_profileIdx;
int m_frameIdx;
pxcU64 m_timeStampStartNS;
double m_timeStamp;
virtual bool validProfile(const PXCCapture::VideoStream::ProfileInfo& /*pinfo*/)
{
return true;
}
void enumProfiles()
{
m_profiles.clear();
if (!m_stream.IsValid())
return;
pxcStatus sts = PXC_STATUS_NO_ERROR;
for (int profidx = 0; PXC_STATUS_NO_ERROR <= sts; profidx++)
{
PXCCapture::VideoStream::ProfileInfo pinfo;
sts = m_stream->QueryProfile(profidx, &pinfo);
if (PXC_STATUS_NO_ERROR > sts)
break;
if (validProfile(pinfo))
m_profiles.push_back(pinfo);
}
}
virtual bool prepareIplImage(PXCImage *pxcImage) = 0;
};
class CvIntelPerCStreamImage
: public CvIntelPerCStreamBase
{
public:
CvIntelPerCStreamImage()
{
}
virtual ~CvIntelPerCStreamImage()
{
}
virtual bool initStream(PXCSession *session)
{
if (!initDevice(session))
return false;
initStreamImpl(PXCImage::IMAGE_TYPE_COLOR);
if (!m_stream.IsValid())
return false;
enumProfiles();
return true;
}
virtual double getProperty(int propIdx)
{
switch (propIdx)
{
case CV_CAP_PROP_BRIGHTNESS:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_BRIGHTNESS, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_CONTRAST:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_CONTRAST, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_SATURATION:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_SATURATION, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_HUE:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_HUE, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_GAMMA:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_GAMMA, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_SHARPNESS:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_SHARPNESS, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_GAIN:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_GAIN, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_BACKLIGHT:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_BACK_LIGHT_COMPENSATION, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_EXPOSURE:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_EXPOSURE, &fret))
return (double)fret;
return 0.0;
}
break;
//Add image stream specific properties
}
return CvIntelPerCStreamBase::getProperty(propIdx);
}
virtual bool setProperty(int propIdx, double propVal)
{
switch (propIdx)
{
case CV_CAP_PROP_BRIGHTNESS:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_BRIGHTNESS, (float)propVal));
}
break;
case CV_CAP_PROP_CONTRAST:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_CONTRAST, (float)propVal));
}
break;
case CV_CAP_PROP_SATURATION:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_SATURATION, (float)propVal));
}
break;
case CV_CAP_PROP_HUE:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_HUE, (float)propVal));
}
break;
case CV_CAP_PROP_GAMMA:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_GAMMA, (float)propVal));
}
break;
case CV_CAP_PROP_SHARPNESS:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_SHARPNESS, (float)propVal));
}
break;
case CV_CAP_PROP_GAIN:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_GAIN, (float)propVal));
}
break;
case CV_CAP_PROP_BACKLIGHT:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_BACK_LIGHT_COMPENSATION, (float)propVal));
}
break;
case CV_CAP_PROP_EXPOSURE:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_EXPOSURE, (float)propVal));
}
break;
//Add image stream specific properties
}
return CvIntelPerCStreamBase::setProperty(propIdx, propVal);
}
public:
IplImage* retrieveFrame()
{
return m_frame.retrieveFrame();
}
protected:
FrameInternal m_frame;
bool prepareIplImage(PXCImage *pxcImage)
{
if (NULL == pxcImage)
return false;
PXCImage::ImageInfo info;
pxcImage->QueryInfo(&info);
PXCImage::ImageData data;
pxcImage->AcquireAccess(PXCImage::ACCESS_READ, PXCImage::COLOR_FORMAT_RGB24, &data);
if (PXCImage::SURFACE_TYPE_SYSTEM_MEMORY != data.type)
return false;
cv::Mat temp(info.height, info.width, CV_8UC3, data.planes[0], data.pitches[0]);
temp.copyTo(m_frame.m_mat);
pxcImage->ReleaseAccess(&data);
return true;
}
};
class CvIntelPerCStreamDepth
: public CvIntelPerCStreamBase
{
public:
CvIntelPerCStreamDepth()
{
}
virtual ~CvIntelPerCStreamDepth()
{
}
virtual bool initStream(PXCSession *session)
{
if (!initDevice(session))
return false;
initStreamImpl(PXCImage::IMAGE_TYPE_DEPTH);
if (!m_stream.IsValid())
return false;
enumProfiles();
return true;
}
virtual double getProperty(int propIdx)
{
switch (propIdx)
{
case CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_LOW_CONFIDENCE_VALUE, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_SATURATION_VALUE, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_CONFIDENCE_THRESHOLD, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ:
{
if (!m_device.IsValid())
return 0.0f;
PXCPointF32 ptf;
if (PXC_STATUS_NO_ERROR == m_device->QueryPropertyAsPoint(PXCCapture::Device::PROPERTY_DEPTH_FOCAL_LENGTH, &ptf))
return (double)ptf.x;
return 0.0;
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT:
{
if (!m_device.IsValid())
return 0.0f;
PXCPointF32 ptf;
if (PXC_STATUS_NO_ERROR == m_device->QueryPropertyAsPoint(PXCCapture::Device::PROPERTY_DEPTH_FOCAL_LENGTH, &ptf))
return (double)ptf.y;
return 0.0;
}
break;
//Add depth stream sepcific properties
}
return CvIntelPerCStreamBase::getProperty(propIdx);
}
virtual bool setProperty(int propIdx, double propVal)
{
switch (propIdx)
{
case CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_LOW_CONFIDENCE_VALUE, (float)propVal));
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_SATURATION_VALUE, (float)propVal));
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_CONFIDENCE_THRESHOLD, (float)propVal));
}
break;
//Add depth stream sepcific properties
}
return CvIntelPerCStreamBase::setProperty(propIdx, propVal);
}
public:
IplImage* retrieveDepthFrame()
{
return m_frameDepth.retrieveFrame();
}
IplImage* retrieveIRFrame()
{
return m_frameIR.retrieveFrame();
}
IplImage* retrieveUVFrame()
{
return m_frameUV.retrieveFrame();
}
protected:
virtual bool validProfile(const PXCCapture::VideoStream::ProfileInfo& pinfo)
{
return (PXCImage::COLOR_FORMAT_DEPTH == pinfo.imageInfo.format);
}
protected:
FrameInternal m_frameDepth;
FrameInternal m_frameIR;
FrameInternal m_frameUV;
bool prepareIplImage(PXCImage *pxcImage)
{
if (NULL == pxcImage)
return false;
PXCImage::ImageInfo info;
pxcImage->QueryInfo(&info);
PXCImage::ImageData data;
pxcImage->AcquireAccess(PXCImage::ACCESS_READ, &data);
if (PXCImage::SURFACE_TYPE_SYSTEM_MEMORY != data.type)
return false;
if (PXCImage::COLOR_FORMAT_DEPTH != data.format)
return false;
{
cv::Mat temp(info.height, info.width, CV_16SC1, data.planes[0], data.pitches[0]);
temp.copyTo(m_frameDepth.m_mat);
}
{
cv::Mat temp(info.height, info.width, CV_16SC1, data.planes[1], data.pitches[1]);
temp.copyTo(m_frameIR.m_mat);
}
{
cv::Mat temp(info.height, info.width, CV_32FC2, data.planes[2], data.pitches[2]);
temp.copyTo(m_frameUV.m_mat);
}
pxcImage->ReleaseAccess(&data);
return true;
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class CvCapture_IntelPerC : public CvCapture
{
public:
CvCapture_IntelPerC(int /*index*/)
: m_contextOpened(false)
{
pxcStatus sts = PXCSession_Create(&m_session);
if (PXC_STATUS_NO_ERROR > sts)
return;
m_contextOpened = m_imageStream.initStream(m_session);
m_contextOpened &= m_depthStream.initStream(m_session);
}
virtual ~CvCapture_IntelPerC(){}
virtual double getProperty(int propIdx)
{
double propValue = 0;
int purePropIdx = propIdx & ~CV_CAP_INTELPERC_GENERATORS_MASK;
if (CV_CAP_INTELPERC_IMAGE_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
{
propValue = m_imageStream.getProperty(purePropIdx);
}
else if (CV_CAP_INTELPERC_DEPTH_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
{
propValue = m_depthStream.getProperty(purePropIdx);
}
else
{
propValue = m_depthStream.getProperty(purePropIdx);
}
return propValue;
}
virtual bool setProperty(int propIdx, double propVal)
{
bool isSet = false;
int purePropIdx = propIdx & ~CV_CAP_INTELPERC_GENERATORS_MASK;
if (CV_CAP_INTELPERC_IMAGE_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
{
isSet = m_imageStream.setProperty(purePropIdx, propVal);
}
else if (CV_CAP_INTELPERC_DEPTH_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
{
isSet = m_depthStream.setProperty(purePropIdx, propVal);
}
else
{
isSet = m_depthStream.setProperty(purePropIdx, propVal);
}
return isSet;
}
bool grabFrame()
{
if (!isOpened())
return false;
bool isGrabbed = false;
if (m_depthStream.isValid())
isGrabbed = m_depthStream.grabFrame();
if ((m_imageStream.isValid()) && (-1 != m_imageStream.getProfileIDX()))
isGrabbed &= m_imageStream.grabFrame();
return isGrabbed;
}
virtual IplImage* retrieveFrame(int outputType)
{
IplImage* image = 0;
switch (outputType)
{
case CV_CAP_INTELPERC_DEPTH_MAP:
image = m_depthStream.retrieveDepthFrame();
break;
case CV_CAP_INTELPERC_UVDEPTH_MAP:
image = m_depthStream.retrieveUVFrame();
break;
case CV_CAP_INTELPERC_IR_MAP:
image = m_depthStream.retrieveIRFrame();
break;
case CV_CAP_INTELPERC_IMAGE:
image = m_imageStream.retrieveFrame();
break;
}
CV_Assert(NULL != image);
return image;
}
bool isOpened() const
{
return m_contextOpened;
}
protected:
bool m_contextOpened;
PXCSmartPtr<PXCSession> m_session;
CvIntelPerCStreamImage m_imageStream;
CvIntelPerCStreamDepth m_depthStream;
};
CvCapture* cvCreateCameraCapture_IntelPerC(int index)
{
CvCapture_IntelPerC* capture = new CvCapture_IntelPerC(index);
if( capture->isOpened() )
return capture;
delete capture;
return 0;
}
#endif //HAVE_INTELPERC

View File

@ -128,6 +128,7 @@ CvCapture* cvCreateFileCapture_OpenNI( const char* filename );
CvCapture* cvCreateCameraCapture_Android( int index ); CvCapture* cvCreateCameraCapture_Android( int index );
CvCapture* cvCreateCameraCapture_XIMEA( int index ); CvCapture* cvCreateCameraCapture_XIMEA( int index );
CvCapture* cvCreateCameraCapture_AVFoundation(int index); CvCapture* cvCreateCameraCapture_AVFoundation(int index);
CvCapture* cvCreateCameraCapture_IntelPerC(int index);
CVAPI(int) cvHaveImageReader(const char* filename); CVAPI(int) cvHaveImageReader(const char* filename);

View File

@ -35,6 +35,7 @@
defined(HAVE_XIMEA) || \ defined(HAVE_XIMEA) || \
defined(HAVE_AVFOUNDATION) || \ defined(HAVE_AVFOUNDATION) || \
defined(HAVE_GIGE_API) || \ defined(HAVE_GIGE_API) || \
defined(HAVE_INTELPERC) || \
(0) (0)
//defined(HAVE_ANDROID_NATIVE_CAMERA) || - enable after #1193 //defined(HAVE_ANDROID_NATIVE_CAMERA) || - enable after #1193
# define BUILD_WITH_CAMERA_SUPPORT 1 # define BUILD_WITH_CAMERA_SUPPORT 1

View File

@ -3299,7 +3299,10 @@ public:
if( m1->type() == CV_16SC2 && (m2->type() == CV_16UC1 || m2->type() == CV_16SC1) ) if( m1->type() == CV_16SC2 && (m2->type() == CV_16UC1 || m2->type() == CV_16SC1) )
{ {
bufxy = (*m1)(Rect(x, y, bcols, brows)); bufxy = (*m1)(Rect(x, y, bcols, brows));
bufa = (*m2)(Rect(x, y, bcols, brows));
const ushort* sA = (const ushort*)(m2->data + m2->step*(y+y1)) + x;
for( x1 = 0; x1 < bcols; x1++ )
A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2-1));
} }
else if( planar_input ) else if( planar_input )
{ {
@ -3680,7 +3683,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
{ {
for( x = 0; x < size.width; x++ ) for( x = 0; x < size.width; x++ )
{ {
int fxy = src2 ? src2[x] : 0; int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1) : 0;
dst1f[x] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale; dst1f[x] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
dst2f[x] = src1[x*2+1] + (fxy >> INTER_BITS)*scale; dst2f[x] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
} }
@ -3689,7 +3692,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
{ {
for( x = 0; x < size.width; x++ ) for( x = 0; x < size.width; x++ )
{ {
int fxy = src2 ? src2[x] : 0; int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1): 0;
dst1f[x*2] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale; dst1f[x*2] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
dst1f[x*2+1] = src1[x*2+1] + (fxy >> INTER_BITS)*scale; dst1f[x*2+1] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
} }

View File

@ -18,6 +18,8 @@ class_ignore_list = (
const_ignore_list = ( const_ignore_list = (
"CV_CAP_OPENNI", "CV_CAP_OPENNI",
"CV_CAP_PROP_OPENNI_", "CV_CAP_PROP_OPENNI_",
"CV_CAP_INTELPERC",
"CV_CAP_PROP_INTELPERC_"
"WINDOW_AUTOSIZE", "WINDOW_AUTOSIZE",
"CV_WND_PROP_", "CV_WND_PROP_",
"CV_WINDOW_", "CV_WINDOW_",

View File

@ -12,6 +12,7 @@
// //
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2013, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners. // Third party copyrights are property of their respective owners.
// //
// @Authors // @Authors
@ -66,8 +67,8 @@ uint read_sumTex(IMAGE_INT32 img, sampler_t sam, int2 coord, int rows, int cols,
uchar read_imgTex(IMAGE_INT8 img, sampler_t sam, float2 coord, int rows, int cols, int elemPerRow) uchar read_imgTex(IMAGE_INT8 img, sampler_t sam, float2 coord, int rows, int cols, int elemPerRow)
{ {
#ifdef DISABLE_IMAGE2D #ifdef DISABLE_IMAGE2D
int x = clamp(convert_int_rte(coord.x), 0, cols - 1); int x = clamp(round(coord.x), 0, cols - 1);
int y = clamp(convert_int_rte(coord.y), 0, rows - 1); int y = clamp(round(coord.y), 0, rows - 1);
return img[elemPerRow * y + x]; return img[elemPerRow * y + x];
#else #else
return (uchar)read_imageui(img, sam, coord).x; return (uchar)read_imageui(img, sam, coord).x;
@ -98,6 +99,7 @@ __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAM
#define CV_PI_F 3.14159265f #define CV_PI_F 3.14159265f
#endif #endif
// Use integral image to calculate haar wavelets. // Use integral image to calculate haar wavelets.
// N = 2 // N = 2
// for simple haar paatern // for simple haar paatern
@ -114,10 +116,10 @@ float icvCalcHaarPatternSum_2(
F d = 0; F d = 0;
int2 dx1 = convert_int2_rte(ratio * src[0]); int2 dx1 = convert_int2(round(ratio * src[0]));
int2 dy1 = convert_int2_rte(ratio * src[1]); int2 dy1 = convert_int2(round(ratio * src[1]));
int2 dx2 = convert_int2_rte(ratio * src[2]); int2 dx2 = convert_int2(round(ratio * src[2]));
int2 dy2 = convert_int2_rte(ratio * src[3]); int2 dy2 = convert_int2(round(ratio * src[3]));
F t = 0; F t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow ); t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
@ -136,106 +138,9 @@ float icvCalcHaarPatternSum_2(
return (float)d; return (float)d;
} }
// N = 3
float icvCalcHaarPatternSum_3(
IMAGE_INT32 sumTex,
__constant float4 *src,
int oldSize,
int newSize,
int y, int x,
int rows, int cols, int elemPerRow)
{
float ratio = (float)newSize / oldSize;
F d = 0;
int4 dx1 = convert_int4_rte(ratio * src[0]);
int4 dy1 = convert_int4_rte(ratio * src[1]);
int4 dx2 = convert_int4_rte(ratio * src[2]);
int4 dy2 = convert_int4_rte(ratio * src[3]);
F t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy2.x), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy1.x), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy2.x), rows, cols, elemPerRow );
d += t * src[4].x / ((dx2.x - dx1.x) * (dy2.x - dy1.x));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy1.y), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy2.y), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy1.y), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy2.y), rows, cols, elemPerRow );
d += t * src[4].y / ((dx2.y - dx1.y) * (dy2.y - dy1.y));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy1.z), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy2.z), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy1.z), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy2.z), rows, cols, elemPerRow );
d += t * src[4].z / ((dx2.z - dx1.z) * (dy2.z - dy1.z));
return (float)d;
}
// N = 4
float icvCalcHaarPatternSum_4(
IMAGE_INT32 sumTex,
__constant float4 *src,
int oldSize,
int newSize,
int y, int x,
int rows, int cols, int elemPerRow)
{
float ratio = (float)newSize / oldSize;
F d = 0;
int4 dx1 = convert_int4_rte(ratio * src[0]);
int4 dy1 = convert_int4_rte(ratio * src[1]);
int4 dx2 = convert_int4_rte(ratio * src[2]);
int4 dy2 = convert_int4_rte(ratio * src[3]);
F t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy2.x), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy1.x), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy2.x), rows, cols, elemPerRow );
d += t * src[4].x / ((dx2.x - dx1.x) * (dy2.x - dy1.x));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy1.y), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy2.y), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy1.y), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy2.y), rows, cols, elemPerRow );
d += t * src[4].y / ((dx2.y - dx1.y) * (dy2.y - dy1.y));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy1.z), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy2.z), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy1.z), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy2.z), rows, cols, elemPerRow );
d += t * src[4].z / ((dx2.z - dx1.z) * (dy2.z - dy1.z));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.w, y + dy1.w), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.w, y + dy2.w), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.w, y + dy1.w), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.w, y + dy2.w), rows, cols, elemPerRow );
d += t * src[4].w / ((dx2.w - dx1.w) * (dy2.w - dy1.w));
return (float)d;
}
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Hessian // Hessian
__constant float4 c_DX[5] = { (float4)(0, 3, 6, 0), (float4)(2, 2, 2, 0), (float4)(3, 6, 9, 0), (float4)(7, 7, 7, 0), (float4)(1, -2, 1, 0) };
__constant float4 c_DY[5] = { (float4)(2, 2, 2, 0), (float4)(0, 3, 6, 0), (float4)(7, 7, 7, 0), (float4)(3, 6, 9, 0), (float4)(1, -2, 1, 0) };
__constant float4 c_DXY[5] = { (float4)(1, 5, 1, 5), (float4)(1, 1, 5, 5), (float4)(4, 8, 4, 8), (float4)(4, 4, 8, 8), (float4)(1, -1, -1, 1) };// Use integral image to calculate haar wavelets.
__inline int calcSize(int octave, int layer) __inline int calcSize(int octave, int layer)
{ {
/* Wavelet size at first layer of first octave. */ /* Wavelet size at first layer of first octave. */
@ -250,6 +155,24 @@ __inline int calcSize(int octave, int layer)
return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave; return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
} }
// Calculate a derivative in an axis-aligned direction (x or y). The "plus1"
// boxes contribute 1 * (area), and the "minus2" box contributes -2 * (area).
// So the final computation is plus1a + plus1b - 2 * minus2. The corners are
// labeled A, B, C, and D, with A being the top left, B being top right, C
// being bottom left, and D being bottom right.
F calcAxisAlignedDerivative(
int plus1a_A, int plus1a_B, int plus1a_C, int plus1a_D, F plus1a_scale,
int plus1b_A, int plus1b_B, int plus1b_C, int plus1b_D, F plus1b_scale,
int minus2_A, int minus2_B, int minus2_C, int minus2_D, F minus2_scale)
{
F plus1a = plus1a_A - plus1a_B - plus1a_C + plus1a_D;
F plus1b = plus1b_A - plus1b_B - plus1b_C + plus1b_D;
F minus2 = minus2_A - minus2_B - minus2_C + minus2_D;
return (plus1a / plus1a_scale -
2.0f * minus2 / minus2_scale +
plus1b / plus1b_scale);
}
//calculate targeted layer per-pixel determinant and trace with an integral image //calculate targeted layer per-pixel determinant and trace with an integral image
__kernel void icvCalcLayerDetAndTrace( __kernel void icvCalcLayerDetAndTrace(
@ -264,7 +187,7 @@ __kernel void icvCalcLayerDetAndTrace(
int c_octave, int c_octave,
int c_layer_rows, int c_layer_rows,
int sumTex_step int sumTex_step
) )
{ {
det_step /= sizeof(*det); det_step /= sizeof(*det);
trace_step /= sizeof(*trace); trace_step /= sizeof(*trace);
@ -288,16 +211,103 @@ __kernel void icvCalcLayerDetAndTrace(
if (size <= c_img_rows && size <= c_img_cols && i < samples_i && j < samples_j) if (size <= c_img_rows && size <= c_img_cols && i < samples_i && j < samples_j)
{ {
const float dx = icvCalcHaarPatternSum_3(sumTex, c_DX , 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step); int x = j << c_octave;
const float dy = icvCalcHaarPatternSum_3(sumTex, c_DY , 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step); int y = i << c_octave;
const float dxy = icvCalcHaarPatternSum_4(sumTex, c_DXY, 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
float ratio = (float)size / 9;
// Precompute some commonly used values, which are used to offset
// texture coordinates in the integral image.
int r1 = round(ratio);
int r2 = round(ratio * 2.0f);
int r3 = round(ratio * 3.0f);
int r4 = round(ratio * 4.0f);
int r5 = round(ratio * 5.0f);
int r6 = round(ratio * 6.0f);
int r7 = round(ratio * 7.0f);
int r8 = round(ratio * 8.0f);
int r9 = round(ratio * 9.0f);
// Calculate the approximated derivative in the x-direction
F d = 0;
{
// Some of the pixels needed to compute the derivative are
// repeated, so we only don't duplicate the fetch here.
int t02 = read_sumTex( sumTex, sampler, (int2)(x, y + r2), c_img_rows, c_img_cols, sumTex_step );
int t07 = read_sumTex( sumTex, sampler, (int2)(x, y + r7), c_img_rows, c_img_cols, sumTex_step );
int t32 = read_sumTex( sumTex, sampler, (int2)(x + r3, y + r2), c_img_rows, c_img_cols, sumTex_step );
int t37 = read_sumTex( sumTex, sampler, (int2)(x + r3, y + r7), c_img_rows, c_img_cols, sumTex_step );
int t62 = read_sumTex( sumTex, sampler, (int2)(x + r6, y + r2), c_img_rows, c_img_cols, sumTex_step );
int t67 = read_sumTex( sumTex, sampler, (int2)(x + r6, y + r7), c_img_rows, c_img_cols, sumTex_step );
int t92 = read_sumTex( sumTex, sampler, (int2)(x + r9, y + r2), c_img_rows, c_img_cols, sumTex_step );
int t97 = read_sumTex( sumTex, sampler, (int2)(x + r9, y + r7), c_img_rows, c_img_cols, sumTex_step );
d = calcAxisAlignedDerivative(t02, t07, t32, t37, (r3) * (r7 - r2),
t62, t67, t92, t97, (r9 - r6) * (r7 - r2),
t32, t37, t62, t67, (r6 - r3) * (r7 - r2));
}
const float dx = (float)d;
// Calculate the approximated derivative in the y-direction
d = 0;
{
// Some of the pixels needed to compute the derivative are
// repeated, so we only don't duplicate the fetch here.
int t20 = read_sumTex( sumTex, sampler, (int2)(x + r2, y), c_img_rows, c_img_cols, sumTex_step );
int t23 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r3), c_img_rows, c_img_cols, sumTex_step );
int t70 = read_sumTex( sumTex, sampler, (int2)(x + r7, y), c_img_rows, c_img_cols, sumTex_step );
int t73 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r3), c_img_rows, c_img_cols, sumTex_step );
int t26 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r6), c_img_rows, c_img_cols, sumTex_step );
int t76 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r6), c_img_rows, c_img_cols, sumTex_step );
int t29 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r9), c_img_rows, c_img_cols, sumTex_step );
int t79 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r9), c_img_rows, c_img_cols, sumTex_step );
d = calcAxisAlignedDerivative(t20, t23, t70, t73, (r7 - r2) * (r3),
t26, t29, t76, t79, (r7 - r2) * (r9 - r6),
t23, t26, t73, t76, (r7 - r2) * (r6 - r3));
}
const float dy = (float)d;
// Calculate the approximated derivative in the xy-direction
d = 0;
{
// There's no saving us here, we just have to get all of the pixels in
// separate fetches
F t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + r1, y + r1), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r1, y + r4), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r4, y + r1), c_img_rows, c_img_cols, sumTex_step );
t += read_sumTex( sumTex, sampler, (int2)(x + r4, y + r4), c_img_rows, c_img_cols, sumTex_step );
d += t / ((r4 - r1) * (r4 - r1));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + r5, y + r1), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r5, y + r4), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r8, y + r1), c_img_rows, c_img_cols, sumTex_step );
t += read_sumTex( sumTex, sampler, (int2)(x + r8, y + r4), c_img_rows, c_img_cols, sumTex_step );
d -= t / ((r8 - r5) * (r4 - r1));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + r1, y + r5), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r1, y + r8), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r4, y + r5), c_img_rows, c_img_cols, sumTex_step );
t += read_sumTex( sumTex, sampler, (int2)(x + r4, y + r8), c_img_rows, c_img_cols, sumTex_step );
d -= t / ((r4 - r1) * (r8 - r5));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + r5, y + r5), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r5, y + r8), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r8, y + r5), c_img_rows, c_img_cols, sumTex_step );
t += read_sumTex( sumTex, sampler, (int2)(x + r8, y + r8), c_img_rows, c_img_cols, sumTex_step );
d += t / ((r8 - r5) * (r8 - r5));
}
const float dxy = (float)d;
det [j + margin + det_step * (layer * c_layer_rows + i + margin)] = dx * dy - 0.81f * dxy * dxy; det [j + margin + det_step * (layer * c_layer_rows + i + margin)] = dx * dy - 0.81f * dxy * dxy;
trace[j + margin + trace_step * (layer * c_layer_rows + i + margin)] = dx + dy; trace[j + margin + trace_step * (layer * c_layer_rows + i + margin)] = dx + dy;
} }
} }
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// NONMAX // NONMAX
@ -309,10 +319,10 @@ bool within_check(IMAGE_INT32 maskSumTex, int sum_i, int sum_j, int size, int ro
float d = 0; float d = 0;
int dx1 = convert_int_rte(ratio * c_DM[0]); int dx1 = round(ratio * c_DM[0]);
int dy1 = convert_int_rte(ratio * c_DM[1]); int dy1 = round(ratio * c_DM[1]);
int dx2 = convert_int_rte(ratio * c_DM[2]); int dx2 = round(ratio * c_DM[2]);
int dy2 = convert_int_rte(ratio * c_DM[3]); int dy2 = round(ratio * c_DM[3]);
float t = 0; float t = 0;
@ -572,7 +582,7 @@ void icvFindMaximaInLayer(
} }
// solve 3x3 linear system Ax=b for floating point input // solve 3x3 linear system Ax=b for floating point input
inline bool solve3x3_float(volatile __local const float4 *A, volatile __local const float *b, volatile __local float *x) inline bool solve3x3_float(const float4 *A, const float *b, float *x)
{ {
float det = A[0].x * (A[1].y * A[2].z - A[1].z * A[2].y) float det = A[0].x * (A[1].y * A[2].z - A[1].z * A[2].y)
- A[0].y * (A[1].x * A[2].z - A[1].z * A[2].x) - A[0].y * (A[1].x * A[2].z - A[1].z * A[2].x)
@ -651,7 +661,7 @@ void icvInterpolateKeypoint(
if (get_local_id(0) == 0 && get_local_id(1) == 0 && get_local_id(2) == 0) if (get_local_id(0) == 0 && get_local_id(1) == 0 && get_local_id(2) == 0)
{ {
volatile __local float dD[3]; float dD[3];
//dx //dx
dD[0] = -0.5f * (N9[1][1][2] - N9[1][1][0]); dD[0] = -0.5f * (N9[1][1][2] - N9[1][1][0]);
@ -660,7 +670,7 @@ void icvInterpolateKeypoint(
//ds //ds
dD[2] = -0.5f * (N9[2][1][1] - N9[0][1][1]); dD[2] = -0.5f * (N9[2][1][1] - N9[0][1][1]);
volatile __local float4 H[3]; float4 H[3];
//dxx //dxx
H[0].x = N9[1][1][0] - 2.0f * N9[1][1][1] + N9[1][1][2]; H[0].x = N9[1][1][0] - 2.0f * N9[1][1][1] + N9[1][1][2];
@ -681,7 +691,7 @@ void icvInterpolateKeypoint(
//dss //dss
H[2].z = N9[0][1][1] - 2.0f * N9[1][1][1] + N9[2][1][1]; H[2].z = N9[0][1][1] - 2.0f * N9[1][1][1] + N9[2][1][1];
volatile __local float x[3]; float x[3];
if (solve3x3_float(H, dD, x)) if (solve3x3_float(H, dD, x))
{ {
@ -711,7 +721,7 @@ void icvInterpolateKeypoint(
sampled in a circle of radius 6s using wavelets of size 4s. sampled in a circle of radius 6s using wavelets of size 4s.
We ensure the gradient wavelet size is even to ensure the We ensure the gradient wavelet size is even to ensure the
wavelet pattern is balanced and symmetric around its center */ wavelet pattern is balanced and symmetric around its center */
const int grad_wav_size = 2 * convert_int_rte(2.0f * s); const int grad_wav_size = 2 * round(2.0f * s);
// check when grad_wav_size is too big // check when grad_wav_size is too big
if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size) if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size)
@ -737,9 +747,12 @@ void icvInterpolateKeypoint(
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// Orientation // Orientation
#define ORI_SEARCH_INC 5 #define ORI_WIN 60
#define ORI_WIN 60 #define ORI_SAMPLES 113
#define ORI_SAMPLES 113
// The distance between samples in the beginning of the the reduction
#define ORI_RESPONSE_REDUCTION_WIDTH 48
#define ORI_RESPONSE_ARRAY_SIZE (ORI_RESPONSE_REDUCTION_WIDTH * 2)
__constant float c_aptX[ORI_SAMPLES] = {-6, -5, -5, -5, -5, -5, -5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6}; __constant float c_aptX[ORI_SAMPLES] = {-6, -5, -5, -5, -5, -5, -5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6};
__constant float c_aptY[ORI_SAMPLES] = {0, -3, -2, -1, 0, 1, 2, 3, -4, -3, -2, -1, 0, 1, 2, 3, 4, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -4, -3, -2, -1, 0, 1, 2, 3, 4, -3, -2, -1, 0, 1, 2, 3, 0}; __constant float c_aptY[ORI_SAMPLES] = {0, -3, -2, -1, 0, 1, 2, 3, -4, -3, -2, -1, 0, 1, 2, 3, 4, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -4, -3, -2, -1, 0, 1, 2, 3, 4, -3, -2, -1, 0, 1, 2, 3, 0};
@ -833,12 +846,15 @@ void icvCalcOrientation(
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step; __global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
volatile __local float s_X[128]; __local float s_X[ORI_SAMPLES];
volatile __local float s_Y[128]; __local float s_Y[ORI_SAMPLES];
volatile __local float s_angle[128]; __local float s_angle[ORI_SAMPLES];
volatile __local float s_sumx[32 * 4]; // Need to allocate enough to make the reduction work without accessing
volatile __local float s_sumy[32 * 4]; // past the end of the array.
__local float s_sumx[ORI_RESPONSE_ARRAY_SIZE];
__local float s_sumy[ORI_RESPONSE_ARRAY_SIZE];
__local float s_mod[ORI_RESPONSE_ARRAY_SIZE];
/* The sampling intervals and wavelet sized for selecting an orientation /* The sampling intervals and wavelet sized for selecting an orientation
and building the keypoint descriptor are defined relative to 's' */ and building the keypoint descriptor are defined relative to 's' */
@ -849,28 +865,60 @@ void icvCalcOrientation(
sampled in a circle of radius 6s using wavelets of size 4s. sampled in a circle of radius 6s using wavelets of size 4s.
We ensure the gradient wavelet size is even to ensure the We ensure the gradient wavelet size is even to ensure the
wavelet pattern is balanced and symmetric around its center */ wavelet pattern is balanced and symmetric around its center */
const int grad_wav_size = 2 * convert_int_rte(2.0f * s); const int grad_wav_size = 2 * round(2.0f * s);
// check when grad_wav_size is too big // check when grad_wav_size is too big
if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size) if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size)
return; return;
// Calc X, Y, angle and store it to shared memory // Calc X, Y, angle and store it to shared memory
const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0); const int tid = get_local_id(0);
// Initialize values that are only used as part of the reduction later.
if (tid < ORI_RESPONSE_ARRAY_SIZE - ORI_LOCAL_SIZE) {
s_mod[tid + ORI_LOCAL_SIZE] = 0.0f;
}
float X = 0.0f, Y = 0.0f, angle = 0.0f; float ratio = (float)grad_wav_size / 4;
if (tid < ORI_SAMPLES) int r2 = round(ratio * 2.0);
int r4 = round(ratio * 4.0);
for (int i = tid; i < ORI_SAMPLES; i += ORI_LOCAL_SIZE )
{ {
float X = 0.0f, Y = 0.0f, angle = 0.0f;
const float margin = (float)(grad_wav_size - 1) / 2.0f; const float margin = (float)(grad_wav_size - 1) / 2.0f;
const int x = convert_int_rte(featureX[get_group_id(0)] + c_aptX[tid] * s - margin); const int x = round(featureX[get_group_id(0)] + c_aptX[i] * s - margin);
const int y = convert_int_rte(featureY[get_group_id(0)] + c_aptY[tid] * s - margin); const int y = round(featureY[get_group_id(0)] + c_aptY[i] * s - margin);
if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size && if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size &&
x >= 0 && x < (c_img_cols + 1) - grad_wav_size) x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
{ {
X = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NX, 4, grad_wav_size, y, x, c_img_rows, c_img_cols, sum_step);
Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x, c_img_rows, c_img_cols, sum_step); float apt = c_aptW[i];
// Compute the haar sum without fetching duplicate pixels.
float t00 = read_sumTex( sumTex, sampler, (int2)(x, y), c_img_rows, c_img_cols, sum_step);
float t02 = read_sumTex( sumTex, sampler, (int2)(x, y + r2), c_img_rows, c_img_cols, sum_step);
float t04 = read_sumTex( sumTex, sampler, (int2)(x, y + r4), c_img_rows, c_img_cols, sum_step);
float t20 = read_sumTex( sumTex, sampler, (int2)(x + r2, y), c_img_rows, c_img_cols, sum_step);
float t24 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r4), c_img_rows, c_img_cols, sum_step);
float t40 = read_sumTex( sumTex, sampler, (int2)(x + r4, y), c_img_rows, c_img_cols, sum_step);
float t42 = read_sumTex( sumTex, sampler, (int2)(x + r4, y + r2), c_img_rows, c_img_cols, sum_step);
float t44 = read_sumTex( sumTex, sampler, (int2)(x + r4, y + r4), c_img_rows, c_img_cols, sum_step);
F t = t00 - t04 - t20 + t24;
X -= t / ((r2) * (r4));
t = t20 - t24 - t40 + t44;
X += t / ((r4 - r2) * (r4));
t = t00 - t02 - t40 + t42;
Y += t / ((r2) * (r4));
t = t02 - t04 - t42 + t44;
Y -= t / ((r4) * (r4 - r2));
X = apt*X;
Y = apt*Y;
angle = atan2(Y, X); angle = atan2(Y, X);
@ -879,76 +927,61 @@ void icvCalcOrientation(
angle *= 180.0f / CV_PI_F; angle *= 180.0f / CV_PI_F;
} }
s_X[i] = X;
s_Y[i] = Y;
s_angle[i] = angle;
} }
s_X[tid] = X;
s_Y[tid] = Y;
s_angle[tid] = angle;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
float bestx = 0, besty = 0, best_mod = 0; float bestx = 0, besty = 0, best_mod = 0;
float sumx = 0.0f, sumy = 0.0f;
const int dir = tid * ORI_SEARCH_INC;
#pragma unroll
for (int i = 0; i < ORI_SAMPLES; ++i) {
int angle = round(s_angle[i]);
#pragma unroll int d = abs(angle - dir);
for (int i = 0; i < 18; ++i) if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{ {
const int dir = (i * 4 + get_local_id(1)) * ORI_SEARCH_INC; sumx += s_X[i];
sumy += s_Y[i];
}
}
s_sumx[tid] = sumx;
s_sumy[tid] = sumy;
s_mod[tid] = sumx*sumx + sumy*sumy;
barrier(CLK_LOCAL_MEM_FENCE);
volatile float sumx = 0.0f, sumy = 0.0f; // This reduction searches for the longest wavelet response vector. The first
int d = abs(convert_int_rte(s_angle[get_local_id(0)]) - dir); // step uses all of the work items in the workgroup to narrow the search
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) // down to the three candidates. It requires s_mod to have a few more
{ // elements alocated past the work-group size, which are pre-initialized to
sumx = s_X[get_local_id(0)]; // 0.0f above.
sumy = s_Y[get_local_id(0)]; for(int t = ORI_RESPONSE_REDUCTION_WIDTH; t >= 3; t /= 2) {
} if (tid < t) {
d = abs(convert_int_rte(s_angle[get_local_id(0) + 32]) - dir); if (s_mod[tid] < s_mod[tid + t]) {
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2) s_mod[tid] = s_mod[tid + t];
{ s_sumx[tid] = s_sumx[tid + t];
sumx += s_X[get_local_id(0) + 32]; s_sumy[tid] = s_sumy[tid + t];
sumy += s_Y[get_local_id(0) + 32]; }
}
d = abs(convert_int_rte(s_angle[get_local_id(0) + 64]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[get_local_id(0) + 64];
sumy += s_Y[get_local_id(0) + 64];
}
d = abs(convert_int_rte(s_angle[get_local_id(0) + 96]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[get_local_id(0) + 96];
sumy += s_Y[get_local_id(0) + 96];
}
reduce_32_sum(s_sumx + get_local_id(1) * 32, &sumx, get_local_id(0));
reduce_32_sum(s_sumy + get_local_id(1) * 32, &sumy, get_local_id(0));
const float temp_mod = sumx * sumx + sumy * sumy;
if (temp_mod > best_mod)
{
best_mod = temp_mod;
bestx = sumx;
besty = sumy;
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
} }
if (get_local_id(0) == 0)
{
s_X[get_local_id(1)] = bestx;
s_Y[get_local_id(1)] = besty;
s_angle[get_local_id(1)] = best_mod;
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(1) == 0 && get_local_id(0) == 0) // Do the final reduction and write out the result.
if (tid == 0)
{ {
int bestIdx = 0; int bestIdx = 0;
if (s_angle[1] > s_angle[bestIdx]) // The loop above narrowed the search of the longest vector to three
// possibilities. Pick the best here.
if (s_mod[1] > s_mod[bestIdx])
bestIdx = 1; bestIdx = 1;
if (s_angle[2] > s_angle[bestIdx]) if (s_mod[2] > s_mod[bestIdx])
bestIdx = 2; bestIdx = 2;
if (s_angle[3] > s_angle[bestIdx])
bestIdx = 3;
float kp_dir = atan2(s_Y[bestIdx], s_X[bestIdx]); float kp_dir = atan2(s_sumy[bestIdx], s_sumx[bestIdx]);
if (kp_dir < 0) if (kp_dir < 0)
kp_dir += 2.0f * CV_PI_F; kp_dir += 2.0f * CV_PI_F;
kp_dir *= 180.0f / CV_PI_F; kp_dir *= 180.0f / CV_PI_F;
@ -961,7 +994,6 @@ void icvCalcOrientation(
} }
} }
__kernel __kernel
void icvSetUpright( void icvSetUpright(
__global float * keypoints, __global float * keypoints,
@ -1035,8 +1067,8 @@ inline float linearFilter(
float out = 0.0f; float out = 0.0f;
const int x1 = convert_int_rtn(x); const int x1 = round(x);
const int y1 = convert_int_rtn(y); const int y1 = round(y);
const int x2 = x1 + 1; const int x2 = x1 + 1;
const int y2 = y1 + 1; const int y2 = y1 + 1;

View File

@ -46,6 +46,7 @@
#ifdef HAVE_OPENCV_OCL #ifdef HAVE_OPENCV_OCL
#include <cstdio> #include <cstdio>
#include <sstream>
#include "opencl_kernels.hpp" #include "opencl_kernels.hpp"
using namespace cv; using namespace cv;
@ -57,18 +58,25 @@ namespace cv
{ {
namespace ocl namespace ocl
{ {
// The number of degrees between orientation samples in calcOrientation
const static int ORI_SEARCH_INC = 5;
// The local size of the calcOrientation kernel
const static int ORI_LOCAL_SIZE = (360 / ORI_SEARCH_INC);
static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3], static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth) size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
{ {
char optBuf [100] = {0}; std::stringstream optsStr;
char * optBufPtr = optBuf; optsStr << "-D ORI_LOCAL_SIZE=" << ORI_LOCAL_SIZE << " ";
optsStr << "-D ORI_SEARCH_INC=" << ORI_SEARCH_INC << " ";
cl_kernel kernel; cl_kernel kernel;
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr); kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optsStr.str().c_str());
size_t wave_size = queryWaveFrontSize(kernel); size_t wave_size = queryWaveFrontSize(kernel);
CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS); CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast<int>(wave_size)); optsStr << "-D WAVE_SIZE=" << wave_size;
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr); openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optsStr.str().c_str());
} }
} }
} }
@ -601,8 +609,8 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&surf_.sum.step)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&surf_.sum.step));
size_t localThreads[3] = {32, 4, 1}; size_t localThreads[3] = {ORI_LOCAL_SIZE, 1, 1};
size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1}; size_t globalThreads[3] = {nFeatures * localThreads[0], 1, 1};
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1); openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
} }

View File

@ -287,7 +287,7 @@ ocl::createSeparableLinearFilter_GPU
---------------------------------------- ----------------------------------------
Creates a separable linear filter engine. Creates a separable linear filter engine.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT) .. ocv:function:: Ptr<FilterEngine_GPU> ocl::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
:param srcType: Source array type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported. :param srcType: Source array type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported.
@ -303,6 +303,8 @@ Creates a separable linear filter engine.
:param bordertype: Pixel extrapolation method. :param bordertype: Pixel extrapolation method.
:param imgSize: Source image size to choose optimal method for processing.
.. seealso:: :ocv:func:`ocl::getLinearRowFilter_GPU`, :ocv:func:`ocl::getLinearColumnFilter_GPU`, :ocv:func:`createSeparableLinearFilter` .. seealso:: :ocv:func:`ocl::getLinearRowFilter_GPU`, :ocv:func:`ocl::getLinearColumnFilter_GPU`, :ocv:func:`createSeparableLinearFilter`
@ -334,7 +336,7 @@ ocl::createDerivFilter_GPU
------------------------------ ------------------------------
Creates a filter engine for the generalized Sobel operator. Creates a filter engine for the generalized Sobel operator.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT ) .. ocv:function:: Ptr<FilterEngine_GPU> ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
:param srcType: Source image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported. :param srcType: Source image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported.
@ -348,6 +350,8 @@ Creates a filter engine for the generalized Sobel operator.
:param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`. :param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.
:param imgSize: Source image size to choose optimal method for processing.
.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createDerivFilter` .. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createDerivFilter`
@ -405,7 +409,7 @@ ocl::createGaussianFilter_GPU
--------------------------------- ---------------------------------
Creates a Gaussian filter engine. Creates a Gaussian filter engine.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT) .. ocv:function:: Ptr<FilterEngine_GPU> ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
:param type: Source and destination image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` are supported. :param type: Source and destination image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` are supported.
@ -417,6 +421,8 @@ Creates a Gaussian filter engine.
:param bordertype: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`. :param bordertype: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.
:param imgSize: Source image size to choose optimal method for processing.
.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createGaussianFilter` .. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createGaussianFilter`
ocl::GaussianBlur ocl::GaussianBlur

View File

@ -695,17 +695,17 @@ namespace cv
//! returns the separable linear filter engine //! returns the separable linear filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT); const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));
//! returns the separable filter engine with the specified filters //! returns the separable filter engine with the specified filters
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter, CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
const Ptr<BaseColumnFilter_GPU> &columnFilter); const Ptr<BaseColumnFilter_GPU> &columnFilter);
//! returns the Gaussian filter engine //! returns the Gaussian filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT); CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));
//! returns filter engine for the generalized Sobel operator //! returns filter engine for the generalized Sobel operator
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT ); CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) );
//! applies Laplacian operator to the image //! applies Laplacian operator to the image
// supports only ksize = 1 and ksize = 3 // supports only ksize = 1 and ksize = 3
@ -1439,8 +1439,10 @@ namespace cv
oclMat Dx_; oclMat Dx_;
oclMat Dy_; oclMat Dy_;
oclMat eig_; oclMat eig_;
oclMat eig_minmax_;
oclMat minMaxbuf_; oclMat minMaxbuf_;
oclMat tmpCorners_; oclMat tmpCorners_;
oclMat counter_;
}; };
inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_, inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,

View File

@ -56,8 +56,19 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
{ {
int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
int pixels_per_work_item = 1;
String build_options = format("-D DEPTH_%d", src.depth()); if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
{
if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
pixels_per_work_item = 4;
else if (src.cols % 2 == 0)
pixels_per_work_item = 2;
else
pixels_per_work_item = 1;
}
String build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), src.oclchannels(), bidx, pixels_per_work_item);
if (!additionalOptions.empty()) if (!additionalOptions.empty())
build_options = build_options + additionalOptions; build_options = build_options + additionalOptions;
@ -66,7 +77,6 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -77,6 +87,73 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
if (!data2.empty()) if (!data2.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data )); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));
size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
static void toHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(),
const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
{
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d", src.depth(), src.oclchannels(), bidx);
if (!additionalOptions.empty())
build_options += additionalOptions;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data1.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data1.data ));
if (!data2.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
static void fromGray_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
if (!additionalOptions.empty())
build_options += additionalOptions;
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
size_t gt[3] = { dst.cols, dst.rows, 1 }; size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID #ifdef ANDROID
size_t lt[3] = { 16, 10, 1 }; size_t lt[3] = { 16, 10, 1 };
@ -89,7 +166,50 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName, static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat()) const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{ {
String build_options = format("-D DEPTH_%d -D dcn=%d", src.depth(), dst.channels()); int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
int pixels_per_work_item = 1;
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
{
if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
pixels_per_work_item = 4;
else if (src.cols % 2 == 0)
pixels_per_work_item = 2;
else
pixels_per_work_item = 1;
}
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), dst.channels(), bidx, pixels_per_work_item);
if (!additionalOptions.empty())
build_options += additionalOptions;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
static void toRGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{
String build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
if (!additionalOptions.empty()) if (!additionalOptions.empty())
build_options = build_options + additionalOptions; build_options = build_options + additionalOptions;
@ -101,7 +221,6 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -119,10 +238,13 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str()); openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
} }
static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse) static void fromHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{ {
String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s", src.depth(), std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER"); if (!additionalOptions.empty())
build_options += additionalOptions;
int src_offset = src.offset / src.elemSize1(), src_step = src.step1(); int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1(); int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
@ -136,6 +258,36 @@ static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
{
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s",
src.depth(), dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
size_t gt[3] = { dst.cols, dst.rows, 1 }; size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID #ifdef ANDROID
size_t lt[3] = { 16, 10, 1 }; size_t lt[3] = { 16, 10, 1 };
@ -147,8 +299,8 @@ static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName) static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
{ {
String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d", String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d -D bidx=%d",
src.depth(), greenbits, dst.channels()); src.depth(), greenbits, dst.channels(), bidx);
int src_offset = src.offset >> 1, src_step = src.step >> 1; int src_offset = src.offset >> 1, src_step = src.step >> 1;
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step / dst.elemSize1(); int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step / dst.elemSize1();
@ -157,7 +309,6 @@ static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int gree
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -174,8 +325,8 @@ static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int gree
static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName) static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
{ {
String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d", String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d -D bidx=%d",
src.depth(), greenbits, src.channels()); src.depth(), greenbits, src.channels(), bidx);
int src_offset = (int)src.offset, src_step = (int)src.step; int src_offset = (int)src.offset, src_step = (int)src.step;
int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1; int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1;
@ -184,7 +335,6 @@ static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenb
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step)); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data)); args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset )); args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -272,7 +422,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
CV_Assert(scn == 1); CV_Assert(scn == 1);
dcn = code == COLOR_GRAY2BGRA ? 4 : 3; dcn = code == COLOR_GRAY2BGRA ? 4 : 3;
dst.create(sz, CV_MAKETYPE(depth, dcn)); dst.create(sz, CV_MAKETYPE(depth, dcn));
toRGB_caller(src, dst, 0, "Gray2RGB"); fromGray_caller(src, dst, 0, "Gray2RGB");
break; break;
} }
case COLOR_BGR2YUV: case COLOR_RGB2YUV: case COLOR_BGR2YUV: case COLOR_RGB2YUV:
@ -303,7 +453,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
Size dstSz(sz.width, sz.height * 2 / 3); Size dstSz(sz.width, sz.height * 2 / 3);
dst.create(dstSz, CV_MAKETYPE(depth, dcn)); dst.create(dstSz, CV_MAKETYPE(depth, dcn));
toRGB_caller(src, dst, bidx, "YUV2RGBA_NV12"); toRGB_NV12_caller(src, dst, bidx, "YUV2RGBA_NV12");
break; break;
} }
case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb: case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb:
@ -460,11 +610,11 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
initialized = true; initialized = true;
} }
fromRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180); toHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
return; return;
} }
fromRGB_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f))); toHSV_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
break; break;
} }
case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL: case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL:
@ -483,7 +633,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
dst.create(sz, CV_MAKETYPE(depth, dcn)); dst.create(sz, CV_MAKETYPE(depth, dcn));
std::string kernelName = std::string(is_hsv ? "HSV" : "HLS") + "2RGB"; std::string kernelName = std::string(is_hsv ? "HSV" : "HLS") + "2RGB";
toRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange)); fromHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
break; break;
} }
case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA: case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA:

View File

@ -741,6 +741,135 @@ void cv::ocl::filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &ke
f->apply(src, dst); f->apply(src, dst);
} }
const int optimizedSepFilterLocalSize = 16;
static void sepFilter2D_SinglePass(const oclMat &src, oclMat &dst,
const Mat &row_kernel, const Mat &col_kernel, int bordertype = BORDER_DEFAULT)
{
size_t lt2[3] = {optimizedSepFilterLocalSize, optimizedSepFilterLocalSize, 1};
size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};
unsigned int src_pitch = src.step;
unsigned int dst_pitch = dst.step;
int src_offset_x = (src.offset % src.step) / src.elemSize();
int src_offset_y = src.offset / src.step;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_x ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_y ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.offset ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholecols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholerows ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
String option = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d",(int)lt2[0], (int)lt2[1],
row_kernel.rows / 2, col_kernel.rows / 2 );
option += " -D KERNEL_MATRIX_X=";
for(int i=0; i<row_kernel.rows; i++)
option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &row_kernel.at<float>(i) ) );
option += "0x0";
option += " -D KERNEL_MATRIX_Y=";
for(int i=0; i<col_kernel.rows; i++)
option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &col_kernel.at<float>(i) ) );
option += "0x0";
switch(src.type())
{
case CV_8UC1:
option += " -D SRCTYPE=uchar -D CONVERT_SRCTYPE=convert_float -D WORKTYPE=float";
break;
case CV_32FC1:
option += " -D SRCTYPE=float -D CONVERT_SRCTYPE= -D WORKTYPE=float";
break;
case CV_8UC2:
option += " -D SRCTYPE=uchar2 -D CONVERT_SRCTYPE=convert_float2 -D WORKTYPE=float2";
break;
case CV_32FC2:
option += " -D SRCTYPE=float2 -D CONVERT_SRCTYPE= -D WORKTYPE=float2";
break;
case CV_8UC3:
option += " -D SRCTYPE=uchar3 -D CONVERT_SRCTYPE=convert_float3 -D WORKTYPE=float3";
break;
case CV_32FC3:
option += " -D SRCTYPE=float3 -D CONVERT_SRCTYPE= -D WORKTYPE=float3";
break;
case CV_8UC4:
option += " -D SRCTYPE=uchar4 -D CONVERT_SRCTYPE=convert_float4 -D WORKTYPE=float4";
break;
case CV_32FC4:
option += " -D SRCTYPE=float4 -D CONVERT_SRCTYPE= -D WORKTYPE=float4";
break;
default:
CV_Error(CV_StsUnsupportedFormat, "Image type is not supported!");
break;
}
switch(dst.type())
{
case CV_8UC1:
option += " -D DSTTYPE=uchar -D CONVERT_DSTTYPE=convert_uchar_sat";
break;
case CV_8UC2:
option += " -D DSTTYPE=uchar2 -D CONVERT_DSTTYPE=convert_uchar2_sat";
break;
case CV_8UC3:
option += " -D DSTTYPE=uchar3 -D CONVERT_DSTTYPE=convert_uchar3_sat";
break;
case CV_8UC4:
option += " -D DSTTYPE=uchar4 -D CONVERT_DSTTYPE=convert_uchar4_sat";
break;
case CV_32FC1:
option += " -D DSTTYPE=float -D CONVERT_DSTTYPE=";
break;
case CV_32FC2:
option += " -D DSTTYPE=float2 -D CONVERT_DSTTYPE=";
break;
case CV_32FC3:
option += " -D DSTTYPE=float3 -D CONVERT_DSTTYPE=";
break;
case CV_32FC4:
option += " -D DSTTYPE=float4 -D CONVERT_DSTTYPE=";
break;
default:
CV_Error(CV_StsUnsupportedFormat, "Image type is not supported!");
break;
}
switch(bordertype)
{
case cv::BORDER_CONSTANT:
option += " -D BORDER_CONSTANT";
break;
case cv::BORDER_REPLICATE:
option += " -D BORDER_REPLICATE";
break;
case cv::BORDER_REFLECT:
option += " -D BORDER_REFLECT";
break;
case cv::BORDER_REFLECT101:
option += " -D BORDER_REFLECT_101";
break;
case cv::BORDER_WRAP:
option += " -D BORDER_WRAP";
break;
default:
CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
break;
}
openCLExecuteKernel(src.clCxt, &filtering_sep_filter_singlepass, "sep_filter_singlepass", gt2, lt2, args,
-1, -1, option.c_str() );
}
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
// SeparableFilter // SeparableFilter
@ -790,6 +919,35 @@ Ptr<FilterEngine_GPU> cv::ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter
return makePtr<SeparableFilterEngine_GPU>(rowFilter, columnFilter); return makePtr<SeparableFilterEngine_GPU>(rowFilter, columnFilter);
} }
namespace
{
class SingleStepSeparableFilterEngine_GPU : public FilterEngine_GPU
{
public:
SingleStepSeparableFilterEngine_GPU( const Mat &rowKernel_, const Mat &columnKernel_, const int btype )
{
bordertype = btype;
rowKernel = rowKernel_;
columnKernel = columnKernel_;
}
virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
{
normalizeROI(roi, Size(rowKernel.rows, columnKernel.rows), Point(-1,-1), src.size());
oclMat srcROI = src(roi);
oclMat dstROI = dst(roi);
sepFilter2D_SinglePass(src, dst, rowKernel, columnKernel, bordertype);
}
Mat rowKernel;
Mat columnKernel;
int bordertype;
};
}
static void GPUFilterBox(const oclMat &src, oclMat &dst, static void GPUFilterBox(const oclMat &src, oclMat &dst,
Size &ksize, const Point anchor, const int borderType) Size &ksize, const Point anchor, const int borderType)
{ {
@ -1243,17 +1401,32 @@ Ptr<BaseColumnFilter_GPU> cv::ocl::getLinearColumnFilter_GPU(int /*bufType*/, in
} }
Ptr<FilterEngine_GPU> cv::ocl::createSeparableLinearFilter_GPU(int srcType, int dstType, Ptr<FilterEngine_GPU> cv::ocl::createSeparableLinearFilter_GPU(int srcType, int dstType,
const Mat &rowKernel, const Mat &columnKernel, const Point &anchor, double delta, int bordertype) const Mat &rowKernel, const Mat &columnKernel, const Point &anchor, double delta, int bordertype, Size imgSize )
{ {
int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(dstType); int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(dstType);
int cn = CV_MAT_CN(srcType); int cn = CV_MAT_CN(srcType);
int bdepth = std::max(std::max(sdepth, ddepth), CV_32F); int bdepth = std::max(std::max(sdepth, ddepth), CV_32F);
int bufType = CV_MAKETYPE(bdepth, cn); int bufType = CV_MAKETYPE(bdepth, cn);
Context* clCxt = Context::getContext();
Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, bordertype); //if image size is non-degenerate and large enough
Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, bordertype, delta); //and if filter support is reasonable to satisfy larger local memory requirements,
//then we can use single pass routine to avoid extra runtime calls overhead
if( clCxt && clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) &&
rowKernel.rows <= 21 && columnKernel.rows <= 21 &&
(rowKernel.rows & 1) == 1 && (columnKernel.rows & 1) == 1 &&
imgSize.width > optimizedSepFilterLocalSize + (rowKernel.rows>>1) &&
imgSize.height > optimizedSepFilterLocalSize + (columnKernel.rows>>1) )
{
return Ptr<FilterEngine_GPU>(new SingleStepSeparableFilterEngine_GPU(rowKernel, columnKernel, bordertype));
}
else
{
Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, bordertype);
Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, bordertype, delta);
return createSeparableFilter_GPU(rowFilter, columnFilter); return createSeparableFilter_GPU(rowFilter, columnFilter);
}
} }
void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor, double delta, int bordertype) void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor, double delta, int bordertype)
@ -1277,16 +1450,16 @@ void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat
dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels())); dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, delta, bordertype); Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, delta, bordertype, src.size());
f->apply(src, dst); f->apply(src, dst);
} }
Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType) Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType, Size imgSize )
{ {
Mat kx, ky; Mat kx, ky;
getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F); getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
return createSeparableLinearFilter_GPU(srcType, dstType, return createSeparableLinearFilter_GPU(srcType, dstType,
kx, ky, Point(-1, -1), 0, borderType); kx, ky, Point(-1, -1), 0, borderType, imgSize);
} }
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
@ -1356,7 +1529,7 @@ void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, d
//////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////
// Gaussian Filter // Gaussian Filter
Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int bordertype) Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int bordertype, Size imgSize)
{ {
int depth = CV_MAT_DEPTH(type); int depth = CV_MAT_DEPTH(type);
@ -1383,7 +1556,7 @@ Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, do
else else
ky = getGaussianKernel(ksize.height, sigma2, std::max(depth, CV_32F)); ky = getGaussianKernel(ksize.height, sigma2, std::max(depth, CV_32F));
return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1, -1), 0.0, bordertype); return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1, -1), 0.0, bordertype, imgSize);
} }
void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2, int bordertype) void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2, int bordertype)
@ -1419,7 +1592,7 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
dst.create(src.size(), src.type()); dst.create(src.size(), src.type());
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype); Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype, src.size());
f->apply(src, dst); f->apply(src, dst);
} }

View File

@ -48,154 +48,142 @@
using namespace cv; using namespace cv;
using namespace cv::ocl; using namespace cv::ocl;
// currently sort procedure on the host is more efficient
static bool use_cpu_sorter = true; static bool use_cpu_sorter = true;
namespace // compact structure for corners
struct DefCorner
{ {
enum SortMethod float eig; //eigenvalue of corner
short x; //x coordinate of corner point
short y; //y coordinate of corner point
} ;
// compare procedure for corner
//it is used for sort on the host side
struct DefCornerCompare
{ {
CPU_STL, bool operator()(const DefCorner a, const DefCorner b) const
BITONIC,
SELECTION
};
const int GROUP_SIZE = 256;
template<SortMethod method>
struct Sorter
{
//typedef EigType;
};
//TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed.
template<>
struct Sorter<CPU_STL>
{
typedef oclMat EigType;
static cv::Mutex cs;
static Mat mat_eig;
//prototype
static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2)
{ {
float v1 = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0])); return a.eig > b.eig;
float v2 = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0]));
return v1 > v2;
}
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
{
cv::AutoLock lock(cs);
//temporarily use STL's sort function
Mat mat_corners = corners;
mat_eig = eig_tex;
std::sort(mat_corners.begin<cl_float2>(), mat_corners.begin<cl_float2>() + count, clfloat2Gt);
corners = mat_corners;
} }
}; };
cv::Mutex Sorter<CPU_STL>::cs;
cv::Mat Sorter<CPU_STL>::mat_eig;
template<> // sort corner point using opencl bitonicosrt implementation
struct Sorter<BITONIC> static void sortCorners_caller(oclMat& corners, const int count)
{ {
typedef TextureCL EigType; Context * cxt = Context::getContext();
int GS = count/2;
int LS = min(255,GS);
size_t globalThreads[3] = {GS, 1, 1};
size_t localThreads[3] = {LS, 1, 1};
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) // 2^numStages should be equal to count or the output is invalid
int numStages = 0;
for(int i = count; i > 1; i >>= 1)
{ {
Context * cxt = Context::getContext(); ++numStages;
size_t globalThreads[3] = {count / 2, 1, 1}; }
size_t localThreads[3] = {GROUP_SIZE, 1, 1}; const int argc = 4;
std::vector< std::pair<size_t, const void *> > args(argc);
// 2^numStages should be equal to count or the output is invalid std::string kernelname = "sortCorners_bitonicSort";
int numStages = 0; args[0] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
for(int i = count; i > 1; i >>= 1) args[1] = std::make_pair(sizeof(cl_int), (void *)&count);
for(int stage = 0; stage < numStages; ++stage)
{
args[2] = std::make_pair(sizeof(cl_int), (void *)&stage);
for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
{ {
++numStages; args[3] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
} openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
const int argc = 5;
std::vector< std::pair<size_t, const void *> > args(argc);
String kernelname = "sortCorners_bitonicSort";
args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex);
args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
args[2] = std::make_pair(sizeof(cl_int), (void *)&count);
for(int stage = 0; stage < numStages; ++stage)
{
args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
{
args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
}
} }
} }
}; }
template<> // find corners on matrix and put it into array
struct Sorter<SELECTION> static void findCorners_caller(
{ const oclMat& eig_mat, //input matrix worth eigenvalues
typedef TextureCL EigType; oclMat& eigMinMax, //input with min and max values of eigenvalues
const float qualityLevel,
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count) const oclMat& mask,
{ oclMat& corners, //output array with detected corners
Context * cxt = Context::getContext(); oclMat& counter) //output value with number of detected corners, have to be 0 before call
size_t globalThreads[3] = {count, 1, 1};
size_t localThreads[3] = {GROUP_SIZE, 1, 1};
std::vector< std::pair<size_t, const void *> > args;
//local
String kernelname = "sortCorners_selectionSortLocal";
int lds_size = GROUP_SIZE * sizeof(cl_float2);
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) );
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) );
args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
args.push_back( std::make_pair( lds_size, (void*)NULL) );
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
//final
kernelname = "sortCorners_selectionSortFinal";
args.pop_back();
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
}
};
int findCorners_caller(
const TextureCL& eig,
const float threshold,
const oclMat& mask,
oclMat& corners,
const int max_count)
{ {
String opt;
std::vector<int> k; std::vector<int> k;
Context * cxt = Context::getContext(); Context * cxt = Context::getContext();
std::vector< std::pair<size_t, const void*> > args; std::vector< std::pair<size_t, const void*> > args;
String kernelname = "findCorners";
const int mask_strip = mask.step / mask.elemSize1(); const int mask_strip = mask.step / mask.elemSize1();
oclMat g_counter(1, 1, CV_32SC1); args.push_back(std::make_pair( sizeof(cl_mem), (void*)&(eig_mat.data)));
g_counter.setTo(0);
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eig )); int src_pitch = (int)eig_mat.step;
args.push_back(std::make_pair( sizeof(cl_int), (void*)&src_pitch ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&mask.data )); args.push_back(std::make_pair( sizeof(cl_mem), (void*)&mask.data ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&corners.data )); args.push_back(std::make_pair( sizeof(cl_mem), (void*)&corners.data ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&mask_strip)); args.push_back(std::make_pair( sizeof(cl_int), (void*)&mask_strip));
args.push_back(std::make_pair( sizeof(cl_float), (void*)&threshold )); args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eigMinMax.data ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.rows )); args.push_back(std::make_pair( sizeof(cl_float), (void*)&qualityLevel ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.cols )); args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig_mat.rows ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&max_count )); args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig_mat.cols ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&g_counter.data )); args.push_back(std::make_pair( sizeof(cl_int), (void*)&corners.cols ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&counter.data ));
size_t globalThreads[3] = {eig.cols, eig.rows, 1}; size_t globalThreads[3] = {eig_mat.cols, eig_mat.rows, 1};
size_t localThreads[3] = {16, 16, 1}; size_t localThreads[3] = {16, 16, 1};
if(!mask.empty())
opt += " -D WITH_MASK=1";
const char * opt = mask.empty() ? "" : "-D WITH_MASK"; openCLExecuteKernel(cxt, &imgproc_gftt, "findCorners", globalThreads, localThreads, args, -1, -1, opt.c_str());
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1, opt); }
return std::min(Mat(g_counter).at<int>(0), max_count);
static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
{
size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
CV_Assert(groupnum != 0);
int dbsize = groupnum * 2 * src.elemSize();
ensureSizeIsEnough(1, dbsize, CV_8UC1, dst);
cl_mem dst_data = reinterpret_cast<cl_mem>(dst.data);
int all_cols = src.step / src.elemSize();
int pre_cols = (src.offset % src.step) / src.elemSize();
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
int invalid_cols = pre_cols + sec_cols;
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;
int offset = src.offset / src.elemSize();
{// first parallel pass
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
size_t globalThreads[3] = {groupnum * 256, 1, 1};
size_t localThreads[3] = {256, 1, 1};
openCLExecuteKernel(src.clCxt, &arithm_minMax, "arithm_op_minMax", globalThreads, localThreads,
args, -1, -1, "-D T=float -D DEPTH_5");
}
{// run final "serial" kernel to find accumulate results from threads and reset corner counter
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&tozero.data ));
size_t globalThreads[3] = {1, 1, 1};
size_t localThreads[3] = {1, 1, 1};
openCLExecuteKernel(src.clCxt, &imgproc_gftt, "arithm_op_minMax_final", globalThreads, localThreads,
args, -1, -1);
}
} }
}//unnamed namespace
void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask) void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
{ {
@ -205,67 +193,99 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
ensureSizeIsEnough(image.size(), CV_32F, eig_); ensureSizeIsEnough(image.size(), CV_32F, eig_);
if (useHarrisDetector) if (useHarrisDetector)
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK); cornerHarris_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
else else
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3); cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
double maxVal = 0; ensureSizeIsEnough(1,1, CV_32SC1, counter_);
minMax(eig_, NULL, &maxVal);
ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_); // find max eigenvalue and reset detected counters
minMaxEig_caller(eig_,eig_minmax_,counter_);
Ptr<TextureCL> eig_tex = bindTexturePtr(eig_); // allocate buffer for kernels
int total = findCorners_caller( int corner_array_size = std::max(1024, static_cast<int>(image.size().area() * 0.05));
*eig_tex,
static_cast<float>(maxVal * qualityLevel), if(!use_cpu_sorter)
{ // round to 2^n
unsigned int n=1;
for(n=1;n<(unsigned int)corner_array_size;n<<=1);
corner_array_size = (int)n;
ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
// set to 0 to be able use bitonic sort on whole 2^n array
tmpCorners_.setTo(0);
}
else
{
ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
}
int total = tmpCorners_.cols; // by default the number of corner is full array
std::vector<DefCorner> tmp(tmpCorners_.cols); // input buffer with corner for HOST part of algorithm
//find points with high eigenvalue and put it into the output array
findCorners_caller(
eig_,
eig_minmax_,
static_cast<float>(qualityLevel),
mask, mask,
tmpCorners_, tmpCorners_,
tmpCorners_.cols); counter_);
if(!use_cpu_sorter)
{// sort detected corners on deivce side
sortCorners_caller(tmpCorners_, corner_array_size);
}
else
{// send non-blocking request to read real non-zero number of corners to sort it on the HOST side
openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem)counter_.data, CL_FALSE, 0,sizeof(int), &total, 0, NULL, NULL));
}
//blocking read whole corners array (sorted or not sorted)
openCLReadBuffer(tmpCorners_.clCxt,(cl_mem)tmpCorners_.data,&tmp[0],tmpCorners_.cols*sizeof(DefCorner));
if (total == 0) if (total == 0)
{ {// check for trivial case
corners.release(); corners.release();
return; return;
} }
if(use_cpu_sorter) if(use_cpu_sorter)
{ {// sort detected corners on cpu side.
Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total); tmp.resize(total);
} std::sort(tmp.begin(), tmp.end(), DefCornerCompare());
else
{
//if total is power of 2
if(((total - 1) & (total)) == 0)
{
Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total);
}
else
{
Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total);
}
} }
//estimate maximal size of final output array
int total_max = maxCorners > 0 ? std::min(maxCorners, total) : total;
int D2 = (int)ceil(minDistance * minDistance);
// allocate output buffer
std::vector<Point2f> tmp2;
tmp2.reserve(total_max);
if (minDistance < 1) if (minDistance < 1)
{ {// we have not distance restriction. then just copy with conversion maximal allowed points into output array
Rect roi_range(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1); for(int i=0;i<total_max && tmp[i].eig>0.0f;++i)
tmpCorners_(roi_range).copyTo(corners); {
tmp2.push_back(Point2f(tmp[i].x,tmp[i].y));
}
} }
else else
{ {// we have distance restriction. then start coping to output array from the first element and check distance for each next one
std::vector<Point2f> tmp(total);
downloadPoints(tmpCorners_, tmp);
std::vector<Point2f> tmp2;
tmp2.reserve(total);
const int cell_size = cvRound(minDistance); const int cell_size = cvRound(minDistance);
const int grid_width = (image.cols + cell_size - 1) / cell_size; const int grid_width = (image.cols + cell_size - 1) / cell_size;
const int grid_height = (image.rows + cell_size - 1) / cell_size; const int grid_height = (image.rows + cell_size - 1) / cell_size;
std::vector< std::vector<Point2f> > grid(grid_width * grid_height); std::vector< std::vector<Point2i> > grid(grid_width * grid_height);
for (int i = 0; i < total; ++i) for (int i = 0; i < total ; ++i)
{ {
Point2f p = tmp[i]; DefCorner p = tmp[i];
if(p.eig<=0.0f)
break; // condition to stop that is needed for GPU bitonic sort usage.
bool good = true; bool good = true;
@ -287,40 +307,42 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
{ {
for (int xx = x1; xx <= x2; xx++) for (int xx = x1; xx <= x2; xx++)
{ {
std::vector<Point2f>& m = grid[yy * grid_width + xx]; std::vector<Point2i>& m = grid[yy * grid_width + xx];
if (m.empty())
if (!m.empty()) continue;
for(size_t j = 0; j < m.size(); j++)
{ {
for(size_t j = 0; j < m.size(); j++) int dx = p.x - m[j].x;
{ int dy = p.y - m[j].y;
float dx = p.x - m[j].x;
float dy = p.y - m[j].y;
if (dx * dx + dy * dy < minDistance * minDistance) if (dx * dx + dy * dy < D2)
{ {
good = false; good = false;
goto break_out; goto break_out_;
}
} }
} }
} }
} }
break_out: break_out_:
if(good) if(good)
{ {
grid[y_cell * grid_width + x_cell].push_back(p); grid[y_cell * grid_width + x_cell].push_back(Point2i(p.x,p.y));
tmp2.push_back(p); tmp2.push_back(Point2f(p.x,p.y));
if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners)) if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
break; break;
} }
} }
corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
} }
int final_size = static_cast<int>(tmp2.size());
if(final_size>0)
corners.upload(Mat(1, final_size, CV_32FC2, &tmp2[0]));
else
corners.release();
} }
void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v) void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v)
{ {

View File

@ -866,16 +866,17 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
if(gcascade->is_stump_based && gsum.clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE)) if(gcascade->is_stump_based && gsum.clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE))
{ {
//setup local group size //setup local group size for "pixel step" = 1
localThreads[0] = 8; localThreads[0] = 16;
localThreads[1] = 16; localThreads[1] = 32;
localThreads[2] = 1; localThreads[2] = 1;
//init maximal number of workgroups //calc maximal number of workgroups
int WGNumX = 1+(sizev[0].width /(localThreads[0])); int WGNumX = 1+(sizev[0].width /(localThreads[0]));
int WGNumY = 1+(sizev[0].height/(localThreads[1])); int WGNumY = 1+(sizev[0].height/(localThreads[1]));
int WGNumZ = loopcount; int WGNumZ = loopcount;
int WGNum = 0; //accurate number of non -empty workgroups int WGNumTotal = 0; //accurate number of non-empty workgroups
int WGNumSampled = 0; //accurate number of workgroups processed only 1/4 part of all pixels. it is made for large images with scale <= 2
oclMat oclWGInfo(1,sizeof(cl_int4) * WGNumX*WGNumY*WGNumZ,CV_8U); oclMat oclWGInfo(1,sizeof(cl_int4) * WGNumX*WGNumY*WGNumZ,CV_8U);
{ {
cl_int4* pWGInfo = (cl_int4*)clEnqueueMapBuffer(getClCommandQueue(oclWGInfo.clCxt),(cl_mem)oclWGInfo.datastart,true,CL_MAP_WRITE, 0, oclWGInfo.step, 0,0,0,&status); cl_int4* pWGInfo = (cl_int4*)clEnqueueMapBuffer(getClCommandQueue(oclWGInfo.clCxt),(cl_mem)oclWGInfo.datastart,true,CL_MAP_WRITE, 0, oclWGInfo.step, 0,0,0,&status);
@ -895,12 +896,16 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
if(gx>=(Width-cascade->orig_window_size.width)) if(gx>=(Width-cascade->orig_window_size.width))
continue; // no data to process continue; // no data to process
if(scaleinfo[z].factor<=2)
{
WGNumSampled++;
}
// save no-empty workgroup info into array // save no-empty workgroup info into array
pWGInfo[WGNum].s[0] = scaleinfo[z].width_height; pWGInfo[WGNumTotal].s[0] = scaleinfo[z].width_height;
pWGInfo[WGNum].s[1] = (gx << 16) | gy; pWGInfo[WGNumTotal].s[1] = (gx << 16) | gy;
pWGInfo[WGNum].s[2] = scaleinfo[z].imgoff; pWGInfo[WGNumTotal].s[2] = scaleinfo[z].imgoff;
memcpy(&(pWGInfo[WGNum].s[3]),&(scaleinfo[z].factor),sizeof(float)); memcpy(&(pWGInfo[WGNumTotal].s[3]),&(scaleinfo[z].factor),sizeof(float));
WGNum++; WGNumTotal++;
} }
} }
} }
@ -908,13 +913,8 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
pWGInfo = NULL; pWGInfo = NULL;
} }
// setup global sizes to have linear array of workgroups with WGNum size
globalThreads[0] = localThreads[0]*WGNum;
globalThreads[1] = localThreads[1];
globalThreads[2] = 1;
#define NODE_SIZE 12 #define NODE_SIZE 12
// pack node info to have less memory loads // pack node info to have less memory loads on the device side
oclMat oclNodesPK(1,sizeof(cl_int) * NODE_SIZE * nodenum,CV_8U); oclMat oclNodesPK(1,sizeof(cl_int) * NODE_SIZE * nodenum,CV_8U);
{ {
cl_int status; cl_int status;
@ -963,8 +963,6 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
options += format(" -D WND_SIZE_X=%d",cascade->orig_window_size.width); options += format(" -D WND_SIZE_X=%d",cascade->orig_window_size.width);
options += format(" -D WND_SIZE_Y=%d",cascade->orig_window_size.height); options += format(" -D WND_SIZE_Y=%d",cascade->orig_window_size.height);
options += format(" -D STUMP_BASED=%d",gcascade->is_stump_based); options += format(" -D STUMP_BASED=%d",gcascade->is_stump_based);
options += format(" -D LSx=%d",localThreads[0]);
options += format(" -D LSy=%d",localThreads[1]);
options += format(" -D SPLITNODE=%d",splitnode); options += format(" -D SPLITNODE=%d",splitnode);
options += format(" -D SPLITSTAGE=%d",splitstage); options += format(" -D SPLITSTAGE=%d",splitstage);
options += format(" -D OUTPUTSZ=%d",outputsz); options += format(" -D OUTPUTSZ=%d",outputsz);
@ -972,8 +970,39 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
// init candiate global count by 0 // init candiate global count by 0
int pattern = 0; int pattern = 0;
openCLSafeCall(clEnqueueWriteBuffer(qu, candidatebuffer, 1, 0, 1 * sizeof(pattern),&pattern, 0, NULL, NULL)); openCLSafeCall(clEnqueueWriteBuffer(qu, candidatebuffer, 1, 0, 1 * sizeof(pattern),&pattern, 0, NULL, NULL));
// execute face detector
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, localThreads, args, -1, -1, options.c_str()); if(WGNumTotal>WGNumSampled)
{// small images and each pixel is processed
// setup global sizes to have linear array of workgroups with WGNum size
int pixelstep = 1;
size_t LS[3]={localThreads[0]/pixelstep,localThreads[1]/pixelstep,1};
globalThreads[0] = LS[0]*(WGNumTotal-WGNumSampled);
globalThreads[1] = LS[1];
globalThreads[2] = 1;
String options1 = options;
options1 += format(" -D PIXEL_STEP=%d",pixelstep);
options1 += format(" -D WGSTART=%d",WGNumSampled);
options1 += format(" -D LSx=%d",LS[0]);
options1 += format(" -D LSy=%d",LS[1]);
// execute face detector
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, LS, args, -1, -1, options1.c_str());
}
if(WGNumSampled>0)
{// large images each 4th pixel is processed
// setup global sizes to have linear array of workgroups with WGNum size
int pixelstep = 2;
size_t LS[3]={localThreads[0]/pixelstep,localThreads[1]/pixelstep,1};
globalThreads[0] = LS[0]*WGNumSampled;
globalThreads[1] = LS[1];
globalThreads[2] = 1;
String options2 = options;
options2 += format(" -D PIXEL_STEP=%d",pixelstep);
options2 += format(" -D WGSTART=%d",0);
options2 += format(" -D LSx=%d",LS[0]);
options2 += format(" -D LSy=%d",LS[1]);
// execute face detector
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, LS, args, -1, -1, options2.c_str());
}
//read candidate buffer back and put it into host list //read candidate buffer back and put it into host list
openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz ); openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
assert(candidate[0]<outputsz); assert(candidate[0]<outputsz);

View File

@ -76,6 +76,11 @@ namespace cv
int cdescr_width; int cdescr_width;
int cdescr_height; int cdescr_height;
// A shift value and type that allows qangle to be different
// sizes on different hardware
int qangle_step_shift;
int qangle_type;
void set_up_constants(int nbins, int block_stride_x, int block_stride_y, void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
int nblocks_win_x, int nblocks_win_y); int nblocks_win_x, int nblocks_win_y);
@ -153,6 +158,7 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
hog_device_cpu = true; hog_device_cpu = true;
else else
hog_device_cpu = false; hog_device_cpu = false;
} }
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
@ -213,7 +219,7 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
effect_size = img.size(); effect_size = img.size();
grad.create(img.size(), CV_32FC2); grad.create(img.size(), CV_32FC2);
qangle.create(img.size(), CV_8UC2); qangle.create(img.size(), hog::qangle_type);
const size_t block_hist_size = getBlockHistogramSize(); const size_t block_hist_size = getBlockHistogramSize();
const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride); const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
@ -1606,6 +1612,16 @@ void cv::ocl::device::hog::set_up_constants(int nbins,
int descr_size = descr_width * nblocks_win_y; int descr_size = descr_width * nblocks_win_y;
cdescr_size = descr_size; cdescr_size = descr_size;
qangle_type = CV_8UC2;
qangle_step_shift = 0;
// Some Intel devices have low single-byte access performance,
// so we change the datatype here.
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
{
qangle_type = CV_32SC2;
qangle_step_shift = 2;
}
} }
void cv::ocl::device::hog::compute_hists(int nbins, void cv::ocl::device::hog::compute_hists(int nbins,
@ -1627,7 +1643,7 @@ void cv::ocl::device::hog::compute_hists(int nbins,
int blocks_total = img_block_width * img_block_height; int blocks_total = img_block_width * img_block_height;
int grad_quadstep = grad.step >> 2; int grad_quadstep = grad.step >> 2;
int qangle_step = qangle.step; int qangle_step = qangle.step >> qangle_step_shift;
int blocks_in_group = 4; int blocks_in_group = 4;
size_t localThreads[3] = { blocks_in_group * 24, 2, 1 }; size_t localThreads[3] = { blocks_in_group * 24, 2, 1 };
@ -1892,7 +1908,7 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width,
char correctGamma = (correct_gamma) ? 1 : 0; char correctGamma = (correct_gamma) ? 1 : 0;
int img_step = img.step; int img_step = img.step;
int grad_quadstep = grad.step >> 3; int grad_quadstep = grad.step >> 3;
int qangle_step = qangle.step >> 1; int qangle_step = qangle.step >> (1 + qangle_step_shift);
args.push_back( std::make_pair( sizeof(cl_int), (void *)&height)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&width)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
@ -1927,7 +1943,7 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width,
char correctGamma = (correct_gamma) ? 1 : 0; char correctGamma = (correct_gamma) ? 1 : 0;
int img_step = img.step >> 2; int img_step = img.step >> 2;
int grad_quadstep = grad.step >> 3; int grad_quadstep = grad.step >> 3;
int qangle_step = qangle.step >> 1; int qangle_step = qangle.step >> (1 + qangle_step_shift);
args.push_back( std::make_pair( sizeof(cl_int), (void *)&height)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&width)); args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));

View File

@ -1035,67 +1035,117 @@ namespace cv
else else
scale = 1. / scale; scale = 1. / scale;
if (ksize > 0) const int sobel_lsz = 16;
if((src.type() == CV_8UC1 || src.type() == CV_32FC1) &&
(ksize==3 || ksize==5 || ksize==7 || ksize==-1) &&
src.wholerows > sobel_lsz + (ksize>>1) &&
src.wholecols > sobel_lsz + (ksize>>1))
{ {
Context* clCxt = Context::getContext(); Dx.create(src.size(), CV_32FC1);
if(clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) && src.type() == CV_8UC1 && Dy.create(src.size(), CV_32FC1);
src.cols % 8 == 0 && src.rows % 8 == 0 &&
ksize==3 && CV_Assert(Dx.rows == Dy.rows && Dx.cols == Dy.cols);
(borderType ==cv::BORDER_REFLECT ||
borderType == cv::BORDER_REPLICATE || size_t lt2[3] = {sobel_lsz, sobel_lsz, 1};
borderType ==cv::BORDER_REFLECT101 || size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};
borderType ==cv::BORDER_WRAP))
unsigned int src_pitch = src.step;
unsigned int Dx_pitch = Dx.step;
unsigned int Dy_pitch = Dy.step;
int src_offset_x = (src.offset % src.step) / src.elemSize();
int src_offset_y = src.offset / src.step;
float _scale = scale;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_x ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_y ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.offset ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dx_pitch ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dy.offset ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dy_pitch ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholecols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholerows ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.rows ));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&_scale ));
String option = cv::format("-D BLK_X=%d -D BLK_Y=%d",(int)lt2[0],(int)lt2[1]);
switch(src.type())
{ {
Dx.create(src.size(), CV_32FC1); case CV_8UC1:
Dy.create(src.size(), CV_32FC1); option += " -D SRCTYPE=uchar";
break;
const unsigned int block_x = 8; case CV_32FC1:
const unsigned int block_y = 8; option += " -D SRCTYPE=float";
break;
unsigned int src_pitch = src.step;
unsigned int dst_pitch = Dx.cols;
float _scale = scale;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));
args.push_back( std::make_pair( sizeof(cl_float) , (void *)&_scale ));
size_t gt2[3] = {src.cols, src.rows, 1}, lt2[3] = {block_x, block_y, 1};
String option = "-D BLK_X=8 -D BLK_Y=8";
switch(borderType)
{
case cv::BORDER_REPLICATE:
option += " -D BORDER_REPLICATE";
break;
case cv::BORDER_REFLECT:
option += " -D BORDER_REFLECT";
break;
case cv::BORDER_REFLECT101:
option += " -D BORDER_REFLECT101";
break;
case cv::BORDER_WRAP:
option += " -D BORDER_WRAP";
break;
}
openCLExecuteKernel(src.clCxt, &imgproc_sobel3, "sobel3", gt2, lt2, args, -1, -1, option.c_str() );
} }
else switch(borderType)
{
case cv::BORDER_CONSTANT:
option += " -D BORDER_CONSTANT";
break;
case cv::BORDER_REPLICATE:
option += " -D BORDER_REPLICATE";
break;
case cv::BORDER_REFLECT:
option += " -D BORDER_REFLECT";
break;
case cv::BORDER_REFLECT101:
option += " -D BORDER_REFLECT_101";
break;
case cv::BORDER_WRAP:
option += " -D BORDER_WRAP";
break;
default:
CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
break;
}
String kernel_name;
switch(ksize)
{
case -1:
option += " -D SCHARR";
kernel_name = "sobel3";
break;
case 3:
kernel_name = "sobel3";
break;
case 5:
kernel_name = "sobel5";
break;
case 7:
kernel_name = "sobel7";
break;
default:
CV_Error(CV_StsBadFlag, "Kernel size is not supported!");
break;
}
openCLExecuteKernel(src.clCxt, &imgproc_sobel3, kernel_name, gt2, lt2, args, -1, -1, option.c_str() );
}
else
{
if (ksize > 0)
{ {
Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType); Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType);
Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, 0, borderType); Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, 0, borderType);
} }
} else
else {
{ Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType);
Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType); Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType);
Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType); }
} }
CV_Assert(Dx.offset == 0 && Dy.offset == 0); CV_Assert(Dx.offset == 0 && Dy.offset == 0);
} }

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,185 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro for border type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
#ifdef BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
#define EXTRAPOLATE(x, maxV)
#elif defined BORDER_REPLICATE
//aaaaaa|abcdefgh|hhhhhhh
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = max(min((x), (maxV) - 1), 0); \
}
#elif defined BORDER_WRAP
//cdefgh|abcdefgh|abcdefg
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = ( (x) + (maxV) ) % (maxV); \
}
#elif defined BORDER_REFLECT
//fedcba|abcdefgh|hgfedcb
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = min(((maxV)-1)*2-(x)+1, max((x),-(x)-1) ); \
}
#elif defined BORDER_REFLECT_101
//gfedcb|abcdefgh|gfedcba
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = min(((maxV)-1)*2-(x), max((x),-(x)) ); \
}
#else
#error No extrapolation method
#endif
#define SRC(_x,_y) CONVERT_SRCTYPE(((global SRCTYPE*)(Src+(_y)*SrcPitch))[_x])
#ifdef BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
#define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y))
#else
#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
#endif
#define DST(_x,_y) (((global DSTTYPE*)(Dst+DstOffset+(_y)*DstPitch))[_x])
//horizontal and vertical filter kernels
//should be defined on host during compile time to avoid overhead
__constant uint mat_kernelX[] = {KERNEL_MATRIX_X};
__constant uint mat_kernelY[] = {KERNEL_MATRIX_Y};
__kernel __attribute__((reqd_work_group_size(BLK_X,BLK_Y,1))) void sep_filter_singlepass
(
__global uchar* Src,
const uint SrcPitch,
const int srcOffsetX,
const int srcOffsetY,
__global uchar* Dst,
const int DstOffset,
const uint DstPitch,
int width,
int height,
int dstWidth,
int dstHeight
)
{
//RADIUSX, RADIUSY are filter dimensions
//BLK_X, BLK_Y are local wrogroup sizes
//all these should be defined on host during compile time
//first lsmem array for source pixels used in first pass,
//second lsmemDy for storing first pass results
__local WORKTYPE lsmem[BLK_Y+2*RADIUSY][BLK_X+2*RADIUSX];
__local WORKTYPE lsmemDy[BLK_Y][BLK_X+2*RADIUSX];
//get local and global ids - used as image and local memory array indexes
int lix = get_local_id(0);
int liy = get_local_id(1);
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
//calculate pixel position in source image taking image offset into account
int srcX = x + srcOffsetX - RADIUSX;
int srcY = y + srcOffsetY - RADIUSY;
int xb = srcX;
int yb = srcY;
//extrapolate coordinates, if needed
//and read my own source pixel into local memory
//with account for extra border pixels, which will be read by starting workitems
int clocY = liy;
int cSrcY = srcY;
do
{
int yb = cSrcY;
EXTRAPOLATE(yb, (height));
int clocX = lix;
int cSrcX = srcX;
do
{
int xb = cSrcX;
EXTRAPOLATE(xb,(width));
lsmem[clocY][clocX] = ELEM(xb, yb, (width), (height), 0 );
clocX += BLK_X;
cSrcX += BLK_X;
}
while(clocX < BLK_X+(RADIUSX*2));
clocY += BLK_Y;
cSrcY += BLK_Y;
}
while(clocY < BLK_Y+(RADIUSY*2));
barrier(CLK_LOCAL_MEM_FENCE);
//do vertical filter pass
//and store intermediate results to second local memory array
int i;
WORKTYPE sum = 0.0f;
int clocX = lix;
do
{
sum = 0.0f;
for(i=0; i<=2*RADIUSY; i++)
sum = mad(lsmem[liy+i][clocX], as_float(mat_kernelY[i]), sum);
lsmemDy[liy][clocX] = sum;
clocX += BLK_X;
}
while(clocX < BLK_X+(RADIUSX*2));
barrier(CLK_LOCAL_MEM_FENCE);
//if this pixel happened to be out of image borders because of global size rounding,
//then just return
if( x >= dstWidth || y >=dstHeight ) return;
//do second horizontal filter pass
//and calculate final result
sum = 0.0f;
for(i=0; i<=2*RADIUSX; i++)
sum = mad(lsmemDy[liy][lix+i], as_float(mat_kernelX[i]), sum);
//store result into destination image
DST(x,y) = CONVERT_DSTTYPE(sum);
}

View File

@ -126,13 +126,11 @@ __kernel void gpuRunHaarClassifierCascadePacked(
) )
{ {
// this version used information provided for each workgroup
// no empty WG
int gid = (int)get_group_id(0); int gid = (int)get_group_id(0);
int lid_x = (int)get_local_id(0); int lid_x = (int)get_local_id(0);
int lid_y = (int)get_local_id(1); int lid_y = (int)get_local_id(1);
int lid = lid_y*LSx+lid_x; int lid = lid_y*LSx+lid_x;
int4 WGInfo = pWGInfo[gid]; int4 WGInfo = pWGInfo[WGSTART+gid];
int GroupX = (WGInfo.y >> 16)&0xFFFF; int GroupX = (WGInfo.y >> 16)&0xFFFF;
int GroupY = (WGInfo.y >> 0 )& 0xFFFF; int GroupY = (WGInfo.y >> 0 )& 0xFFFF;
int Width = (WGInfo.x >> 16)&0xFFFF; int Width = (WGInfo.x >> 16)&0xFFFF;
@ -140,8 +138,8 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int ImgOffset = WGInfo.z; int ImgOffset = WGInfo.z;
float ScaleFactor = as_float(WGInfo.w); float ScaleFactor = as_float(WGInfo.w);
#define DATA_SIZE_X (LSx+WND_SIZE_X) #define DATA_SIZE_X (PIXEL_STEP*LSx+WND_SIZE_X)
#define DATA_SIZE_Y (LSy+WND_SIZE_Y) #define DATA_SIZE_Y (PIXEL_STEP*LSy+WND_SIZE_Y)
#define DATA_SIZE (DATA_SIZE_X*DATA_SIZE_Y) #define DATA_SIZE (DATA_SIZE_X*DATA_SIZE_Y)
local int SumL[DATA_SIZE]; local int SumL[DATA_SIZE];
@ -165,9 +163,11 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int4 info1 = p; int4 info1 = p;
int4 info2 = pq; int4 info2 = pq;
{ // calc processed ROI coordinate in local mem
int xl = lid_x; int xl = lid_x*PIXEL_STEP;
int yl = lid_y; int yl = lid_y*PIXEL_STEP;
{// calc variance_norm_factor for all stages
int OffsetLocal = yl * DATA_SIZE_X + xl; int OffsetLocal = yl * DATA_SIZE_X + xl;
int OffsetGlobal = (GroupY+yl)* pixelstep + (GroupX+xl); int OffsetGlobal = (GroupY+yl)* pixelstep + (GroupX+xl);
@ -194,13 +194,13 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int result = (1.0f>0.0f); int result = (1.0f>0.0f);
for(int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++ ) for(int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++ )
{// iterate until candidate is exist {// iterate until candidate is valid
float stage_sum = 0.0f; float stage_sum = 0.0f;
__global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*) __global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier)); ((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier));
int lcl_off = (yl*DATA_SIZE_X)+(xl);
int stagecount = stageinfo->count; int stagecount = stageinfo->count;
float stagethreshold = stageinfo->threshold; float stagethreshold = stageinfo->threshold;
int lcl_off = (lid_y*DATA_SIZE_X)+(lid_x);
for(int nodeloop = 0; nodeloop < stagecount; nodecounter++,nodeloop++ ) for(int nodeloop = 0; nodeloop < stagecount; nodecounter++,nodeloop++ )
{ {
// simple macro to extract shorts from int // simple macro to extract shorts from int
@ -212,7 +212,7 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int4 n1 = pN[1]; int4 n1 = pN[1];
int4 n2 = pN[2]; int4 n2 = pN[2];
float nodethreshold = as_float(n2.y) * variance_norm_factor; float nodethreshold = as_float(n2.y) * variance_norm_factor;
// calc sum of intensity pixels according to node information // calc sum of intensity pixels according to classifier node information
float classsum = float classsum =
(SumL[M0(n0.x)+lcl_off] - SumL[M1(n0.x)+lcl_off] - SumL[M0(n0.y)+lcl_off] + SumL[M1(n0.y)+lcl_off]) * as_float(n1.z) + (SumL[M0(n0.x)+lcl_off] - SumL[M1(n0.x)+lcl_off] - SumL[M0(n0.y)+lcl_off] + SumL[M1(n0.y)+lcl_off]) * as_float(n1.z) +
(SumL[M0(n0.z)+lcl_off] - SumL[M1(n0.z)+lcl_off] - SumL[M0(n0.w)+lcl_off] + SumL[M1(n0.w)+lcl_off]) * as_float(n1.w) + (SumL[M0(n0.z)+lcl_off] - SumL[M1(n0.z)+lcl_off] - SumL[M0(n0.w)+lcl_off] + SumL[M1(n0.w)+lcl_off]) * as_float(n1.w) +
@ -228,8 +228,8 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int index = 1+atomic_inc((volatile global int*)candidate); //get index to write global data with face info int index = 1+atomic_inc((volatile global int*)candidate); //get index to write global data with face info
if(index<OUTPUTSZ) if(index<OUTPUTSZ)
{ {
int x = GroupX+lid_x; int x = GroupX+xl;
int y = GroupY+lid_y; int y = GroupY+yl;
int4 candidate_result; int4 candidate_result;
candidate_result.x = convert_int_rtn(x*ScaleFactor); candidate_result.x = convert_int_rtn(x*ScaleFactor);
candidate_result.y = convert_int_rtn(y*ScaleFactor); candidate_result.y = convert_int_rtn(y*ScaleFactor);

View File

@ -46,33 +46,26 @@
#ifndef WITH_MASK #ifndef WITH_MASK
#define WITH_MASK 0 #define WITH_MASK 0
#endif #endif
//macro to read eigenvalue matrix
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST; #define GET_SRC_32F(_x, _y) ((__global const float*)(eig + (_y)*eig_pitch))[_x]
inline float ELEM_INT2(image2d_t _eig, int _x, int _y)
{
return read_imagef(_eig, sampler, (int2)(_x, _y)).x;
}
inline float ELEM_FLT2(image2d_t _eig, float2 pt)
{
return read_imagef(_eig, sampler, pt).x;
}
__kernel __kernel
void findCorners void findCorners
( (
image2d_t eig, __global const char* eig,
__global const char * mask, const int eig_pitch,
__global float2 * corners, __global const char* mask,
const int mask_strip,// in pixels __global float2* corners,
const float threshold, const int mask_strip,// in pixels
const int rows, __global const float* pMinMax,
const int cols, const float qualityLevel,
const int max_count, const int rows,
__global int * g_counter const int cols,
const int max_count,
__global int* g_counter
) )
{ {
float threshold = qualityLevel*pMinMax[1];
const int j = get_global_id(0); const int j = get_global_id(0);
const int i = get_global_id(1); const int i = get_global_id(1);
@ -82,39 +75,42 @@ __kernel
#endif #endif
) )
{ {
const float val = ELEM_INT2(eig, j, i); const float val = GET_SRC_32F(j, i);
if (val > threshold) if (val > threshold)
{ {
float maxVal = val; float maxVal = val;
maxVal = fmax(GET_SRC_32F(j - 1, i - 1), maxVal);
maxVal = fmax(GET_SRC_32F(j , i - 1), maxVal);
maxVal = fmax(GET_SRC_32F(j + 1, i - 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j - 1, i - 1), maxVal); maxVal = fmax(GET_SRC_32F(j - 1, i), maxVal);
maxVal = fmax(ELEM_INT2(eig, j , i - 1), maxVal); maxVal = fmax(GET_SRC_32F(j + 1, i), maxVal);
maxVal = fmax(ELEM_INT2(eig, j + 1, i - 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j - 1, i), maxVal); maxVal = fmax(GET_SRC_32F(j - 1, i + 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j + 1, i), maxVal); maxVal = fmax(GET_SRC_32F(j , i + 1), maxVal);
maxVal = fmax(GET_SRC_32F(j + 1, i + 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j - 1, i + 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j , i + 1), maxVal);
maxVal = fmax(ELEM_INT2(eig, j + 1, i + 1), maxVal);
if (val == maxVal) if (val == maxVal)
{ {
const int ind = atomic_inc(g_counter); const int ind = atomic_inc(g_counter);
if (ind < max_count) if (ind < max_count)
corners[ind] = (float2)(j, i); {// pack and store eigenvalue and its coordinates
corners[ind].x = val;
corners[ind].y = as_float(j|(i<<16));
}
} }
} }
} }
} }
#undef GET_SRC_32F
//bitonic sort //bitonic sort
__kernel __kernel
void sortCorners_bitonicSort void sortCorners_bitonicSort
( (
image2d_t eig,
__global float2 * corners, __global float2 * corners,
const int count, const int count,
const int stage, const int stage,
@ -140,8 +136,8 @@ __kernel
const float2 leftPt = corners[leftId]; const float2 leftPt = corners[leftId];
const float2 rightPt = corners[rightId]; const float2 rightPt = corners[rightId];
const float leftVal = ELEM_FLT2(eig, leftPt); const float leftVal = leftPt.x;
const float rightVal = ELEM_FLT2(eig, rightPt); const float rightVal = rightPt.x;
const bool compareResult = leftVal > rightVal; const bool compareResult = leftVal > rightVal;
@ -152,124 +148,22 @@ __kernel
corners[rightId] = sortOrder ? greater : lesser; corners[rightId] = sortOrder ? greater : lesser;
} }
//selection sort for gfft // this is simple short serial kernel that makes some short reduction and initialization work
//kernel is ported from Bolt library: // it makes HOST like work to avoid additional sync with HOST to do this short work
//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl // data - input/output float2.
// Local sort will firstly sort elements of each workgroup using selection sort // input data are sevral (min,max) pairs
// its performance is O(n) // output data is one reduced (min,max) pair
__kernel // g_counter - counter that have to be initialized by 0 for next findCorner call.
void sortCorners_selectionSortLocal __kernel void arithm_op_minMax_final(__global float * data, int groupnum,__global int * g_counter)
(
image2d_t eig,
__global float2 * corners,
const int count,
__local float2 * scratch
)
{ {
int i = get_local_id(0); // index in workgroup g_counter[0] = 0;
int numOfGroups = get_num_groups(0); // index in workgroup float minVal = data[0];
int groupID = get_group_id(0); float maxVal = data[groupnum];
int wg = get_local_size(0); // workgroup size = block size for(int i=1;i<groupnum;++i)
int n; // number of elements to be processed for this work group
int offset = groupID * wg;
int same = 0;
corners += offset;
n = (groupID == (numOfGroups-1))? (count - wg*(numOfGroups-1)) : wg;
float2 pt1, pt2;
pt1 = corners[min(i, n)];
scratch[i] = pt1;
barrier(CLK_LOCAL_MEM_FENCE);
if(i >= n)
{ {
return; minVal = min(minVal,data[i]);
maxVal = max(maxVal,data[i+groupnum]);
} }
data[0] = minVal;
float val1 = ELEM_FLT2(eig, pt1); data[1] = maxVal;
float val2;
int pos = 0;
for (int j=0;j<n;++j)
{
pt2 = scratch[j];
val2 = ELEM_FLT2(eig, pt2);
if(val2 > val1)
pos++;//calculate the rank of this element in this work group
else
{
if(val1 > val2)
continue;
else
{
// val1 and val2 are same
same++;
}
}
}
for (int j=0; j< same; j++)
corners[pos + j] = pt1;
}
__kernel
void sortCorners_selectionSortFinal
(
image2d_t eig,
__global float2 * corners,
const int count
)
{
const int i = get_local_id(0); // index in workgroup
const int numOfGroups = get_num_groups(0); // index in workgroup
const int groupID = get_group_id(0);
const int wg = get_local_size(0); // workgroup size = block size
int pos = 0, same = 0;
const int offset = get_group_id(0) * wg;
const int remainder = count - wg*(numOfGroups-1);
if((offset + i ) >= count)
return;
float2 pt1, pt2;
pt1 = corners[groupID*wg + i];
float val1 = ELEM_FLT2(eig, pt1);
float val2;
for(int j=0; j<numOfGroups-1; j++ )
{
for(int k=0; k<wg; k++)
{
pt2 = corners[j*wg + k];
val2 = ELEM_FLT2(eig, pt2);
if(val1 > val2)
break;
else
{
//Increment only if the value is not the same.
if( val2 > val1 )
pos++;
else
same++;
}
}
}
for(int k=0; k<remainder; k++)
{
pt2 = corners[(numOfGroups-1)*wg + k];
val2 = ELEM_FLT2(eig, pt2);
if(val1 > val2)
break;
else
{
//Don't increment if the value is the same.
//Two elements are same if (*userComp)(jData, iData) and (*userComp)(iData, jData) are both false
if(val2 > val1)
pos++;
else
same++;
}
}
for (int j=0; j< same; j++)
corners[pos + j] = pt1;
} }

View File

@ -1,45 +1,97 @@
/////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro for border type//////////////////////////////////////////// /////////////////////////////////Macro for border type////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////
#ifdef BORDER_REPLICATE
//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh #ifdef BORDER_CONSTANT
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (l_edge) : (i)) //CCCCCC|abcdefgh|CCCCCCC
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (r_edge)-1 : (addr)) #define EXTRAPOLATE(x, maxV)
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (t_edge) :(i)) #elif defined BORDER_REPLICATE
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (b_edge)-1 :(addr)) //aaaaaa|abcdefgh|hhhhhhh
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = max(min((x), (maxV) - 1), 0); \
}
#elif defined BORDER_WRAP
//cdefgh|abcdefgh|abcdefg
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = ( (x) + (maxV) ) % (maxV); \
}
#elif defined BORDER_REFLECT
//fedcba|abcdefgh|hgfedcb
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = min( mad24((maxV)-1,2,-(x))+1 , max((x),-(x)-1) ); \
}
#elif defined BORDER_REFLECT_101
//gfedcb|abcdefgh|gfedcba
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = min( mad24((maxV)-1,2,-(x)), max((x),-(x)) ); \
}
#else
#error No extrapolation method
#endif #endif
#ifdef BORDER_REFLECT #define SRC(_x,_y) convert_float(((global SRCTYPE*)(Src+(_y)*SrcPitch))[_x])
//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i)-1 : (i)) #ifdef BORDER_CONSTANT
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr)) //CCCCCC|abcdefgh|CCCCCCC
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i)-1 : (i)) #define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-1+((b_edge)<<1) : (addr)) #else
#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
#endif #endif
#ifdef BORDER_REFLECT101 #define DSTX(_x,_y) (((global float*)(DstX+DstXOffset+(_y)*DstXPitch))[_x])
//BORDER_REFLECT101: gfedcb|abcdefgh|gfedcba #define DSTY(_x,_y) (((global float*)(DstY+DstYOffset+(_y)*DstYPitch))[_x])
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i) : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? -(i) : (i))
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? -(i)-2+((b_edge)<<1) : (addr))
#endif
#ifdef BORDER_WRAP #define INIT_AND_READ_LOCAL_SOURCE(width, height, fill_const, kernel_border) \
//BORDER_WRAP: cdefgh|abcdefgh|abcdefg int srcX = x + srcOffsetX - (kernel_border); \
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (i)+(r_edge) : (i)) int srcY = y + srcOffsetY - (kernel_border); \
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (i)-(r_edge) : (addr)) int xb = srcX; \
#define ADDR_H(i, t_edge, b_edge) ((i) < (t_edge) ? (i)+(b_edge) : (i)) int yb = srcY; \
#define ADDR_B(i, b_edge, addr) ((i) >= (b_edge) ? (i)-(b_edge) : (addr)) \
#endif EXTRAPOLATE(xb, (width)); \
EXTRAPOLATE(yb, (height)); \
lsmem[liy][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
\
if(lix < ((kernel_border)*2)) \
{ \
int xb = srcX+BLK_X; \
EXTRAPOLATE(xb,(width)); \
lsmem[liy][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
} \
if(liy< ((kernel_border)*2)) \
{ \
int yb = srcY+BLK_Y; \
EXTRAPOLATE(yb, (height)); \
lsmem[liy+BLK_Y][lix] = ELEM(xb, yb, (width), (height), (fill_const) ); \
} \
if(lix<((kernel_border)*2) && liy<((kernel_border)*2)) \
{ \
int xb = srcX+BLK_X; \
int yb = srcY+BLK_Y; \
EXTRAPOLATE(xb,(width)); \
EXTRAPOLATE(yb,(height)); \
lsmem[liy+BLK_Y][lix+BLK_X] = ELEM(xb, yb, (width), (height), (fill_const) ); \
}
__kernel void sobel3( __kernel void sobel3(
__global uchar* Src, __global uchar* Src,
__global float* DstX, const uint SrcPitch,
__global float* DstY, const int srcOffsetX,
int width, int height, const int srcOffsetY,
uint srcStride, uint dstStride, __global uchar* DstX,
float scale const int DstXOffset,
const uint DstXPitch,
__global uchar* DstY,
const int DstYOffset,
const uint DstYPitch,
int width,
int height,
int dstWidth,
int dstHeight,
float scale
) )
{ {
__local float lsmem[BLK_Y+2][BLK_X+2]; __local float lsmem[BLK_Y+2][BLK_X+2];
@ -47,62 +99,249 @@ __kernel void sobel3(
int lix = get_local_id(0); int lix = get_local_id(0);
int liy = get_local_id(1); int liy = get_local_id(1);
int gix = get_group_id(0); int x = (int)get_global_id(0);
int giy = get_group_id(1); int y = (int)get_global_id(1);
int id_x = get_global_id(0);
int id_y = get_global_id(1);
lsmem[liy+1][lix+1] = convert_float(Src[ id_y * srcStride + id_x ]);
int id_y_h = ADDR_H(id_y-1, 0,height);
int id_y_b = ADDR_B(id_y+1, height,id_y+1);
int id_x_l = ADDR_L(id_x-1, 0,width);
int id_x_r = ADDR_R(id_x+1, width,id_x+1);
if(liy==0)
{
lsmem[0][lix+1]=convert_float(Src[ id_y_h * srcStride + id_x ]);
if(lix==0)
lsmem[0][0]=convert_float(Src[ id_y_h * srcStride + id_x_l ]);
else if(lix==BLK_X-1)
lsmem[0][BLK_X+1]=convert_float(Src[ id_y_h * srcStride + id_x_r ]);
}
else if(liy==BLK_Y-1)
{
lsmem[BLK_Y+1][lix+1]=convert_float(Src[ id_y_b * srcStride + id_x ]);
if(lix==0)
lsmem[BLK_Y+1][0]=convert_float(Src[ id_y_b * srcStride + id_x_l ]);
else if(lix==BLK_X-1)
lsmem[BLK_Y+1][BLK_X+1]=convert_float(Src[ id_y_b * srcStride + id_x_r ]);
}
if(lix==0)
lsmem[liy+1][0] = convert_float(Src[ id_y * srcStride + id_x_l ]);
else if(lix==BLK_X-1)
lsmem[liy+1][BLK_X+1] = convert_float(Src[ id_y * srcStride + id_x_r ]);
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 1)
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if( x >= dstWidth || y >=dstHeight ) return;
float u1 = lsmem[liy][lix]; float u1 = lsmem[liy][lix];
float u2 = lsmem[liy][lix+1]; float u2 = lsmem[liy][lix+1];
float u3 = lsmem[liy][lix+2]; float u3 = lsmem[liy][lix+2];
float m1 = lsmem[liy+1][lix]; float m1 = lsmem[liy+1][lix];
float m2 = lsmem[liy+1][lix+1];
float m3 = lsmem[liy+1][lix+2]; float m3 = lsmem[liy+1][lix+2];
float b1 = lsmem[liy+2][lix]; float b1 = lsmem[liy+2][lix];
float b2 = lsmem[liy+2][lix+1]; float b2 = lsmem[liy+2][lix+1];
float b3 = lsmem[liy+2][lix+2]; float b3 = lsmem[liy+2][lix+2];
//m2 * scale;// //calc and store dx and dy;//
float dx = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1 ); #ifdef SCHARR
DstX[ id_y * dstStride + id_x ] = dx * scale; DSTX(x,y) = mad(10.0f, m3 - m1, 3.0f * (u3 - u1 + b3 - b1)) * scale;
DSTY(x,y) = mad(10.0f, b2 - u2, 3.0f * (b1 - u1 + b3 - u3)) * scale;
float dy = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3); #else
DstY[ id_y * dstStride + id_x ] = dy * scale; DSTX(x,y) = mad(2.0f, m3 - m1, u3 - u1 + b3 - b1) * scale;
DSTY(x,y) = mad(2.0f, b2 - u2, b1 - u1 + b3 - u3) * scale;
#endif
}
__kernel void sobel5(
__global uchar* Src,
const uint SrcPitch,
const int srcOffsetX,
const int srcOffsetY,
__global uchar* DstX,
const int DstXOffset,
const uint DstXPitch,
__global uchar* DstY,
const int DstYOffset,
const uint DstYPitch,
int width,
int height,
int dstWidth,
int dstHeight,
float scale
)
{
__local float lsmem[BLK_Y+4][BLK_X+4];
int lix = get_local_id(0);
int liy = get_local_id(1);
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 2)
barrier(CLK_LOCAL_MEM_FENCE);
if( x >= dstWidth || y >=dstHeight ) return;
float t1 = lsmem[liy][lix];
float t2 = lsmem[liy][lix+1];
float t3 = lsmem[liy][lix+2];
float t4 = lsmem[liy][lix+3];
float t5 = lsmem[liy][lix+4];
float u1 = lsmem[liy+1][lix];
float u2 = lsmem[liy+1][lix+1];
float u3 = lsmem[liy+1][lix+2];
float u4 = lsmem[liy+1][lix+3];
float u5 = lsmem[liy+1][lix+4];
float m1 = lsmem[liy+2][lix];
float m2 = lsmem[liy+2][lix+1];
float m4 = lsmem[liy+2][lix+3];
float m5 = lsmem[liy+2][lix+4];
float l1 = lsmem[liy+3][lix];
float l2 = lsmem[liy+3][lix+1];
float l3 = lsmem[liy+3][lix+2];
float l4 = lsmem[liy+3][lix+3];
float l5 = lsmem[liy+3][lix+4];
float b1 = lsmem[liy+4][lix];
float b2 = lsmem[liy+4][lix+1];
float b3 = lsmem[liy+4][lix+2];
float b4 = lsmem[liy+4][lix+3];
float b5 = lsmem[liy+4][lix+4];
//calc and store dx and dy;//
DSTX(x,y) = scale *
mad(12.0f, m4 - m2,
mad(6.0f, m5 - m1,
mad(8.0f, u4 - u2 + l4 - l2,
mad(4.0f, u5 - u1 + l5 - l1,
mad(2.0f, t4 - t2 + b4 - b2, t5 - t1 + b5 - b1 )
)
)
)
);
DSTY(x,y) = scale *
mad(12.0f, l3 - u3,
mad(6.0f, b3 - t3,
mad(8.0f, l2 - u2 + l4 - u4,
mad(4.0f, b2 - t2 + b4 - t4,
mad(2.0f, l1 - u1 + l5 - u5, b1 - t1 + b5 - t5 )
)
)
)
);
}
__kernel void sobel7(
__global uchar* Src,
const uint SrcPitch,
const int srcOffsetX,
const int srcOffsetY,
__global uchar* DstX,
const int DstXOffset,
const uint DstXPitch,
__global uchar* DstY,
const int DstYOffset,
const uint DstYPitch,
int width,
int height,
int dstWidth,
int dstHeight,
float scale
)
{
__local float lsmem[BLK_Y+6][BLK_X+6];
int lix = get_local_id(0);
int liy = get_local_id(1);
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
INIT_AND_READ_LOCAL_SOURCE(width, height, 0, 3)
barrier(CLK_LOCAL_MEM_FENCE);
if( x >= dstWidth || y >=dstHeight ) return;
float tt1 = lsmem[liy][lix];
float tt2 = lsmem[liy][lix+1];
float tt3 = lsmem[liy][lix+2];
float tt4 = lsmem[liy][lix+3];
float tt5 = lsmem[liy][lix+4];
float tt6 = lsmem[liy][lix+5];
float tt7 = lsmem[liy][lix+6];
float t1 = lsmem[liy+1][lix];
float t2 = lsmem[liy+1][lix+1];
float t3 = lsmem[liy+1][lix+2];
float t4 = lsmem[liy+1][lix+3];
float t5 = lsmem[liy+1][lix+4];
float t6 = lsmem[liy+1][lix+5];
float t7 = lsmem[liy+1][lix+6];
float u1 = lsmem[liy+2][lix];
float u2 = lsmem[liy+2][lix+1];
float u3 = lsmem[liy+2][lix+2];
float u4 = lsmem[liy+2][lix+3];
float u5 = lsmem[liy+2][lix+4];
float u6 = lsmem[liy+2][lix+5];
float u7 = lsmem[liy+2][lix+6];
float m1 = lsmem[liy+3][lix];
float m2 = lsmem[liy+3][lix+1];
float m3 = lsmem[liy+3][lix+2];
float m5 = lsmem[liy+3][lix+4];
float m6 = lsmem[liy+3][lix+5];
float m7 = lsmem[liy+3][lix+6];
float l1 = lsmem[liy+4][lix];
float l2 = lsmem[liy+4][lix+1];
float l3 = lsmem[liy+4][lix+2];
float l4 = lsmem[liy+4][lix+3];
float l5 = lsmem[liy+4][lix+4];
float l6 = lsmem[liy+4][lix+5];
float l7 = lsmem[liy+4][lix+6];
float b1 = lsmem[liy+5][lix];
float b2 = lsmem[liy+5][lix+1];
float b3 = lsmem[liy+5][lix+2];
float b4 = lsmem[liy+5][lix+3];
float b5 = lsmem[liy+5][lix+4];
float b6 = lsmem[liy+5][lix+5];
float b7 = lsmem[liy+5][lix+6];
float bb1 = lsmem[liy+6][lix];
float bb2 = lsmem[liy+6][lix+1];
float bb3 = lsmem[liy+6][lix+2];
float bb4 = lsmem[liy+6][lix+3];
float bb5 = lsmem[liy+6][lix+4];
float bb6 = lsmem[liy+6][lix+5];
float bb7 = lsmem[liy+6][lix+6];
//calc and store dx and dy
DSTX(x,y) = scale *
mad(100.0f, m5 - m3,
mad(80.0f, m6 - m2,
mad(20.0f, m7 - m1,
mad(75.0f, u5 - u3 + l5 - l3,
mad(60.0f, u6 - u2 + l6 - l2,
mad(15.0f, u7 - u1 + l7 - l1,
mad(30.0f, t5 - t3 + b5 - b3,
mad(24.0f, t6 - t2 + b6 - b2,
mad(6.0f, t7 - t1 + b7 - b1,
mad(5.0f, tt5 - tt3 + bb5 - bb3,
mad(4.0f, tt6 - tt2 + bb6 - bb2, tt7 - tt1 + bb7 - bb1 )
)
)
)
)
)
)
)
)
)
);
DSTY(x,y) = scale *
mad(100.0f, l4 - u4,
mad(80.0f, b4 - t4,
mad(20.0f, bb4 - tt4,
mad(75.0f, l5 - u5 + l3 - u3,
mad(60.0f, b5 - t5 + b3 - t3,
mad(15.0f, bb5 - tt5 + bb3 - tt3,
mad(30.0f, l6 - u6 + l2 - u2,
mad(24.0f, b6 - t6 + b2 - t2,
mad(6.0f, bb6 - tt6 + bb2 - tt2,
mad(5.0f, l7 - u7 + l1 - u1,
mad(4.0f, b7 - t7 + b1 - t1, bb7 - tt7 + bb1 - tt1 )
)
)
)
)
)
)
)
)
)
);
} }

View File

@ -50,6 +50,14 @@
#define NTHREADS 256 #define NTHREADS 256
#define CV_PI_F 3.1415926535897932384626433832795f #define CV_PI_F 3.1415926535897932384626433832795f
#ifdef INTEL_DEVICE
#define QANGLE_TYPE int
#define QANGLE_TYPE2 int2
#else
#define QANGLE_TYPE uchar
#define QANGLE_TYPE2 uchar2
#endif
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
// Histogram computation // Histogram computation
// 12 threads for a cell, 12x4 threads per block // 12 threads for a cell, 12x4 threads per block
@ -59,7 +67,7 @@ __kernel void compute_hists_lut_kernel(
const int cnbins, const int cblock_hist_size, const int img_block_width, const int cnbins, const int cblock_hist_size, const int img_block_width,
const int blocks_in_group, const int blocks_total, const int blocks_in_group, const int blocks_total,
const int grad_quadstep, const int qangle_step, const int grad_quadstep, const int qangle_step,
__global const float* grad, __global const uchar* qangle, __global const float* grad, __global const QANGLE_TYPE* qangle,
__global const float* gauss_w_lut, __global const float* gauss_w_lut,
__global float* block_hists, __local float* smem) __global float* block_hists, __local float* smem)
{ {
@ -86,7 +94,7 @@ __kernel void compute_hists_lut_kernel(
__global const float* grad_ptr = (gid < blocks_total) ? __global const float* grad_ptr = (gid < blocks_total) ?
grad + offset_y * grad_quadstep + (offset_x << 1) : grad; grad + offset_y * grad_quadstep + (offset_x << 1) : grad;
__global const uchar* qangle_ptr = (gid < blocks_total) ? __global const QANGLE_TYPE* qangle_ptr = (gid < blocks_total) ?
qangle + offset_y * qangle_step + (offset_x << 1) : qangle; qangle + offset_y * qangle_step + (offset_x << 1) : qangle;
__local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) + __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) +
@ -101,7 +109,7 @@ __kernel void compute_hists_lut_kernel(
for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y) for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
{ {
float2 vote = (float2) (grad_ptr[0], grad_ptr[1]); float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]); QANGLE_TYPE2 bin = (QANGLE_TYPE2) (qangle_ptr[0], qangle_ptr[1]);
grad_ptr += grad_quadstep; grad_ptr += grad_quadstep;
qangle_ptr += qangle_step; qangle_ptr += qangle_step;
@ -558,7 +566,7 @@ __kernel void extract_descrs_by_cols_kernel(
__kernel void compute_gradients_8UC4_kernel( __kernel void compute_gradients_8UC4_kernel(
const int height, const int width, const int height, const int width,
const int img_step, const int grad_quadstep, const int qangle_step, const int img_step, const int grad_quadstep, const int qangle_step,
const __global uchar4 * img, __global float * grad, __global uchar * qangle, const __global uchar4 * img, __global float * grad, __global QANGLE_TYPE * qangle,
const float angle_scale, const char correct_gamma, const int cnbins) const float angle_scale, const char correct_gamma, const int cnbins)
{ {
const int x = get_global_id(0); const int x = get_global_id(0);
@ -660,7 +668,7 @@ __kernel void compute_gradients_8UC4_kernel(
__kernel void compute_gradients_8UC1_kernel( __kernel void compute_gradients_8UC1_kernel(
const int height, const int width, const int height, const int width,
const int img_step, const int grad_quadstep, const int qangle_step, const int img_step, const int grad_quadstep, const int qangle_step,
__global const uchar * img, __global float * grad, __global uchar * qangle, __global const uchar * img, __global float * grad, __global QANGLE_TYPE * qangle,
const float angle_scale, const char correct_gamma, const int cnbins) const float angle_scale, const char correct_gamma, const int cnbins)
{ {
const int x = get_global_id(0); const int x = get_global_id(0);

View File

@ -116,7 +116,7 @@ Mat randomMat(RNG& rng, Size size, int type, double minVal, double maxVal, bool
Mat m(size0, type); Mat m(size0, type);
rng.fill(m, RNG::UNIFORM, Scalar::all(minVal), Scalar::all(maxVal)); rng.fill(m, RNG::UNIFORM, minVal, maxVal);
if( size0 == size ) if( size0 == size )
return m; return m;
return m(Rect((size0.width-size.width)/2, (size0.height-size.height)/2, size.width, size.height)); return m(Rect((size0.width-size.width)/2, (size0.height-size.height)/2, size.width, size.height));
@ -142,7 +142,7 @@ Mat randomMat(RNG& rng, const vector<int>& size, int type, double minVal, double
Mat m(dims, &size0[0], type); Mat m(dims, &size0[0], type);
rng.fill(m, RNG::UNIFORM, Scalar::all(minVal), Scalar::all(maxVal)); rng.fill(m, RNG::UNIFORM, minVal, maxVal);
if( eqsize ) if( eqsize )
return m; return m;
return m(&r[0]); return m(&r[0]);

View File

@ -28,14 +28,11 @@ set(CMAKE_MODULE_LINKER_FLAGS "-Wl,--fix-cortex-a8 -Wl,--no-undefined -Wl,--gc-s
set(CMAKE_EXE_LINKER_FLAGS "-Wl,--fix-cortex-a8 -Wl,--no-undefined -Wl,--gc-sections -Wl,-z,noexecstack -Wl,-z,relro -Wl,-z,now ${CMAKE_EXE_LINKER_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "-Wl,--fix-cortex-a8 -Wl,--no-undefined -Wl,--gc-sections -Wl,-z,noexecstack -Wl,-z,relro -Wl,-z,now ${CMAKE_EXE_LINKER_FLAGS}")
if(USE_NEON) if(USE_NEON)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon") message(WARNING "You use obsolete variable USE_NEON to enable NEON instruction set. Use -DENABLE_NEON=ON instead." )
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon") set(ENABLE_NEON TRUE)
elseif(USE_VFPV3) elseif(USE_VFPV3)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=vfpv3") message(WARNING "You use obsolete variable USE_VFPV3 to enable VFPV3 instruction set. Use -DENABLE_VFPV3=ON instead." )
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfpv3") set(ENABLE_VFPV3 TRUE)
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=vfpv3-d16")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=vfpv3-d16")
endif() endif()
set(CMAKE_FIND_ROOT_PATH ${CMAKE_FIND_ROOT_PATH} ${ARM_LINUX_SYSROOT}) set(CMAKE_FIND_ROOT_PATH ${CMAKE_FIND_ROOT_PATH} ${ARM_LINUX_SYSROOT})

View File

@ -0,0 +1,376 @@
// testOpenCVCam.cpp : Defines the entry point for the console application.
//
#include "opencv2/highgui/highgui.hpp"
#include <iostream>
using namespace cv;
using namespace std;
static bool g_printStreamSetting = false;
static int g_imageStreamProfileIdx = -1;
static int g_depthStreamProfileIdx = -1;
static bool g_irStreamShow = false;
static double g_imageBrightness = -DBL_MAX;
static double g_imageContrast = -DBL_MAX;
static bool g_printTiming = false;
static bool g_showClosedPoint = false;
static int g_closedDepthPoint[2];
static void printUsage(const char *arg0)
{
const char *filename = arg0;
while (*filename)
filename++;
while ((arg0 <= filename) && ('\\' != *filename) && ('/' != *filename))
filename--;
filename++;
cout << "This program demonstrates usage of camera supported\nby Intel Perceptual computing SDK." << endl << endl;
cout << "usage: " << filename << "[-ps] [-isp IDX] [-dsp IDX]\n [-ir] [-imb VAL] [-imc VAL]" << endl << endl;
cout << " -ps, print streams setting and profiles" << endl;
cout << " -isp IDX, set profile index of the image stream" << endl;
cout << " -dsp IDX, set profile index of the depth stream" << endl;
cout << " -ir, show data from IR stream" << endl;
cout << " -imb VAL, set brighness value for a image stream" << endl;
cout << " -imc VAL, set contrast value for a image stream" << endl;
cout << " -pts, print frame index and frame time" << endl;
cout << " --show-closed, print frame index and frame time" << endl;
cout << endl;
}
static void parseCMDLine(int argc, char* argv[])
{
if( argc == 1 )
{
printUsage(argv[0]);
}
else
{
for( int i = 1; i < argc; i++ )
{
if ((0 == strcmp(argv[i], "--help")) || (0 == strcmp( argv[i], "-h")))
{
printUsage(argv[0]);
exit(0);
}
else if ((0 == strcmp( argv[i], "--print-streams")) || (0 == strcmp( argv[i], "-ps")))
{
g_printStreamSetting = true;
}
else if ((0 == strcmp( argv[i], "--image-stream-prof")) || (0 == strcmp( argv[i], "-isp")))
{
g_imageStreamProfileIdx = atoi(argv[++i]);
}
else if ((0 == strcmp( argv[i], "--depth-stream-prof")) || (0 == strcmp( argv[i], "-dsp")))
{
g_depthStreamProfileIdx = atoi(argv[++i]);
}
else if (0 == strcmp( argv[i], "-ir"))
{
g_irStreamShow = true;
}
else if (0 == strcmp( argv[i], "-imb"))
{
g_imageBrightness = atof(argv[++i]);
}
else if (0 == strcmp( argv[i], "-imc"))
{
g_imageContrast = atof(argv[++i]);
}
else if (0 == strcmp(argv[i], "-pts"))
{
g_printTiming = true;
}
else if (0 == strcmp(argv[i], "--show-closed"))
{
g_showClosedPoint = true;
}
else
{
cout << "Unsupported command line argument: " << argv[i] << "." << endl;
exit(-1);
}
}
if (g_showClosedPoint && (-1 == g_depthStreamProfileIdx))
{
cerr << "For --show-closed depth profile has be selected" << endl;
exit(-1);
}
}
}
static void printStreamProperties(VideoCapture &capture)
{
size_t profilesCount = (size_t)capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_INTELPERC_PROFILE_COUNT);
cout << "Image stream." << endl;
cout << " Brightness = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_BRIGHTNESS) << endl;
cout << " Contrast = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_CONTRAST) << endl;
cout << " Saturation = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_SATURATION) << endl;
cout << " Hue = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_HUE) << endl;
cout << " Gamma = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_GAMMA) << endl;
cout << " Sharpness = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_SHARPNESS) << endl;
cout << " Gain = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_GAIN) << endl;
cout << " Backligh = " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_BACKLIGHT) << endl;
cout << "Image streams profiles:" << endl;
for (size_t i = 0; i < profilesCount; i++)
{
capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)i);
cout << " Profile[" << i << "]: ";
cout << "width = " <<
(int)capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_FRAME_WIDTH);
cout << ", height = " <<
(int)capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_FRAME_HEIGHT);
cout << ", fps = " <<
capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_FPS);
cout << endl;
}
profilesCount = (size_t)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_COUNT);
cout << "Depth stream." << endl;
cout << " Low confidence value = " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE) << endl;
cout << " Saturation value = " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE) << endl;
cout << " Confidence threshold = " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD) << endl;
cout << " Focal length = (" << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ) << ", "
<< capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT) << ")" << endl;
cout << "Depth streams profiles:" << endl;
for (size_t i = 0; i < profilesCount; i++)
{
capture.set(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)i);
cout << " Profile[" << i << "]: ";
cout << "width = " <<
(int)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_FRAME_WIDTH);
cout << ", height = " <<
(int)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_FRAME_HEIGHT);
cout << ", fps = " <<
capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_FPS);
cout << endl;
}
}
static void imshowImage(const char *winname, Mat &image, VideoCapture &capture)
{
if (g_showClosedPoint)
{
Mat uvMap;
if (capture.retrieve(uvMap, CAP_INTELPERC_UVDEPTH_MAP))
{
float *uvmap = (float *)uvMap.ptr() + 2 * (g_closedDepthPoint[0] * uvMap.cols + g_closedDepthPoint[1]);
int x = (int)((*uvmap) * image.cols); uvmap++;
int y = (int)((*uvmap) * image.rows);
if ((0 <= x) && (0 <= y))
{
static const int pointSize = 4;
for (int row = y; row < min(y + pointSize, image.rows); row++)
{
uchar* ptrDst = image.ptr(row) + x * 3 + 2;//+2 -> Red
for (int col = 0; col < min(pointSize, image.cols - x); col++, ptrDst+=3)
{
*ptrDst = 255;
}
}
}
}
}
imshow(winname, image);
}
static void imshowIR(const char *winname, Mat &ir)
{
Mat image;
if (g_showClosedPoint)
{
image.create(ir.rows, ir.cols, CV_8UC3);
for (int row = 0; row < ir.rows; row++)
{
uchar* ptrDst = image.ptr(row);
short* ptrSrc = (short*)ir.ptr(row);
for (int col = 0; col < ir.cols; col++, ptrSrc++)
{
uchar val = (uchar) ((*ptrSrc) >> 2);
*ptrDst = val; ptrDst++;
*ptrDst = val; ptrDst++;
*ptrDst = val; ptrDst++;
}
}
static const int pointSize = 4;
for (int row = g_closedDepthPoint[0]; row < min(g_closedDepthPoint[0] + pointSize, image.rows); row++)
{
uchar* ptrDst = image.ptr(row) + g_closedDepthPoint[1] * 3 + 2;//+2 -> Red
for (int col = 0; col < min(pointSize, image.cols - g_closedDepthPoint[1]); col++, ptrDst+=3)
{
*ptrDst = 255;
}
}
}
else
{
image.create(ir.rows, ir.cols, CV_8UC1);
for (int row = 0; row < ir.rows; row++)
{
uchar* ptrDst = image.ptr(row);
short* ptrSrc = (short*)ir.ptr(row);
for (int col = 0; col < ir.cols; col++, ptrSrc++, ptrDst++)
{
*ptrDst = (uchar) ((*ptrSrc) >> 2);
}
}
}
imshow(winname, image);
}
static void imshowDepth(const char *winname, Mat &depth, VideoCapture &capture)
{
short lowValue = (short)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE);
short saturationValue = (short)capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE);
Mat image;
if (g_showClosedPoint)
{
image.create(depth.rows, depth.cols, CV_8UC3);
for (int row = 0; row < depth.rows; row++)
{
uchar* ptrDst = image.ptr(row);
short* ptrSrc = (short*)depth.ptr(row);
for (int col = 0; col < depth.cols; col++, ptrSrc++)
{
if ((lowValue == (*ptrSrc)) || (saturationValue == (*ptrSrc)))
{
*ptrDst = 0; ptrDst++;
*ptrDst = 0; ptrDst++;
*ptrDst = 0; ptrDst++;
}
else
{
uchar val = (uchar) ((*ptrSrc) >> 2);
*ptrDst = val; ptrDst++;
*ptrDst = val; ptrDst++;
*ptrDst = val; ptrDst++;
}
}
}
static const int pointSize = 4;
for (int row = g_closedDepthPoint[0]; row < min(g_closedDepthPoint[0] + pointSize, image.rows); row++)
{
uchar* ptrDst = image.ptr(row) + g_closedDepthPoint[1] * 3 + 2;//+2 -> Red
for (int col = 0; col < min(pointSize, image.cols - g_closedDepthPoint[1]); col++, ptrDst+=3)
{
*ptrDst = 255;
}
}
}
else
{
image.create(depth.rows, depth.cols, CV_8UC1);
for (int row = 0; row < depth.rows; row++)
{
uchar* ptrDst = image.ptr(row);
short* ptrSrc = (short*)depth.ptr(row);
for (int col = 0; col < depth.cols; col++, ptrSrc++, ptrDst++)
{
if ((lowValue == (*ptrSrc)) || (saturationValue == (*ptrSrc)))
*ptrDst = 0;
else
*ptrDst = (uchar) ((*ptrSrc) >> 2);
}
}
}
imshow(winname, image);
}
int main(int argc, char* argv[])
{
parseCMDLine(argc, argv);
VideoCapture capture;
capture.open(CAP_INTELPERC);
if (!capture.isOpened())
{
cerr << "Can not open a capture object." << endl;
return -1;
}
if (g_printStreamSetting)
printStreamProperties(capture);
if (-1 != g_imageStreamProfileIdx)
{
if (!capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)g_imageStreamProfileIdx))
{
cerr << "Can not setup a image stream." << endl;
return -1;
}
}
if (-1 != g_depthStreamProfileIdx)
{
if (!capture.set(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, (double)g_depthStreamProfileIdx))
{
cerr << "Can not setup a depth stream." << endl;
return -1;
}
}
else if (g_irStreamShow)
{
if (!capture.set(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_INTELPERC_PROFILE_IDX, 0.0))
{
cerr << "Can not setup a IR stream." << endl;
return -1;
}
}
else
{
cout << "Streams not selected" << endl;
return 0;
}
//Setup additional properies only after set profile of the stream
if ( (-10000.0 < g_imageBrightness) && (g_imageBrightness < 10000.0))
capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_BRIGHTNESS, g_imageBrightness);
if ( (0 < g_imageContrast) && (g_imageContrast < 10000.0))
capture.set(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_BRIGHTNESS, g_imageContrast);
int frame = 0;
for(;;frame++)
{
Mat bgrImage;
Mat depthImage;
Mat irImage;
if (!capture.grab())
{
cout << "Can not grab images." << endl;
return -1;
}
if ((-1 != g_depthStreamProfileIdx) && (capture.retrieve(depthImage, CAP_INTELPERC_DEPTH_MAP)))
{
if (g_showClosedPoint)
{
double minVal = 0.0; double maxVal = 0.0;
minMaxIdx(depthImage, &minVal, &maxVal, g_closedDepthPoint);
}
imshowDepth("depth image", depthImage, capture);
}
if ((g_irStreamShow) && (capture.retrieve(irImage, CAP_INTELPERC_IR_MAP)))
imshowIR("ir image", irImage);
if ((-1 != g_imageStreamProfileIdx) && (capture.retrieve(bgrImage, CAP_INTELPERC_IMAGE)))
imshowImage("color image", bgrImage, capture);
if (g_printTiming)
{
cout << "Image frame: " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_POS_FRAMES)
<< ", Depth(IR) frame: " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_POS_FRAMES) << endl;
cout << "Image frame: " << capture.get(CAP_INTELPERC_IMAGE_GENERATOR | CAP_PROP_POS_MSEC)
<< ", Depth(IR) frame: " << capture.get(CAP_INTELPERC_DEPTH_GENERATOR | CAP_PROP_POS_MSEC) << endl;
}
if( waitKey(30) >= 0 )
break;
}
return 0;
}

View File

@ -32,13 +32,13 @@ int main()
for (int i = 0; i < image.rows; ++i) for (int i = 0; i < image.rows; ++i)
for (int j = 0; j < image.cols; ++j) for (int j = 0; j < image.cols; ++j)
{ {
Mat sampleMat = (Mat_<float>(1,2) << i,j); Mat sampleMat = (Mat_<float>(1,2) << j,i);
float response = SVM.predict(sampleMat); float response = SVM.predict(sampleMat);
if (response == 1) if (response == 1)
image.at<Vec3b>(j, i) = green; image.at<Vec3b>(i,j) = green;
else if (response == -1) else if (response == -1)
image.at<Vec3b>(j, i) = blue; image.at<Vec3b>(i,j) = blue;
} }
// Show the training data // Show the training data

View File

@ -14,7 +14,10 @@
using namespace std; using namespace std;
using namespace cv; using namespace cv;
#define LOOP_NUM 1 #define LOOP_NUM 1
#define MAX_THREADS 10
///////////////////////////single-threading faces detecting/////////////////////////////// ///////////////////////////single-threading faces detecting///////////////////////////////
@ -29,23 +32,23 @@ const static Scalar colors[] = { CV_RGB(0,0,255),
} ; } ;
int64 work_begin = 0; int64 work_begin[MAX_THREADS] = {0};
int64 work_end = 0; int64 work_total[MAX_THREADS] = {0};
string inputName, outputName, cascadeName; string inputName, outputName, cascadeName;
static void workBegin() static void workBegin(int i = 0)
{ {
work_begin = getTickCount(); work_begin[i] = getTickCount();
} }
static void workEnd() static void workEnd(int i = 0)
{ {
work_end += (getTickCount() - work_begin); work_total[i] += (getTickCount() - work_begin[i]);
} }
static double getTime() static double getTotalTime(int i = 0)
{ {
return work_end /((double)cvGetTickFrequency() * 1000.); return work_total[i] /getTickFrequency() * 1000.;
} }
@ -98,7 +101,6 @@ static int facedetect_one_thread(bool useCPU, double scale )
} }
} }
cvNamedWindow( "result", 1 );
if( capture ) if( capture )
{ {
cout << "In capture ..." << endl; cout << "In capture ..." << endl;
@ -118,7 +120,6 @@ static int facedetect_one_thread(bool useCPU, double scale )
else else
resize(frameCopy0, frameCopy, Size(), 1./scale, 1./scale, INTER_LINEAR); resize(frameCopy0, frameCopy, Size(), 1./scale, 1./scale, INTER_LINEAR);
work_end = 0;
if(useCPU) if(useCPU)
detectCPU(frameCopy, faces, cpu_cascade, 1); detectCPU(frameCopy, faces, cpu_cascade, 1);
else else
@ -132,16 +133,16 @@ static int facedetect_one_thread(bool useCPU, double scale )
} }
else else
{ {
cout << "In image read" << endl; cout << "In image read " << image.size() << endl;
vector<Rect> faces; vector<Rect> faces;
vector<Rect> ref_rst; vector<Rect> ref_rst;
double accuracy = 0.; double accuracy = 0.;
detectCPU(image, ref_rst, cpu_cascade, scale); detectCPU(image, ref_rst, cpu_cascade, scale);
work_end = 0;
cout << "loops: ";
for(int i = 0; i <= LOOP_NUM; i ++) for(int i = 0; i <= LOOP_NUM; i ++)
{ {
cout << "loop" << i << endl; cout << i << ", ";
if(useCPU) if(useCPU)
detectCPU(image, faces, cpu_cascade, scale); detectCPU(image, faces, cpu_cascade, scale);
else else
@ -152,16 +153,15 @@ static int facedetect_one_thread(bool useCPU, double scale )
accuracy = checkRectSimilarity(image.size(), ref_rst, faces); accuracy = checkRectSimilarity(image.size(), ref_rst, faces);
} }
} }
if (i == LOOP_NUM)
{
if (useCPU)
cout << "average CPU time (noCamera) : ";
else
cout << "average GPU time (noCamera) : ";
cout << getTime() / LOOP_NUM << " ms" << endl;
cout << "accuracy value: " << accuracy <<endl;
}
} }
cout << "done!" << endl;
if (useCPU)
cout << "average CPU time (noCamera) : ";
else
cout << "average GPU time (noCamera) : ";
cout << getTotalTime() / LOOP_NUM << " ms" << endl;
cout << "accuracy value: " << accuracy <<endl;
Draw(image, faces, scale); Draw(image, faces, scale);
waitKey(0); waitKey(0);
} }
@ -174,9 +174,7 @@ static int facedetect_one_thread(bool useCPU, double scale )
///////////////////////////////////////detectfaces with multithreading//////////////////////////////////////////// ///////////////////////////////////////detectfaces with multithreading////////////////////////////////////////////
#if defined(_MSC_VER) && (_MSC_VER >= 1700) #if defined(_MSC_VER) && (_MSC_VER >= 1700)
#define MAX_THREADS 10 static void detectFaces(std::string fileName, int threadNum)
static void detectFaces(std::string fileName)
{ {
ocl::OclCascadeClassifier cascade; ocl::OclCascadeClassifier cascade;
if(!cascade.load(cascadeName)) if(!cascade.load(cascadeName))
@ -188,7 +186,7 @@ static void detectFaces(std::string fileName)
Mat img = imread(fileName, CV_LOAD_IMAGE_COLOR); Mat img = imread(fileName, CV_LOAD_IMAGE_COLOR);
if (img.empty()) if (img.empty())
{ {
std::cout << "cann't open file " + fileName <<std::endl; std::cout << '[' << threadNum << "] " << "can't open file " + fileName <<std::endl;
return; return;
} }
@ -196,23 +194,37 @@ static void detectFaces(std::string fileName)
d_img.upload(img); d_img.upload(img);
std::vector<Rect> oclfaces; std::vector<Rect> oclfaces;
cascade.detectMultiScale(d_img, oclfaces, 1.1, 3, 0 | CASCADE_SCALE_IMAGE, Size(30, 30), Size(0, 0)); std::thread::id tid = std::this_thread::get_id();
std::cout << '[' << threadNum << "] "
<< "ThreadID = " << tid
<< ", CommandQueue = " << *(void**)ocl::getClCommandQueuePtr()
<< endl;
for(int i = 0; i <= LOOP_NUM; i++)
{
if(i>0) workBegin(threadNum);
cascade.detectMultiScale(d_img, oclfaces, 1.1, 3, 0|CASCADE_SCALE_IMAGE, Size(30, 30), Size(0, 0));
if(i>0) workEnd(threadNum);
}
std::cout << '[' << threadNum << "] " << "Average time = " << getTotalTime(threadNum) / LOOP_NUM << " ms" << endl;
for(unsigned int i = 0; i<oclfaces.size(); i++) for(unsigned int i = 0; i<oclfaces.size(); i++)
rectangle(img, Point(oclfaces[i].x, oclfaces[i].y), Point(oclfaces[i].x + oclfaces[i].width, oclfaces[i].y + oclfaces[i].height), colors[i%8], 3); rectangle(img, Point(oclfaces[i].x, oclfaces[i].y), Point(oclfaces[i].x + oclfaces[i].width, oclfaces[i].y + oclfaces[i].height), colors[i%8], 3);
std::string::size_type pos = outputName.rfind('.'); std::string::size_type pos = outputName.rfind('.');
std::string outputNameTid = outputName + '-' + std::to_string(_threadid); std::string strTid = std::to_string(_threadid);
if(pos == std::string::npos) if( !outputName.empty() )
{ {
std::cout << "Invalid output file name: " << outputName << std::endl; if(pos == std::string::npos)
{
std::cout << "Invalid output file name: " << outputName << std::endl;
}
else
{
std::string outputNameTid = outputName.substr(0, pos) + "_" + strTid + outputName.substr(pos);
imwrite(outputNameTid, img);
}
} }
else imshow(strTid, img);
{
outputNameTid = outputName.substr(0, pos) + "_" + std::to_string(_threadid) + outputName.substr(pos);
imwrite(outputNameTid, img);
}
imshow(outputNameTid, img);
waitKey(0); waitKey(0);
} }
@ -221,7 +233,7 @@ static void facedetect_multithreading(int nthreads)
int thread_number = MAX_THREADS < nthreads ? MAX_THREADS : nthreads; int thread_number = MAX_THREADS < nthreads ? MAX_THREADS : nthreads;
std::vector<std::thread> threads; std::vector<std::thread> threads;
for(int i = 0; i<thread_number; i++) for(int i = 0; i<thread_number; i++)
threads.push_back(std::thread(detectFaces, inputName)); threads.push_back(std::thread(detectFaces, inputName, i));
for(int i = 0; i<thread_number; i++) for(int i = 0; i<thread_number; i++)
threads[i].join(); threads[i].join();
} }
@ -237,8 +249,7 @@ int main( int argc, const char** argv )
" specify template file path }" " specify template file path }"
"{ c scale | 1.0 | scale image }" "{ c scale | 1.0 | scale image }"
"{ s use_cpu | false | use cpu or gpu to process the image }" "{ s use_cpu | false | use cpu or gpu to process the image }"
"{ o output | facedetect_output.jpg |" "{ o output | | specify output image save path(only works when input is images) }"
" specify output image save path(only works when input is images) }"
"{ n thread_num | 1 | set number of threads >= 1 }"; "{ n thread_num | 1 | set number of threads >= 1 }";
CommandLineParser cmd(argc, argv, keys); CommandLineParser cmd(argc, argv, keys);
@ -312,8 +323,6 @@ void detectCPU( Mat& img, vector<Rect>& faces,
void Draw(Mat& img, vector<Rect>& faces, double scale) void Draw(Mat& img, vector<Rect>& faces, double scale)
{ {
int i = 0; int i = 0;
putText(img, format("fps: %.1f", 1000./getTime()), Point(450, 50),
FONT_HERSHEY_SIMPLEX, 1, Scalar(0,255,0), 3);
for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) for( vector<Rect>::const_iterator r = faces.begin(); r != faces.end(); r++, i++ )
{ {
Point center; Point center;
@ -324,8 +333,8 @@ void Draw(Mat& img, vector<Rect>& faces, double scale)
radius = cvRound((r->width + r->height)*0.25*scale); radius = cvRound((r->width + r->height)*0.25*scale);
circle( img, center, radius, color, 3, 8, 0 ); circle( img, center, radius, color, 3, 8, 0 );
} }
//imwrite( outputName, img ); //if( !outputName.empty() ) imwrite( outputName, img );
if(abs(scale-1.0)>.001) if( abs(scale-1.0)>.001 )
{ {
resize(img, img, Size((int)(img.cols/scale), (int)(img.rows/scale))); resize(img, img, Size((int)(img.cols/scale), (int)(img.rows/scale)));
} }