merge with upstream

This commit is contained in:
Matthias Bady 2014-01-05 19:00:25 +01:00
commit e17da9a843
144 changed files with 11391 additions and 1974 deletions

View File

@ -1,502 +0,0 @@
//=============================================================================
//
// multimon.h -- Stub module that fakes multiple monitor apis on Win32 OSes
// without them.
//
// By using this header your code will get back default values from
// GetSystemMetrics() for new metrics, and the new multimonitor APIs
// will act like only one display is present on a Win32 OS without
// multimonitor APIs.
//
// Exactly one source must include this with COMPILE_MULTIMON_STUBS defined.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//
//=============================================================================
#ifdef __cplusplus
extern "C" { // Assume C declarations for C++
#endif // __cplusplus
//
// If we are building with Win95/NT4 headers, we need to declare
// the multimonitor-related metrics and APIs ourselves.
//
#ifndef SM_CMONITORS
#define SM_XVIRTUALSCREEN 76
#define SM_YVIRTUALSCREEN 77
#define SM_CXVIRTUALSCREEN 78
#define SM_CYVIRTUALSCREEN 79
#define SM_CMONITORS 80
#define SM_SAMEDISPLAYFORMAT 81
// HMONITOR is already declared if WINVER >= 0x0500 in windef.h
// This is for components built with an older version number.
//
#if !defined(HMONITOR_DECLARED) && (WINVER < 0x0500)
DECLARE_HANDLE(HMONITOR);
#define HMONITOR_DECLARED
#endif
#define MONITOR_DEFAULTTONULL 0x00000000
#define MONITOR_DEFAULTTOPRIMARY 0x00000001
#define MONITOR_DEFAULTTONEAREST 0x00000002
#define MONITORINFOF_PRIMARY 0x00000001
typedef struct tagMONITORINFO
{
DWORD cbSize;
RECT rcMonitor;
RECT rcWork;
DWORD dwFlags;
} MONITORINFO, *LPMONITORINFO;
#ifndef CCHDEVICENAME
#define CCHDEVICENAME 32
#endif
#ifdef __cplusplus
typedef struct tagMONITORINFOEXA : public tagMONITORINFO
{
CHAR szDevice[CCHDEVICENAME];
} MONITORINFOEXA, *LPMONITORINFOEXA;
typedef struct tagMONITORINFOEXW : public tagMONITORINFO
{
WCHAR szDevice[CCHDEVICENAME];
} MONITORINFOEXW, *LPMONITORINFOEXW;
#ifdef UNICODE
typedef MONITORINFOEXW MONITORINFOEX;
typedef LPMONITORINFOEXW LPMONITORINFOEX;
#else
typedef MONITORINFOEXA MONITORINFOEX;
typedef LPMONITORINFOEXA LPMONITORINFOEX;
#endif // UNICODE
#else // ndef __cplusplus
typedef struct tagMONITORINFOEXA
{
MONITORINFO;
CHAR szDevice[CCHDEVICENAME];
} MONITORINFOEXA, *LPMONITORINFOEXA;
typedef struct tagMONITORINFOEXW
{
MONITORINFO;
WCHAR szDevice[CCHDEVICENAME];
} MONITORINFOEXW, *LPMONITORINFOEXW;
#ifdef UNICODE
typedef MONITORINFOEXW MONITORINFOEX;
typedef LPMONITORINFOEXW LPMONITORINFOEX;
#else
typedef MONITORINFOEXA MONITORINFOEX;
typedef LPMONITORINFOEXA LPMONITORINFOEX;
#endif // UNICODE
#endif
typedef BOOL (CALLBACK* MONITORENUMPROC)(HMONITOR, HDC, LPRECT, LPARAM);
#ifndef DISPLAY_DEVICE_ATTACHED_TO_DESKTOP
typedef struct _DISPLAY_DEVICEA {
DWORD cb;
CHAR DeviceName[32];
CHAR DeviceString[128];
DWORD StateFlags;
CHAR DeviceID[128];
CHAR DeviceKey[128];
} DISPLAY_DEVICEA, *PDISPLAY_DEVICEA, *LPDISPLAY_DEVICEA;
typedef struct _DISPLAY_DEVICEW {
DWORD cb;
WCHAR DeviceName[32];
WCHAR DeviceString[128];
DWORD StateFlags;
WCHAR DeviceID[128];
WCHAR DeviceKey[128];
} DISPLAY_DEVICEW, *PDISPLAY_DEVICEW, *LPDISPLAY_DEVICEW;
#ifdef UNICODE
typedef DISPLAY_DEVICEW DISPLAY_DEVICE;
typedef PDISPLAY_DEVICEW PDISPLAY_DEVICE;
typedef LPDISPLAY_DEVICEW LPDISPLAY_DEVICE;
#else
typedef DISPLAY_DEVICEA DISPLAY_DEVICE;
typedef PDISPLAY_DEVICEA PDISPLAY_DEVICE;
typedef LPDISPLAY_DEVICEA LPDISPLAY_DEVICE;
#endif // UNICODE
#define DISPLAY_DEVICE_ATTACHED_TO_DESKTOP 0x00000001
#define DISPLAY_DEVICE_MULTI_DRIVER 0x00000002
#define DISPLAY_DEVICE_PRIMARY_DEVICE 0x00000004
#define DISPLAY_DEVICE_MIRRORING_DRIVER 0x00000008
#define DISPLAY_DEVICE_VGA_COMPATIBLE 0x00000010
#endif
#endif // SM_CMONITORS
#undef GetMonitorInfo
#undef GetSystemMetrics
#undef MonitorFromWindow
#undef MonitorFromRect
#undef MonitorFromPoint
#undef EnumDisplayMonitors
#undef EnumDisplayDevices
//
// Define COMPILE_MULTIMON_STUBS to compile the stubs;
// otherwise, you get the declarations.
//
#ifdef COMPILE_MULTIMON_STUBS
//-----------------------------------------------------------------------------
//
// Implement the API stubs.
//
//-----------------------------------------------------------------------------
#ifndef _MULTIMON_USE_SECURE_CRT
#if defined(__GOT_SECURE_LIB__) && __GOT_SECURE_LIB__ >= 200402L
#define _MULTIMON_USE_SECURE_CRT 1
#else
#define _MULTIMON_USE_SECURE_CRT 0
#endif
#endif
#ifndef MULTIMON_FNS_DEFINED
int (WINAPI* g_pfnGetSystemMetrics)(int) = NULL;
HMONITOR (WINAPI* g_pfnMonitorFromWindow)(HWND, DWORD) = NULL;
HMONITOR (WINAPI* g_pfnMonitorFromRect)(LPCRECT, DWORD) = NULL;
HMONITOR (WINAPI* g_pfnMonitorFromPoint)(POINT, DWORD) = NULL;
BOOL (WINAPI* g_pfnGetMonitorInfo)(HMONITOR, LPMONITORINFO) = NULL;
BOOL (WINAPI* g_pfnEnumDisplayMonitors)(HDC, LPCRECT, MONITORENUMPROC, LPARAM) = NULL;
BOOL (WINAPI* g_pfnEnumDisplayDevices)(PVOID, DWORD, PDISPLAY_DEVICE,DWORD) = NULL;
BOOL g_fMultiMonInitDone = FALSE;
BOOL g_fMultimonPlatformNT = FALSE;
#endif
BOOL IsPlatformNT()
{
OSVERSIONINFOA osvi = {0};
osvi.dwOSVersionInfoSize = sizeof(osvi);
GetVersionExA((OSVERSIONINFOA*)&osvi);
return (VER_PLATFORM_WIN32_NT == osvi.dwPlatformId);
}
BOOL InitMultipleMonitorStubs(void)
{
HMODULE hUser32;
if (g_fMultiMonInitDone)
{
return g_pfnGetMonitorInfo != NULL;
}
g_fMultimonPlatformNT = IsPlatformNT();
hUser32 = GetModuleHandle(TEXT("USER32"));
if (hUser32 &&
(*(FARPROC*)&g_pfnGetSystemMetrics = GetProcAddress(hUser32,"GetSystemMetrics")) != NULL &&
(*(FARPROC*)&g_pfnMonitorFromWindow = GetProcAddress(hUser32,"MonitorFromWindow")) != NULL &&
(*(FARPROC*)&g_pfnMonitorFromRect = GetProcAddress(hUser32,"MonitorFromRect")) != NULL &&
(*(FARPROC*)&g_pfnMonitorFromPoint = GetProcAddress(hUser32,"MonitorFromPoint")) != NULL &&
(*(FARPROC*)&g_pfnEnumDisplayMonitors = GetProcAddress(hUser32,"EnumDisplayMonitors")) != NULL &&
#ifdef UNICODE
(*(FARPROC*)&g_pfnEnumDisplayDevices = GetProcAddress(hUser32,"EnumDisplayDevicesW")) != NULL &&
(*(FARPROC*)&g_pfnGetMonitorInfo = g_fMultimonPlatformNT ? GetProcAddress(hUser32,"GetMonitorInfoW") :
GetProcAddress(hUser32,"GetMonitorInfoA")) != NULL
#else
(*(FARPROC*)&g_pfnGetMonitorInfo = GetProcAddress(hUser32,"GetMonitorInfoA")) != NULL &&
(*(FARPROC*)&g_pfnEnumDisplayDevices = GetProcAddress(hUser32,"EnumDisplayDevicesA")) != NULL
#endif
) {
g_fMultiMonInitDone = TRUE;
return TRUE;
}
else
{
g_pfnGetSystemMetrics = NULL;
g_pfnMonitorFromWindow = NULL;
g_pfnMonitorFromRect = NULL;
g_pfnMonitorFromPoint = NULL;
g_pfnGetMonitorInfo = NULL;
g_pfnEnumDisplayMonitors = NULL;
g_pfnEnumDisplayDevices = NULL;
g_fMultiMonInitDone = TRUE;
return FALSE;
}
}
//-----------------------------------------------------------------------------
//
// fake implementations of Monitor APIs that work with the primary display
// no special parameter validation is made since these run in client code
//
//-----------------------------------------------------------------------------
int WINAPI
xGetSystemMetrics(int nIndex)
{
if (InitMultipleMonitorStubs())
return g_pfnGetSystemMetrics(nIndex);
switch (nIndex)
{
case SM_CMONITORS:
case SM_SAMEDISPLAYFORMAT:
return 1;
case SM_XVIRTUALSCREEN:
case SM_YVIRTUALSCREEN:
return 0;
case SM_CXVIRTUALSCREEN:
nIndex = SM_CXSCREEN;
break;
case SM_CYVIRTUALSCREEN:
nIndex = SM_CYSCREEN;
break;
}
return GetSystemMetrics(nIndex);
}
#define xPRIMARY_MONITOR ((HMONITOR)0x12340042)
HMONITOR WINAPI
xMonitorFromPoint(POINT ptScreenCoords, DWORD dwFlags)
{
if (InitMultipleMonitorStubs())
return g_pfnMonitorFromPoint(ptScreenCoords, dwFlags);
if ((dwFlags & (MONITOR_DEFAULTTOPRIMARY | MONITOR_DEFAULTTONEAREST)) ||
((ptScreenCoords.x >= 0) &&
(ptScreenCoords.x < GetSystemMetrics(SM_CXSCREEN)) &&
(ptScreenCoords.y >= 0) &&
(ptScreenCoords.y < GetSystemMetrics(SM_CYSCREEN))))
{
return xPRIMARY_MONITOR;
}
return NULL;
}
HMONITOR WINAPI
xMonitorFromRect(LPCRECT lprcScreenCoords, DWORD dwFlags)
{
if (InitMultipleMonitorStubs())
return g_pfnMonitorFromRect(lprcScreenCoords, dwFlags);
if ((dwFlags & (MONITOR_DEFAULTTOPRIMARY | MONITOR_DEFAULTTONEAREST)) ||
((lprcScreenCoords->right > 0) &&
(lprcScreenCoords->bottom > 0) &&
(lprcScreenCoords->left < GetSystemMetrics(SM_CXSCREEN)) &&
(lprcScreenCoords->top < GetSystemMetrics(SM_CYSCREEN))))
{
return xPRIMARY_MONITOR;
}
return NULL;
}
HMONITOR WINAPI
xMonitorFromWindow(HWND hWnd, DWORD dwFlags)
{
WINDOWPLACEMENT wp;
if (InitMultipleMonitorStubs())
return g_pfnMonitorFromWindow(hWnd, dwFlags);
if (dwFlags & (MONITOR_DEFAULTTOPRIMARY | MONITOR_DEFAULTTONEAREST))
return xPRIMARY_MONITOR;
if (IsIconic(hWnd) ?
GetWindowPlacement(hWnd, &wp) :
GetWindowRect(hWnd, &wp.rcNormalPosition)) {
return xMonitorFromRect(&wp.rcNormalPosition, dwFlags);
}
return NULL;
}
BOOL WINAPI
xGetMonitorInfo(HMONITOR hMonitor, __inout LPMONITORINFO lpMonitorInfo)
{
RECT rcWork;
if (InitMultipleMonitorStubs())
{
BOOL f = g_pfnGetMonitorInfo(hMonitor, lpMonitorInfo);
#ifdef UNICODE
if (f && !g_fMultimonPlatformNT && (lpMonitorInfo->cbSize >= sizeof(MONITORINFOEX)))
{
MultiByteToWideChar(CP_ACP, 0,
(LPSTR)((MONITORINFOEX*)lpMonitorInfo)->szDevice, -1,
((MONITORINFOEX*)lpMonitorInfo)->szDevice, (sizeof(((MONITORINFOEX*)lpMonitorInfo)->szDevice)/sizeof(TCHAR)));
}
#endif
return f;
}
if ((hMonitor == xPRIMARY_MONITOR) &&
lpMonitorInfo &&
(lpMonitorInfo->cbSize >= sizeof(MONITORINFO)) &&
SystemParametersInfoA(SPI_GETWORKAREA, 0, &rcWork, 0))
{
lpMonitorInfo->rcMonitor.left = 0;
lpMonitorInfo->rcMonitor.top = 0;
lpMonitorInfo->rcMonitor.right = GetSystemMetrics(SM_CXSCREEN);
lpMonitorInfo->rcMonitor.bottom = GetSystemMetrics(SM_CYSCREEN);
lpMonitorInfo->rcWork = rcWork;
lpMonitorInfo->dwFlags = MONITORINFOF_PRIMARY;
if (lpMonitorInfo->cbSize >= sizeof(MONITORINFOEX))
{
#ifdef UNICODE
MultiByteToWideChar(CP_ACP, 0, "DISPLAY", -1, ((MONITORINFOEX*)lpMonitorInfo)->szDevice, (sizeof(((MONITORINFOEX*)lpMonitorInfo)->szDevice)/sizeof(TCHAR)));
#else // UNICODE
#if _MULTIMON_USE_SECURE_CRT
strncpy_s(((MONITORINFOEX*)lpMonitorInfo)->szDevice, (sizeof(((MONITORINFOEX*)lpMonitorInfo)->szDevice)/sizeof(TCHAR)), TEXT("DISPLAY"), (sizeof(((MONITORINFOEX*)lpMonitorInfo)->szDevice)/sizeof(TCHAR)) - 1);
#else
lstrcpyn(((MONITORINFOEX*)lpMonitorInfo)->szDevice, TEXT("DISPLAY"), (sizeof(((MONITORINFOEX*)lpMonitorInfo)->szDevice)/sizeof(TCHAR)));
#endif // _MULTIMON_USE_SECURE_CRT
#endif // UNICODE
}
return TRUE;
}
return FALSE;
}
BOOL WINAPI
xEnumDisplayMonitors(
HDC hdcOptionalForPainting,
LPCRECT lprcEnumMonitorsThatIntersect,
MONITORENUMPROC lpfnEnumProc,
LPARAM dwData)
{
RECT rcLimit;
if (InitMultipleMonitorStubs()) {
return g_pfnEnumDisplayMonitors(
hdcOptionalForPainting,
lprcEnumMonitorsThatIntersect,
lpfnEnumProc,
dwData);
}
if (!lpfnEnumProc)
return FALSE;
rcLimit.left = 0;
rcLimit.top = 0;
rcLimit.right = GetSystemMetrics(SM_CXSCREEN);
rcLimit.bottom = GetSystemMetrics(SM_CYSCREEN);
if (hdcOptionalForPainting)
{
RECT rcClip;
POINT ptOrg;
switch (GetClipBox(hdcOptionalForPainting, &rcClip))
{
default:
if (!GetDCOrgEx(hdcOptionalForPainting, &ptOrg))
return FALSE;
OffsetRect(&rcLimit, -ptOrg.x, -ptOrg.y);
if (IntersectRect(&rcLimit, &rcLimit, &rcClip) &&
(!lprcEnumMonitorsThatIntersect ||
IntersectRect(&rcLimit, &rcLimit, lprcEnumMonitorsThatIntersect))) {
break;
}
//fall thru
case NULLREGION:
return TRUE;
case ERROR:
return FALSE;
}
} else {
if ( lprcEnumMonitorsThatIntersect &&
!IntersectRect(&rcLimit, &rcLimit, lprcEnumMonitorsThatIntersect)) {
return TRUE;
}
}
return lpfnEnumProc(
xPRIMARY_MONITOR,
hdcOptionalForPainting,
&rcLimit,
dwData);
}
BOOL WINAPI
xEnumDisplayDevices(
PVOID Unused,
DWORD iDevNum,
__inout PDISPLAY_DEVICE lpDisplayDevice,
DWORD dwFlags)
{
if (InitMultipleMonitorStubs())
return g_pfnEnumDisplayDevices(Unused, iDevNum, lpDisplayDevice, dwFlags);
if (Unused != NULL)
return FALSE;
if (iDevNum != 0)
return FALSE;
if (lpDisplayDevice == NULL || lpDisplayDevice->cb < sizeof(DISPLAY_DEVICE))
return FALSE;
#ifdef UNICODE
MultiByteToWideChar(CP_ACP, 0, "DISPLAY", -1, lpDisplayDevice->DeviceName, (sizeof(lpDisplayDevice->DeviceName)/sizeof(TCHAR)));
MultiByteToWideChar(CP_ACP, 0, "DISPLAY", -1, lpDisplayDevice->DeviceString, (sizeof(lpDisplayDevice->DeviceString)/sizeof(TCHAR)));
#else // UNICODE
#if _MULTIMON_USE_SECURE_CRT
strncpy_s((LPTSTR)lpDisplayDevice->DeviceName, (sizeof(lpDisplayDevice->DeviceName)/sizeof(TCHAR)), TEXT("DISPLAY"), (sizeof(lpDisplayDevice->DeviceName)/sizeof(TCHAR)) - 1);
strncpy_s((LPTSTR)lpDisplayDevice->DeviceString, (sizeof(lpDisplayDevice->DeviceString)/sizeof(TCHAR)), TEXT("DISPLAY"), (sizeof(lpDisplayDevice->DeviceName)/sizeof(TCHAR)) - 1);
#else
lstrcpyn((LPTSTR)lpDisplayDevice->DeviceName, TEXT("DISPLAY"), (sizeof(lpDisplayDevice->DeviceName)/sizeof(TCHAR)));
lstrcpyn((LPTSTR)lpDisplayDevice->DeviceString, TEXT("DISPLAY"), (sizeof(lpDisplayDevice->DeviceString)/sizeof(TCHAR)));
#endif // _MULTIMON_USE_SECURE_CRT
#endif // UNICODE
lpDisplayDevice->StateFlags = DISPLAY_DEVICE_ATTACHED_TO_DESKTOP | DISPLAY_DEVICE_PRIMARY_DEVICE;
return TRUE;
}
#undef xPRIMARY_MONITOR
#undef COMPILE_MULTIMON_STUBS
#else // COMPILE_MULTIMON_STUBS
extern int WINAPI xGetSystemMetrics(int);
extern HMONITOR WINAPI xMonitorFromWindow(HWND, DWORD);
extern HMONITOR WINAPI xMonitorFromRect(LPCRECT, DWORD);
extern HMONITOR WINAPI xMonitorFromPoint(POINT, DWORD);
extern BOOL WINAPI xGetMonitorInfo(HMONITOR, LPMONITORINFO);
extern BOOL WINAPI xEnumDisplayMonitors(HDC, LPCRECT, MONITORENUMPROC, LPARAM);
extern BOOL WINAPI xEnumDisplayDevices(PVOID, DWORD, PDISPLAY_DEVICE, DWORD);
#endif // COMPILE_MULTIMON_STUBS
//
// build defines that replace the regular APIs with our versions
//
#define GetSystemMetrics xGetSystemMetrics
#define MonitorFromWindow xMonitorFromWindow
#define MonitorFromRect xMonitorFromRect
#define MonitorFromPoint xMonitorFromPoint
#define GetMonitorInfo xGetMonitorInfo
#define EnumDisplayMonitors xEnumDisplayMonitors
#define EnumDisplayDevices xEnumDisplayDevices
#ifdef __cplusplus
}
#endif // __cplusplus

View File

@ -156,6 +156,7 @@ OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" ON
OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS) )
OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF WIN32 )
OCV_OPTION(WITH_INTELPERC "Include Intel Perceptual Computing support" OFF IF WIN32 )
# OpenCV build components
@ -207,10 +208,12 @@ OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions"
OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND ARM) )
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND ARM )
OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND ARM )
OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF )
OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors" OFF )
OCV_OPTION(ENABLE_WINRT_MODE "Build with Windows Runtime support" OFF IF WIN32 )
OCV_OPTION(ENABLE_WINRT_MODE_NATIVE "Build with Windows Runtime native C++ support" OFF IF WIN32 )
# ----------------------------------------------------------------------------
@ -226,6 +229,15 @@ include(cmake/OpenCVVersion.cmake)
# Save libs and executables in the same place
set(EXECUTABLE_OUTPUT_PATH "${CMAKE_BINARY_DIR}/bin" CACHE PATH "Output directory for applications" )
if (ANDROID)
if (ANDROID_ABI MATCHES "NEON")
set(ENABLE_NEON ON)
endif()
if (ANDROID_ABI MATCHES "VFPV3")
set(ENABLE_VFPV3 ON)
endif()
endif()
if(ANDROID OR WIN32)
set(OPENCV_DOC_INSTALL_PATH doc)
elseif(INSTALL_TO_MANGLED_PATHS)
@ -373,6 +385,8 @@ if(UNIX)
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} dl m log)
elseif(${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD|NetBSD|DragonFly")
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} m pthread)
elseif(EMSCRIPTEN)
# no need to link to system libs with emscripten
else()
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} dl m pthread rt)
endif()
@ -630,7 +644,7 @@ endif()
if(WIN32)
status("")
status(" Windows RT support:" HAVE_WINRT THEN YES ELSE NO)
if (ENABLE_WINRT_MODE)
if (ENABLE_WINRT_MODE OR ENABLE_WINRT_MODE_NATIVE)
status(" Windows SDK v8.0:" ${WINDOWS_SDK_PATH})
status(" Visual Studio 2012:" ${VISUAL_STUDIO_PATH})
endif()
@ -820,6 +834,11 @@ if(DEFINED WITH_XINE)
status(" Xine:" HAVE_XINE THEN "YES (ver ${ALIASOF_libxine_VERSION})" ELSE NO)
endif(DEFINED WITH_XINE)
if(DEFINED WITH_INTELPERC)
status(" Intel PerC:" HAVE_INTELPERC THEN "YES" ELSE NO)
endif(DEFINED WITH_INTELPERC)
# ========================== Other third-party libraries ==========================
status("")
status(" Other third-party libraries:")

View File

@ -9,7 +9,7 @@ set(HAVE_WINRT FALSE)
# search Windows Platform SDK
message(STATUS "Checking for Windows Platform SDK")
GET_FILENAME_COMPONENT(WINDOWS_SDK_PATH "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Microsoft SDKs\\Windows\\v8.0;InstallationFolder]" ABSOLUTE CACHE)
if (WINDOWS_SDK_PATH STREQUAL "")
if(WINDOWS_SDK_PATH STREQUAL "")
set(HAVE_MSPDK FALSE)
message(STATUS "Windows Platform SDK 8.0 was not found")
else()
@ -19,7 +19,7 @@ endif()
#search for Visual Studio 11.0 install directory
message(STATUS "Checking for Visual Studio 2012")
GET_FILENAME_COMPONENT(VISUAL_STUDIO_PATH [HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\VisualStudio\\11.0\\Setup\\VS;ProductDir] REALPATH CACHE)
if (VISUAL_STUDIO_PATH STREQUAL "")
if(VISUAL_STUDIO_PATH STREQUAL "")
set(HAVE_MSVC2012 FALSE)
message(STATUS "Visual Studio 2012 was not found")
else()
@ -30,11 +30,15 @@ try_compile(HAVE_WINRT_SDK
"${OpenCV_BINARY_DIR}"
"${OpenCV_SOURCE_DIR}/cmake/checks/winrttest.cpp")
if (ENABLE_WINRT_MODE AND HAVE_WINRT_SDK AND HAVE_MSVC2012 AND HAVE_MSPDK)
if(ENABLE_WINRT_MODE AND HAVE_WINRT_SDK AND HAVE_MSVC2012 AND HAVE_MSPDK)
set(HAVE_WINRT TRUE)
set(HAVE_WINRT_CX TRUE)
elseif(ENABLE_WINRT_MODE_NATIVE AND HAVE_WINRT_SDK AND HAVE_MSVC2012 AND HAVE_MSPDK)
set(HAVE_WINRT TRUE)
set(HAVE_WINRT_CX FALSE)
endif()
if (HAVE_WINRT)
if(HAVE_WINRT)
add_definitions(/DWINVER=0x0602 /DNTDDI_VERSION=NTDDI_WIN8 /D_WIN32_WINNT=0x0602)
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /appcontainer")
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} /appcontainer")

View File

@ -124,6 +124,12 @@ if(CMAKE_COMPILER_IS_GNUCXX)
if(ENABLE_SSE2)
add_extra_compiler_option(-msse2)
endif()
if (ENABLE_NEON)
add_extra_compiler_option("-mfpu=neon")
endif()
if (ENABLE_VFPV3 AND NOT ENABLE_NEON)
add_extra_compiler_option("-mfpu=vfpv3")
endif()
# SSE3 and further should be disabled under MingW because it generates compiler errors
if(NOT MINGW)

View File

@ -0,0 +1,20 @@
# Main variables:
# INTELPERC_LIBRARIES and INTELPERC_INCLUDE to link Intel Perceptial Computing SDK modules
# HAVE_INTELPERC for conditional compilation OpenCV with/without Intel Perceptial Computing SDK
if(X86_64)
find_path(INTELPERC_INCLUDE_DIR "pxcsession.h" PATHS "$ENV{PCSDK_DIR}include" DOC "Path to Intel Perceptual Computing SDK interface headers")
find_file(INTELPERC_LIBRARIES "libpxc.lib" PATHS "$ENV{PCSDK_DIR}lib/x64" DOC "Path to Intel Perceptual Computing SDK interface libraries")
else()
find_path(INTELPERC_INCLUDE_DIR "pxcsession.h" PATHS "$ENV{PCSDK_DIR}include" DOC "Path to Intel Perceptual Computing SDK interface headers")
find_file(INTELPERC_LIBRARIES "libpxc.lib" PATHS "$ENV{PCSDK_DIR}lib/Win32" DOC "Path to Intel Perceptual Computing SDK interface libraries")
endif()
if(INTELPERC_INCLUDE_DIR AND INTELPERC_LIBRARIES)
set(HAVE_INTELPERC TRUE)
else()
set(HAVE_INTELPERC FALSE)
message(WARNING "Intel Perceptual Computing SDK library directory (set by INTELPERC_LIB_DIR variable) is not found or does not have Intel Perceptual Computing SDK libraries.")
endif() #if(INTELPERC_INCLUDE_DIR AND INTELPERC_LIBRARIES)
mark_as_advanced(FORCE INTELPERC_LIBRARIES INTELPERC_INCLUDE_DIR)

View File

@ -277,3 +277,8 @@ if (NOT IOS)
set(HAVE_QTKIT YES)
endif()
endif()
# --- Intel Perceptual Computing SDK ---
if(WITH_INTELPERC)
include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindIntelPerCSDK.cmake")
endif(WITH_INTELPERC)

View File

@ -88,6 +88,9 @@
/* Define to 1 if you have the <inttypes.h> header file. */
#cmakedefine HAVE_INTTYPES_H 1
/* Intel Perceptual Computing SDK library */
#cmakedefine HAVE_INTELPERC
/* Intel Integrated Performance Primitives */
#cmakedefine HAVE_IPP

View File

@ -48,10 +48,10 @@ The structure of package contents looks as follows:
::
OpenCV-2.4.7-android-sdk
OpenCV-2.4.8-android-sdk
|_ apk
| |_ OpenCV_2.4.7_binary_pack_armv7a.apk
| |_ OpenCV_2.4.7_Manager_2.14_XXX.apk
| |_ OpenCV_2.4.8_binary_pack_armv7a.apk
| |_ OpenCV_2.4.8_Manager_2.16_XXX.apk
|
|_ doc
|_ samples
@ -157,10 +157,10 @@ Get the OpenCV4Android SDK
.. code-block:: bash
unzip ~/Downloads/OpenCV-2.4.7-android-sdk.zip
unzip ~/Downloads/OpenCV-2.4.8-android-sdk.zip
.. |opencv_android_bin_pack| replace:: :file:`OpenCV-2.4.7-android-sdk.zip`
.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.7/OpenCV-2.4.7-android-sdk.zip/download
.. |opencv_android_bin_pack| replace:: :file:`OpenCV-2.4.8-android-sdk.zip`
.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.8/OpenCV-2.4.8-android-sdk.zip/download
.. |opencv_android_bin_pack_url| replace:: |opencv_android_bin_pack|
.. |seven_zip| replace:: 7-Zip
.. _seven_zip: http://www.7-zip.org/
@ -295,7 +295,7 @@ Well, running samples from Eclipse is very simple:
.. code-block:: sh
:linenos:
<Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.7_Manager_2.14_armv7a-neon.apk
<Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.8_Manager_2.16_armv7a-neon.apk
.. note:: ``armeabi``, ``armv7a-neon``, ``arm7a-neon-android8``, ``mips`` and ``x86`` stand for
platform targets:

View File

@ -55,14 +55,14 @@ Manager to access OpenCV libraries externally installed in the target system.
:guilabel:`File -> Import -> Existing project in your workspace`.
Press :guilabel:`Browse` button and locate OpenCV4Android SDK
(:file:`OpenCV-2.4.7-android-sdk/sdk`).
(:file:`OpenCV-2.4.8-android-sdk/sdk`).
.. image:: images/eclipse_opencv_dependency0.png
:alt: Add dependency from OpenCV library
:align: center
#. In application project add a reference to the OpenCV Java SDK in
:guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.7``.
:guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.8``.
.. image:: images/eclipse_opencv_dependency1.png
:alt: Add dependency from OpenCV library
@ -128,27 +128,27 @@ described above.
#. Add the OpenCV library project to your workspace the same way as for the async initialization
above. Use menu :guilabel:`File -> Import -> Existing project in your workspace`,
press :guilabel:`Browse` button and select OpenCV SDK path
(:file:`OpenCV-2.4.7-android-sdk/sdk`).
(:file:`OpenCV-2.4.8-android-sdk/sdk`).
.. image:: images/eclipse_opencv_dependency0.png
:alt: Add dependency from OpenCV library
:align: center
#. In the application project add a reference to the OpenCV4Android SDK in
:guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.7``;
:guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.8``;
.. image:: images/eclipse_opencv_dependency1.png
:alt: Add dependency from OpenCV library
:align: center
#. If your application project **doesn't have a JNI part**, just copy the corresponding OpenCV
native libs from :file:`<OpenCV-2.4.7-android-sdk>/sdk/native/libs/<target_arch>` to your
native libs from :file:`<OpenCV-2.4.8-android-sdk>/sdk/native/libs/<target_arch>` to your
project directory to folder :file:`libs/<target_arch>`.
In case of the application project **with a JNI part**, instead of manual libraries copying you
need to modify your ``Android.mk`` file:
add the following two code lines after the ``"include $(CLEAR_VARS)"`` and before
``"include path_to_OpenCV-2.4.7-android-sdk/sdk/native/jni/OpenCV.mk"``
``"include path_to_OpenCV-2.4.8-android-sdk/sdk/native/jni/OpenCV.mk"``
.. code-block:: make
:linenos:
@ -221,7 +221,7 @@ taken:
.. code-block:: make
include C:\Work\OpenCV4Android\OpenCV-2.4.7-android-sdk\sdk\native\jni\OpenCV.mk
include C:\Work\OpenCV4Android\OpenCV-2.4.8-android-sdk\sdk\native\jni\OpenCV.mk
Should be inserted into the :file:`jni/Android.mk` file **after** this line:

View File

@ -106,8 +106,8 @@ Enable hardware optimizations
-----------------------------
Depending on target platform architecture different instruction sets can be used. By default
compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DUSE_VFPV3=ON``
to cmake command line to enable code generation for VFPv3 and ``-DUSE_NEON=ON`` for using
compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DENABLE_VFPV3=ON``
to cmake command line to enable code generation for VFPv3 and ``-DENABLE_NEON=ON`` for using
NEON SIMD extensions.
TBB is supported on multi core ARM SoCs also.

View File

@ -0,0 +1,79 @@
*******
HighGUI
*******
.. highlight:: cpp
Using Creative Senz3D and other Intel Perceptual Computing SDK compatible depth sensors
=======================================================================================
Depth sensors compatible with Intel Perceptual Computing SDK are supported through ``VideoCapture`` class. Depth map, RGB image and some other formats of output can be retrieved by using familiar interface of ``VideoCapture``.
In order to use depth sensor with OpenCV you should do the following preliminary steps:
#.
Install Intel Perceptual Computing SDK (from here http://www.intel.com/software/perceptual).
#.
Configure OpenCV with Intel Perceptual Computing SDK support by setting ``WITH_INTELPERC`` flag in CMake. If Intel Perceptual Computing SDK is found in install folders OpenCV will be built with Intel Perceptual Computing SDK library (see a status ``INTELPERC`` in CMake log). If CMake process doesn't find Intel Perceptual Computing SDK installation folder automatically, the user should change corresponding CMake variables ``INTELPERC_LIB_DIR`` and ``INTELPERC_INCLUDE_DIR`` to the proper value.
#.
Build OpenCV.
VideoCapture can retrieve the following data:
#.
data given from depth generator:
* ``CV_CAP_INTELPERC_DEPTH_MAP`` - each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth. (CV_16UC1)
* ``CV_CAP_INTELPERC_UVDEPTH_MAP`` - each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates. (CV_32FC2)
* ``CV_CAP_INTELPERC_IR_MAP`` - each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam. (CV_16UC1)
#.
data given from RGB image generator:
* ``CV_CAP_INTELPERC_IMAGE`` - color image. (CV_8UC3)
In order to get depth map from depth sensor use ``VideoCapture::operator >>``, e. g. ::
VideoCapture capture( CV_CAP_INTELPERC );
for(;;)
{
Mat depthMap;
capture >> depthMap;
if( waitKey( 30 ) >= 0 )
break;
}
For getting several data maps use ``VideoCapture::grab`` and ``VideoCapture::retrieve``, e.g. ::
VideoCapture capture(CV_CAP_INTELPERC);
for(;;)
{
Mat depthMap;
Mat image;
Mat irImage;
capture.grab();
capture.retrieve( depthMap, CV_CAP_INTELPERC_DEPTH_MAP );
capture.retrieve( image, CV_CAP_INTELPERC_IMAGE );
capture.retrieve( irImage, CV_CAP_INTELPERC_IR_MAP);
if( waitKey( 30 ) >= 0 )
break;
}
For setting and getting some property of sensor` data generators use ``VideoCapture::set`` and ``VideoCapture::get`` methods respectively, e.g. ::
VideoCapture capture( CV_CAP_INTELPERC );
capture.set( CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_PROFILE_IDX, 0 );
cout << "FPS " << capture.get( CV_CAP_INTELPERC_DEPTH_GENERATOR+CV_CAP_PROP_FPS ) << endl;
Since two types of sensor's data generators are supported (image generator and depth generator), there are two flags that should be used to set/get property of the needed generator:
* CV_CAP_INTELPERC_IMAGE_GENERATOR -- a flag for access to the image generator properties.
* CV_CAP_INTELPERC_DEPTH_GENERATOR -- a flag for access to the depth generator properties. This flag value is assumed by default if neither of the two possible values of the property is set.
For more information please refer to the example of usage intelperc_capture.cpp_ in ``opencv/samples/cpp`` folder.
.. _intelperc_capture.cpp: https://github.com/Itseez/opencv/tree/master/samples/cpp/intelperc_capture.cpp

View File

@ -9,3 +9,4 @@ OpenCV User Guide
ug_features2d.rst
ug_highgui.rst
ug_traincascade.rst
ug_intelperc.rst

View File

@ -2,8 +2,11 @@ set(the_description "The Core Functionality")
ocv_add_module(core PRIVATE_REQUIRED ${ZLIB_LIBRARIES} "${OPENCL_LIBRARIES}" OPTIONAL opencv_cudev)
ocv_module_include_directories(${ZLIB_INCLUDE_DIRS})
if(HAVE_WINRT_CX)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /ZW")
endif()
if(HAVE_WINRT)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /ZW /GS /Gm- /AI\"${WINDOWS_SDK_PATH}/References/CommonConfiguration/Neutral\" /AI\"${VISUAL_STUDIO_PATH}/vcpackages\"")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS /Gm- /AI\"${WINDOWS_SDK_PATH}/References/CommonConfiguration/Neutral\" /AI\"${VISUAL_STUDIO_PATH}/vcpackages\"")
endif()
if(HAVE_CUDA)

View File

@ -903,7 +903,7 @@ So, the function chooses an operation mode depending on the flags and size of th
* When ``DFT_COMPLEX_OUTPUT`` is set, the output is a complex matrix of the same size as input.
* When ``DFT_COMPLEX_OUTPUT`` is not set, the output is a real matrix of the same size as input. In case of 2D transform, it uses the packed format as shown above. In case of a single 1D transform, it looks like the first row of the matrix above. In case of multiple 1D transforms (when using the ``DCT_ROWS`` flag), each row of the output matrix looks like the first row of the matrix above.
* When ``DFT_COMPLEX_OUTPUT`` is not set, the output is a real matrix of the same size as input. In case of 2D transform, it uses the packed format as shown above. In case of a single 1D transform, it looks like the first row of the matrix above. In case of multiple 1D transforms (when using the ``DFT_ROWS`` flag), each row of the output matrix looks like the first row of the matrix above.
* If the input array is complex and either ``DFT_INVERSE`` or ``DFT_REAL_OUTPUT`` are not set, the output is a complex array of the same size as input. The function performs a forward or inverse 1D or 2D transform of the whole input array or each row of the input array independently, depending on the flags ``DFT_INVERSE`` and ``DFT_ROWS``.

View File

@ -444,7 +444,7 @@ CV_INLINE int cvIsInf( double value )
// atomic increment on the linux version of the Intel(tm) compiler
# define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta)
#elif defined __GNUC__
# if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__
# if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
# ifdef __ATOMIC_ACQ_REL
# define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
# else

View File

@ -267,6 +267,12 @@ inline _InputOutputArray::_InputOutputArray(const Mat& m)
inline _InputOutputArray::_InputOutputArray(const std::vector<Mat>& vec)
{ init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_RW, &vec); }
inline _InputOutputArray::_InputOutputArray(const UMat& m)
{ init(FIXED_TYPE + FIXED_SIZE + UMAT + ACCESS_RW, &m); }
inline _InputOutputArray::_InputOutputArray(const std::vector<UMat>& vec)
{ init(FIXED_SIZE + STD_VECTOR_UMAT + ACCESS_RW, &vec); }
inline _InputOutputArray::_InputOutputArray(const cuda::GpuMat& d_mat)
{ init(FIXED_TYPE + FIXED_SIZE + GPU_MAT + ACCESS_RW, &d_mat); }

View File

@ -210,6 +210,7 @@ public:
Context2(const Context2& c);
Context2& operator = (const Context2& c);
bool create();
bool create(int dtype);
size_t ndevices() const;
const Device& device(size_t idx) const;
@ -488,6 +489,7 @@ public:
bool runTask(bool sync, const Queue& q=Queue());
size_t workGroupSize() const;
size_t preferedWorkGroupSizeMultiple() const;
bool compileWorkGroupSize(size_t wsz[]) const;
size_t localMemSize() const;

View File

@ -394,7 +394,9 @@ template<typename _Tp> static inline _Tp randu()
return (_Tp)theRNG();
}
///////////////////////////////// Formatted string generation /////////////////////////////////
CV_EXPORTS String format( const char* fmt, ... );
///////////////////////////////// Formatted output of cv::Mat /////////////////////////////////

View File

@ -85,7 +85,7 @@ template<typename _Tp, size_t fixed_size = 1024/sizeof(_Tp)+8> class AutoBuffer
public:
typedef _Tp value_type;
//! the default contructor
//! the default constructor
AutoBuffer();
//! constructor taking the real buffer size
AutoBuffer(size_t _size);

View File

@ -47,13 +47,81 @@
namespace cvtest {
namespace ocl {
///////////// Lut ////////////////////////
typedef Size_MatType LUTFixture;
OCL_PERF_TEST_P(LUTFixture, LUT,
::testing::Combine(OCL_TEST_SIZES,
OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), cn = CV_MAT_CN(type);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, CV_8UC(cn)), lut(1, 256, type);
int dstType = CV_MAKETYPE(lut.depth(), src.channels());
UMat dst(srcSize, dstType);
declare.in(src, lut, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::LUT(src, lut, dst);
SANITY_CHECK(dst);
}
///////////// Exp ////////////////////////
typedef Size_MatType ExpFixture;
OCL_PERF_TEST_P(ExpFixture, Exp, ::testing::Combine(
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type), dst(srcSize, type);
declare.in(src).out(dst);
randu(src, 5, 16);
OCL_TEST_CYCLE() cv::exp(src, dst);
SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
}
///////////// Log ////////////////////////
typedef Size_MatType LogFixture;
OCL_PERF_TEST_P(LogFixture, Log, ::testing::Combine(
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type), dst(srcSize, type);
randu(src, 1, 10000);
declare.in(src).out(dst);
OCL_TEST_CYCLE() cv::log(src, dst);
SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
}
///////////// Add ////////////////////////
typedef Size_MatType AddFixture;
OCL_PERF_TEST_P(AddFixture, Add,
::testing::Combine(OCL_TEST_SIZES,
OCL_TEST_TYPES))
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size srcSize = GET_PARAM(0);
const int type = GET_PARAM(1);
@ -61,15 +129,691 @@ OCL_PERF_TEST_P(AddFixture, Add,
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
randu(src1);
randu(src2);
declare.in(src1, src2).out(dst);
declare.in(src1, src2, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::add(src1, src2, dst);
SANITY_CHECK(dst);
}
///////////// Subtract ////////////////////////
typedef Size_MatType SubtractFixture;
OCL_PERF_TEST_P(SubtractFixture, Subtract,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
declare.in(src1, src2, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::subtract(src1, src2, dst);
SANITY_CHECK(dst);
}
///////////// Mul ////////////////////////
typedef Size_MatType MulFixture;
OCL_PERF_TEST_P(MulFixture, Multiply, ::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
declare.in(src1, src2, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::multiply(src1, src2, dst);
SANITY_CHECK(dst);
}
///////////// Div ////////////////////////
typedef Size_MatType DivFixture;
OCL_PERF_TEST_P(DivFixture, Divide,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
declare.in(src1, src2, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::divide(src1, src2, dst);
SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
}
///////////// Absdiff ////////////////////////
typedef Size_MatType AbsDiffFixture;
OCL_PERF_TEST_P(AbsDiffFixture, Absdiff,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
declare.in(src1, src2, WARMUP_RNG).in(dst);
OCL_TEST_CYCLE() cv::absdiff(src1, src2, dst);
SANITY_CHECK(dst);
}
///////////// CartToPolar ////////////////////////
typedef Size_MatType CartToPolarFixture;
OCL_PERF_TEST_P(CartToPolarFixture, CartToPolar, ::testing::Combine(
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type),
dst1(srcSize, type), dst2(srcSize, type);
declare.in(src1, src2, WARMUP_RNG).out(dst1, dst2);
OCL_TEST_CYCLE() cv::cartToPolar(src1, src2, dst1, dst2);
SANITY_CHECK(dst1, 8e-3);
SANITY_CHECK(dst2, 8e-3);
}
///////////// PolarToCart ////////////////////////
typedef Size_MatType PolarToCartFixture;
OCL_PERF_TEST_P(PolarToCartFixture, PolarToCart, ::testing::Combine(
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type),
dst1(srcSize, type), dst2(srcSize, type);
declare.in(src1, src2, WARMUP_RNG).out(dst1, dst2);
OCL_TEST_CYCLE() cv::polarToCart(src1, src2, dst1, dst2);
SANITY_CHECK(dst1, 5e-5);
SANITY_CHECK(dst2, 5e-5);
}
///////////// Magnitude ////////////////////////
typedef Size_MatType MagnitudeFixture;
OCL_PERF_TEST_P(MagnitudeFixture, Magnitude, ::testing::Combine(
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type),
dst(srcSize, type);
declare.in(src1, src2, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::magnitude(src1, src2, dst);
SANITY_CHECK(dst, 1e-6);
}
///////////// Transpose ////////////////////////
typedef Size_MatType TransposeFixture;
OCL_PERF_TEST_P(TransposeFixture, Transpose, ::testing::Combine(
OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type), dst(srcSize, type);
declare.in(src, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::transpose(src, dst);
SANITY_CHECK(dst);
}
///////////// Flip ////////////////////////
enum
{
FLIP_BOTH = 0, FLIP_ROWS, FLIP_COLS
};
CV_ENUM(FlipType, FLIP_BOTH, FLIP_ROWS, FLIP_COLS)
typedef std::tr1::tuple<Size, MatType, FlipType> FlipParams;
typedef TestBaseWithParam<FlipParams> FlipFixture;
OCL_PERF_TEST_P(FlipFixture, Flip,
::testing::Combine(OCL_TEST_SIZES,
OCL_TEST_TYPES, FlipType::all()))
{
const FlipParams params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
const int flipType = get<2>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type), dst(srcSize, type);
declare.in(src, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::flip(src, dst, flipType - 1);
SANITY_CHECK(dst);
}
///////////// minMaxLoc ////////////////////////
typedef Size_MatType MinMaxLocFixture;
OCL_PERF_TEST_P(MinMaxLocFixture, MinMaxLoc,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
bool onecn = CV_MAT_CN(type) == 1;
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type);;
declare.in(src, WARMUP_RNG);
double min_val = 0.0, max_val = 0.0;
Point min_loc, max_loc;
OCL_TEST_CYCLE() cv::minMaxLoc(src, &min_val, &max_val, onecn ? &min_loc : NULL,
onecn ? &max_loc : NULL);
ASSERT_GE(max_val, min_val);
SANITY_CHECK(min_val);
SANITY_CHECK(max_val);
int min_loc_x = min_loc.x, min_loc_y = min_loc.y, max_loc_x = max_loc.x,
max_loc_y = max_loc.y;
SANITY_CHECK(min_loc_x);
SANITY_CHECK(min_loc_y);
SANITY_CHECK(max_loc_x);
SANITY_CHECK(max_loc_y);
}
///////////// Sum ////////////////////////
typedef Size_MatType SumFixture;
OCL_PERF_TEST_P(SumFixture, Sum,
::testing::Combine(OCL_TEST_SIZES,
OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), depth = CV_MAT_DEPTH(type);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type);
Scalar result;
randu(src, 0, 60);
declare.in(src);
OCL_TEST_CYCLE() result = cv::sum(src);
if (depth >= CV_32F)
SANITY_CHECK(result, 1e-6, ERROR_RELATIVE);
else
SANITY_CHECK(result);
}
///////////// countNonZero ////////////////////////
typedef Size_MatType CountNonZeroFixture;
OCL_PERF_TEST_P(CountNonZeroFixture, CountNonZero,
::testing::Combine(OCL_TEST_SIZES,
OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type);
int result = 0;
randu(src, 0, 10);
declare.in(src);
OCL_TEST_CYCLE() result = cv::countNonZero(src);
SANITY_CHECK(result);
}
///////////// Phase ////////////////////////
typedef Size_MatType PhaseFixture;
OCL_PERF_TEST_P(PhaseFixture, Phase, ::testing::Combine(
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type),
dst(srcSize, type);
declare.in(src1, src2, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::phase(src1, src2, dst, 1);
SANITY_CHECK(dst, 1e-2);
}
///////////// bitwise_and////////////////////////
typedef Size_MatType BitwiseAndFixture;
OCL_PERF_TEST_P(BitwiseAndFixture, Bitwise_and,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
declare.in(src1, src2, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::bitwise_and(src1, src2, dst);
SANITY_CHECK(dst);
}
///////////// bitwise_xor ////////////////////////
typedef Size_MatType BitwiseXorFixture;
OCL_PERF_TEST_P(BitwiseXorFixture, Bitwise_xor,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
declare.in(src1, src2, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::bitwise_xor(src1, src2, dst);
SANITY_CHECK(dst);
}
///////////// bitwise_or ////////////////////////
typedef Size_MatType BitwiseOrFixture;
OCL_PERF_TEST_P(BitwiseOrFixture, Bitwise_or,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
declare.in(src1, src2, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::bitwise_or(src1, src2, dst);
SANITY_CHECK(dst);
}
///////////// bitwise_not ////////////////////////
typedef Size_MatType BitwiseNotFixture;
OCL_PERF_TEST_P(BitwiseNotFixture, Bitwise_not,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type), dst(srcSize, type);
declare.in(src, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::bitwise_not(src, dst);
SANITY_CHECK(dst);
}
///////////// compare////////////////////////
CV_ENUM(CmpCode, CMP_LT, CMP_LE, CMP_EQ, CMP_NE, CMP_GE, CMP_GT)
typedef std::tr1::tuple<Size, MatType, CmpCode> CompareParams;
typedef TestBaseWithParam<CompareParams> CompareFixture;
OCL_PERF_TEST_P(CompareFixture, Compare,
::testing::Combine(OCL_TEST_SIZES,
OCL_TEST_TYPES, CmpCode::all()))
{
const CompareParams params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
const int cmpCode = get<2>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, CV_8UC1);
declare.in(src1, src2, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::compare(src1, src2, dst, cmpCode);
SANITY_CHECK(dst);
}
///////////// pow ////////////////////////
typedef Size_MatType PowFixture;
OCL_PERF_TEST_P(PowFixture, Pow, ::testing::Combine(
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type), dst(srcSize, type);
randu(src, -100, 100);
declare.in(src).out(dst);
OCL_TEST_CYCLE() cv::pow(src, -2.0, dst);
SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
}
///////////// AddWeighted////////////////////////
typedef Size_MatType AddWeightedFixture;
OCL_PERF_TEST_P(AddWeightedFixture, AddWeighted,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), depth = CV_MAT_DEPTH(type);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
declare.in(src1, src2, WARMUP_RNG).out(dst);
double alpha = 2.0, beta = 1.0, gama = 3.0;
OCL_TEST_CYCLE() cv::addWeighted(src1, alpha, src2, beta, gama, dst);
if (depth >= CV_32F)
SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
else
SANITY_CHECK(dst);
}
///////////// Sqrt ///////////////////////
typedef Size_MatType SqrtFixture;
OCL_PERF_TEST_P(SqrtFixture, Sqrt, ::testing::Combine(
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type), dst(srcSize, type);
randu(src, 0, 1000);
declare.in(src).out(dst);
OCL_TEST_CYCLE() cv::sqrt(src, dst);
SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
}
///////////// SetIdentity ////////////////////////
typedef Size_MatType SetIdentityFixture;
OCL_PERF_TEST_P(SetIdentityFixture, SetIdentity,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat dst(srcSize, type);
declare.out(dst);
OCL_TEST_CYCLE() cv::setIdentity(dst, cv::Scalar::all(181));
SANITY_CHECK(dst);
}
///////////// MeanStdDev ////////////////////////
typedef Size_MatType MeanStdDevFixture;
OCL_PERF_TEST_P(MeanStdDevFixture, DISABLED_MeanStdDev,
::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
const double eps = 1e-5;
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type);
Scalar mean, stddev;
declare.in(src, WARMUP_RNG);
OCL_TEST_CYCLE() cv::meanStdDev(src, mean, stddev);
double mean0 = mean[0], mean1 = mean[1], mean2 = mean[2], mean3 = mean[3];
double stddev0 = stddev[0], stddev1 = stddev[1], stddev2 = stddev[2], stddev3 = stddev[3];
SANITY_CHECK(mean0, eps, ERROR_RELATIVE);
SANITY_CHECK(mean1, eps, ERROR_RELATIVE);
SANITY_CHECK(mean2, eps, ERROR_RELATIVE);
SANITY_CHECK(mean3, eps, ERROR_RELATIVE);
SANITY_CHECK(stddev0, eps, ERROR_RELATIVE);
SANITY_CHECK(stddev1, eps, ERROR_RELATIVE);
SANITY_CHECK(stddev2, eps, ERROR_RELATIVE);
SANITY_CHECK(stddev3, eps, ERROR_RELATIVE);
}
///////////// Norm ////////////////////////
CV_ENUM(NormType, NORM_INF, NORM_L1, NORM_L2)
typedef std::tr1::tuple<Size, MatType, NormType> NormParams;
typedef TestBaseWithParam<NormParams> NormFixture;
OCL_PERF_TEST_P(NormFixture, DISABLED_Norm,
::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES, NormType::all()))
{
const NormParams params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
const int normType = get<2>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type);
double res;
declare.in(src1, src2, WARMUP_RNG);
OCL_TEST_CYCLE() res = cv::norm(src1, src2, normType);
SANITY_CHECK(res, 1e-6, ERROR_RELATIVE);
}
///////////// Repeat ////////////////////////
typedef Size_MatType RepeatFixture;
OCL_PERF_TEST_P(RepeatFixture, Repeat,
::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), nx = 2, ny = 2;
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type), dst(Size(srcSize.width * nx, srcSize.height * ny), type);
declare.in(src, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::repeat(src, nx, ny, dst);
SANITY_CHECK(dst);
}
///////////// Min ////////////////////////
typedef Size_MatType MinFixture;
OCL_PERF_TEST_P(MinFixture, Min,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
declare.in(src1, src2, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::min(src1, src2, dst);
SANITY_CHECK(dst);
}
///////////// Max ////////////////////////
typedef Size_MatType MaxFixture;
OCL_PERF_TEST_P(MaxFixture, Max,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
declare.in(src1, src2, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::max(src1, src2, dst);
SANITY_CHECK(dst);
}
///////////// InRange ////////////////////////
typedef Size_MatType InRangeFixture;
OCL_PERF_TEST_P(InRangeFixture, InRange,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
{
const Size_MatType_t params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type), lb(srcSize, type), ub(srcSize, type), dst(srcSize, CV_8UC1);
declare.in(src, lb, ub, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::inRange(src, lb, ub, dst);
SANITY_CHECK(dst);
}
///////////// Normalize ////////////////////////
CV_ENUM(NormalizeModes, CV_MINMAX, CV_L2, CV_L1, CV_C)
typedef tuple<Size, MatType, NormalizeModes> NormalizeParams;
typedef TestBaseWithParam<NormalizeParams> NormalizeFixture;
OCL_PERF_TEST_P(NormalizeFixture, Normalize,
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES, NormalizeModes::all()))
{
const NormalizeParams params = GetParam();
const Size srcSize = get<0>(params);
const int type = get<1>(params), mode = get<2>(params);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type), dst(srcSize, type);
declare.in(src, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::normalize(src, dst, 10, 110, mode);
SANITY_CHECK(dst, 5e-2);
}
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL

View File

@ -0,0 +1,156 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
///////////// Merge////////////////////////
typedef tuple<Size, MatDepth, int> MergeParams;
typedef TestBaseWithParam<MergeParams> MergeFixture;
OCL_PERF_TEST_P(MergeFixture, Merge,
::testing::Combine(OCL_TEST_SIZES, OCL_PERF_ENUM(CV_8U, CV_32F), Values(2, 3)))
{
const MergeParams params = GetParam();
const Size srcSize = get<0>(params);
const int depth = get<1>(params), cn = get<2>(params), dtype = CV_MAKE_TYPE(depth, cn);
checkDeviceMaxMemoryAllocSize(srcSize, dtype);
UMat dst(srcSize, dtype);
vector<UMat> src(cn);
for (vector<UMat>::iterator i = src.begin(), end = src.end(); i != end; ++i)
{
i->create(srcSize, CV_MAKE_TYPE(depth, 1));
declare.in(*i, WARMUP_RNG);
}
declare.out(dst);
OCL_TEST_CYCLE() cv::merge(src, dst);
SANITY_CHECK(dst);
}
///////////// Split ////////////////////////
typedef MergeParams SplitParams;
typedef TestBaseWithParam<SplitParams> SplitFixture;
OCL_PERF_TEST_P(SplitFixture, DISABLED_Split,
::testing::Combine(OCL_TEST_SIZES, OCL_PERF_ENUM(CV_8U, CV_32F), Values(2, 3)))
{
const SplitParams params = GetParam();
const Size srcSize = get<0>(params);
const int depth = get<1>(params), cn = get<2>(params), type = CV_MAKE_TYPE(depth, cn);
ASSERT_TRUE(cn == 3 || cn == 2);
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat src(srcSize, type);
std::vector<UMat> dst(cn, UMat(srcSize, CV_MAKE_TYPE(depth, 1)));
declare.in(src, WARMUP_RNG);
for (int i = 0; i < cn; ++i)
declare.in(dst[i]);
OCL_TEST_CYCLE() cv::split(src, dst);
ASSERT_EQ(cn, (int)dst.size());
if (cn == 2)
{
UMat & dst0 = dst[0], & dst1 = dst[1];
SANITY_CHECK(dst0);
SANITY_CHECK(dst1);
}
else
{
UMat & dst0 = dst[0], & dst1 = dst[1], & dst2 = dst[2];
SANITY_CHECK(dst0);
SANITY_CHECK(dst1);
SANITY_CHECK(dst2);
}
}
///////////// MixChannels ////////////////////////
typedef tuple<Size, MatDepth> MixChannelsParams;
typedef TestBaseWithParam<MixChannelsParams> MixChannelsFixture;
OCL_PERF_TEST_P(MixChannelsFixture, MixChannels,
::testing::Combine(Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3),
OCL_PERF_ENUM(CV_8U, CV_32F)))
{
const MixChannelsParams params = GetParam();
const Size srcSize = get<0>(params);
const int depth = get<1>(params), type = CV_MAKE_TYPE(depth, 2), n = 2;
checkDeviceMaxMemoryAllocSize(srcSize, type);
UMat templ(srcSize, type);
std::vector<UMat> src(n, templ), dst(n, templ);
for (int i = 0; i < n; ++i)
declare.in(src[i], WARMUP_RNG).out(dst[i]);
int fromTo[] = { 1,2, 2,0, 0,3, 3,1 };
OCL_TEST_CYCLE() cv::mixChannels(src, dst, fromTo, 4);
UMat & dst0 = dst[0], & dst1 = dst[1];
SANITY_CHECK(dst0);
SANITY_CHECK(dst1);
}
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL

View File

@ -0,0 +1,99 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
///////////// dft ////////////////////////
typedef tuple<Size, int> DftParams;
typedef TestBaseWithParam<DftParams> DftFixture;
OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3),
Values((int)DFT_ROWS, (int)DFT_SCALE, (int)DFT_INVERSE,
(int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE)))
{
const DftParams params = GetParam();
const Size srcSize = get<0>(params);
const int flags = get<1>(params);
UMat src(srcSize, CV_32FC2), dst(srcSize, CV_32FC2);
declare.in(src, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::dft(src, dst, flags | DFT_COMPLEX_OUTPUT);
SANITY_CHECK(dst, 1e-3);
}
///////////// MulSpectrums ////////////////////////
typedef tuple<Size, bool> MulSpectrumsParams;
typedef TestBaseWithParam<MulSpectrumsParams> MulSpectrumsFixture;
OCL_PERF_TEST_P(MulSpectrumsFixture, MulSpectrums,
::testing::Combine(Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3),
Bool()))
{
const MulSpectrumsParams params = GetParam();
const Size srcSize = get<0>(params);
const bool conj = get<1>(params);
UMat src1(srcSize, CV_32FC2), src2(srcSize, CV_32FC2), dst(srcSize, CV_32FC2);
declare.in(src1, src2, WARMUP_RNG).out(dst);
OCL_TEST_CYCLE() cv::mulSpectrums(src1, src2, dst, 0, conj);
SANITY_CHECK(dst, 1e-3);
}
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL

View File

@ -0,0 +1,82 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Fangfang Bai, fangfang@multicorewareinc.com
// Jin Ma, jin@multicorewareinc.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "perf_precomp.hpp"
#include "opencv2/ts/ocl_perf.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
///////////// gemm ////////////////////////
typedef tuple<Size, int> GemmParams;
typedef TestBaseWithParam<GemmParams> GemmFixture;
OCL_PERF_TEST_P(GemmFixture, Gemm, ::testing::Combine(
::testing::Values(Size(1000, 1000), Size(1500, 1500)),
Values((int)cv::GEMM_3_T, (int)cv::GEMM_3_T | (int)cv::GEMM_2_T)))
{
GemmParams params = GetParam();
const Size srcSize = get<0>(params);
const int flags = get<1>(params);
UMat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1),
src3(srcSize, CV_32FC1), dst(srcSize, CV_32FC1);
declare.in(src1, src2, src3).out(dst);
randu(src1, -10.0f, 10.0f);
randu(src2, -10.0f, 10.0f);
randu(src3, -10.0f, 10.0f);
OCL_TEST_CYCLE() cv::gemm(src1, src2, 0.6, src3, 1.5, dst, flags);
SANITY_CHECK(dst, 0.01);
}
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL

View File

@ -1409,7 +1409,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
int wtype, dims1 = psrc1->dims(), dims2 = psrc2->dims();
Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
bool use_opencl = _dst.kind() == _OutputArray::UMAT && ocl::useOpenCL() && dims1 <= 2 && dims2 <= 2;
bool use_opencl = _dst.isUMat() && ocl::useOpenCL() && dims1 <= 2 && dims2 <= 2;
bool src1Scalar = checkScalar(*psrc1, type2, kind1, kind2);
bool src2Scalar = checkScalar(*psrc2, type1, kind2, kind1);
@ -2877,11 +2877,121 @@ static InRangeFunc getInRangeFunc(int depth)
return inRangeTab[depth];
}
static bool ocl_inRange( InputArray _src, InputArray _lowerb,
InputArray _upperb, OutputArray _dst )
{
int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind();
Size ssize = _src.size(), lsize = _lowerb.size(), usize = _upperb.size();
int stype = _src.type(), ltype = _lowerb.type(), utype = _upperb.type();
int sdepth = CV_MAT_DEPTH(stype), ldepth = CV_MAT_DEPTH(ltype), udepth = CV_MAT_DEPTH(utype);
int cn = CV_MAT_CN(stype);
bool lbScalar = false, ubScalar = false;
if( (lkind == _InputArray::MATX && skind != _InputArray::MATX) ||
ssize != lsize || stype != ltype )
{
if( !checkScalar(_lowerb, stype, lkind, skind) )
CV_Error( CV_StsUnmatchedSizes,
"The lower bounary is neither an array of the same size and same type as src, nor a scalar");
lbScalar = true;
}
if( (ukind == _InputArray::MATX && skind != _InputArray::MATX) ||
ssize != usize || stype != utype )
{
if( !checkScalar(_upperb, stype, ukind, skind) )
CV_Error( CV_StsUnmatchedSizes,
"The upper bounary is neither an array of the same size and same type as src, nor a scalar");
ubScalar = true;
}
if (lbScalar != ubScalar)
return false;
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
haveScalar = lbScalar && ubScalar;
if ( (!doubleSupport && sdepth == CV_64F) ||
(!haveScalar && (sdepth != ldepth || sdepth != udepth)) )
return false;
ocl::Kernel ker("inrange", ocl::core::inrange_oclsrc,
format("%s-D cn=%d -D T=%s%s", haveScalar ? "-D HAVE_SCALAR " : "",
cn, ocl::typeToStr(sdepth), doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
if (ker.empty())
return false;
_dst.create(ssize, CV_8UC1);
UMat src = _src.getUMat(), dst = _dst.getUMat(), lscalaru, uscalaru;
Mat lscalar, uscalar;
if (lbScalar && ubScalar)
{
lscalar = _lowerb.getMat();
uscalar = _upperb.getMat();
size_t esz = src.elemSize();
size_t blocksize = 36;
AutoBuffer<uchar> _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128);
uchar *buf = alignPtr(_buf + blocksize*cn, 16);
if( ldepth != sdepth && sdepth < CV_32S )
{
int* ilbuf = (int*)alignPtr(buf + blocksize*esz, 16);
int* iubuf = ilbuf + cn;
BinaryFunc sccvtfunc = getConvertFunc(ldepth, CV_32S);
sccvtfunc(lscalar.data, 0, 0, 0, (uchar*)ilbuf, 0, Size(cn, 1), 0);
sccvtfunc(uscalar.data, 0, 0, 0, (uchar*)iubuf, 0, Size(cn, 1), 0);
int minval = cvRound(getMinVal(sdepth)), maxval = cvRound(getMaxVal(sdepth));
for( int k = 0; k < cn; k++ )
{
if( ilbuf[k] > iubuf[k] || ilbuf[k] > maxval || iubuf[k] < minval )
ilbuf[k] = minval+1, iubuf[k] = minval;
}
lscalar = Mat(cn, 1, CV_32S, ilbuf);
uscalar = Mat(cn, 1, CV_32S, iubuf);
}
lscalar.convertTo(lscalar, stype);
uscalar.convertTo(uscalar, stype);
}
else
{
lscalaru = _lowerb.getUMat();
uscalaru = _upperb.getUMat();
}
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
dstarg = ocl::KernelArg::WriteOnly(dst);
if (haveScalar)
{
lscalar.copyTo(lscalaru);
uscalar.copyTo(uscalaru);
ker.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(lscalaru),
ocl::KernelArg::PtrReadOnly(uscalaru));
}
else
ker.args(srcarg, dstarg, ocl::KernelArg::ReadOnlyNoSize(lscalaru),
ocl::KernelArg::ReadOnlyNoSize(uscalaru));
size_t globalsize[2] = { ssize.width, ssize.height };
return ker.run(2, globalsize, NULL, false);
}
}
void cv::inRange(InputArray _src, InputArray _lowerb,
InputArray _upperb, OutputArray _dst)
{
if (ocl::useOpenCL() && _src.dims() <= 2 && _lowerb.dims() <= 2 &&
_upperb.dims() <= 2 && _dst.isUMat() && ocl_inRange(_src, _lowerb, _upperb, _dst))
return;
int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind();
Mat src = _src.getMat(), lb = _lowerb.getMat(), ub = _upperb.getMat();
@ -2905,14 +3015,14 @@ void cv::inRange(InputArray _src, InputArray _lowerb,
ubScalar = true;
}
CV_Assert( ((int)lbScalar ^ (int)ubScalar) == 0 );
CV_Assert(lbScalar == ubScalar);
int cn = src.channels(), depth = src.depth();
size_t esz = src.elemSize();
size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz;
_dst.create(src.dims, src.size, CV_8U);
_dst.create(src.dims, src.size, CV_8UC1);
Mat dst = _dst.getMat();
InRangeFunc func = getInRangeFunc(depth);

View File

@ -612,16 +612,111 @@ void cv::mixChannels( const Mat* src, size_t nsrcs, Mat* dst, size_t ndsts, cons
}
}
namespace cv {
static void getUMatIndex(const std::vector<UMat> & um, int cn, int & idx, int & cnidx)
{
int totalChannels = 0;
for (size_t i = 0, size = um.size(); i < size; ++i)
{
int ccn = um[i].channels();
totalChannels += ccn;
if (totalChannels == cn)
{
idx = (int)(i + 1);
cnidx = 0;
return;
}
else if (totalChannels > cn)
{
idx = (int)i;
cnidx = i == 0 ? cn : (cn - totalChannels + ccn);
return;
}
}
idx = cnidx = -1;
}
static bool ocl_mixChannels(InputArrayOfArrays _src, InputOutputArrayOfArrays _dst,
const int* fromTo, size_t npairs)
{
const std::vector<UMat> & src = *(const std::vector<UMat> *)_src.getObj();
std::vector<UMat> & dst = *(std::vector<UMat> *)_dst.getObj();
size_t nsrc = src.size(), ndst = dst.size();
CV_Assert(nsrc > 0 && ndst > 0);
Size size = src[0].size();
int depth = src[0].depth(), esz = CV_ELEM_SIZE(depth);
for (size_t i = 1, ssize = src.size(); i < ssize; ++i)
CV_Assert(src[i].size() == size && src[i].depth() == depth);
for (size_t i = 0, dsize = dst.size(); i < dsize; ++i)
CV_Assert(dst[i].size() == size && dst[i].depth() == depth);
String declsrc, decldst, declproc, declcn;
std::vector<UMat> srcargs(npairs), dstargs(npairs);
for (size_t i = 0; i < npairs; ++i)
{
int scn = fromTo[i<<1], dcn = fromTo[(i<<1) + 1];
int src_idx, src_cnidx, dst_idx, dst_cnidx;
getUMatIndex(src, scn, src_idx, src_cnidx);
getUMatIndex(dst, dcn, dst_idx, dst_cnidx);
CV_Assert(dst_idx >= 0 && src_idx >= 0);
srcargs[i] = src[src_idx];
srcargs[i].offset += src_cnidx * esz;
dstargs[i] = dst[dst_idx];
dstargs[i].offset += dst_cnidx * esz;
declsrc += format("DECLARE_INPUT_MAT(%d)", i);
decldst += format("DECLARE_OUTPUT_MAT(%d)", i);
declproc += format("PROCESS_ELEM(%d)", i);
declcn += format(" -D scn%d=%d -D dcn%d=%d", i, src[src_idx].channels(), i, dst[dst_idx].channels());
}
ocl::Kernel k("mixChannels", ocl::core::mixchannels_oclsrc,
format("-D T=%s -D DECLARE_INPUT_MATS=%s -D DECLARE_OUTPUT_MATS=%s"
" -D PROCESS_ELEMS=%s%s", ocl::memopTypeToStr(depth),
declsrc.c_str(), decldst.c_str(), declproc.c_str(), declcn.c_str()));
if (k.empty())
return false;
int argindex = 0;
for (size_t i = 0; i < npairs; ++i)
argindex = k.set(argindex, ocl::KernelArg::ReadOnlyNoSize(srcargs[i]));
for (size_t i = 0; i < npairs; ++i)
argindex = k.set(argindex, ocl::KernelArg::WriteOnlyNoSize(dstargs[i]));
k.set(k.set(argindex, size.height), size.width);
size_t globalsize[2] = { size.width, size.height };
return k.run(2, globalsize, NULL, false);
}
}
void cv::mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,
const int* fromTo, size_t npairs)
{
if(npairs == 0)
if (npairs == 0 || fromTo == NULL)
return;
if (ocl::useOpenCL() && src.isUMatVector() && dst.isUMatVector() &&
ocl_mixChannels(src, dst, fromTo, npairs))
return;
bool src_is_mat = src.kind() != _InputArray::STD_VECTOR_MAT &&
src.kind() != _InputArray::STD_VECTOR_VECTOR;
src.kind() != _InputArray::STD_VECTOR_VECTOR &&
src.kind() != _InputArray::STD_VECTOR_UMAT;
bool dst_is_mat = dst.kind() != _InputArray::STD_VECTOR_MAT &&
dst.kind() != _InputArray::STD_VECTOR_VECTOR;
dst.kind() != _InputArray::STD_VECTOR_VECTOR &&
dst.kind() != _InputArray::STD_VECTOR_UMAT;
int i;
int nsrc = src_is_mat ? 1 : (int)src.total();
int ndst = dst_is_mat ? 1 : (int)dst.total();
@ -639,12 +734,19 @@ void cv::mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,
void cv::mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,
const std::vector<int>& fromTo)
{
if(fromTo.empty())
if (fromTo.empty())
return;
if (ocl::useOpenCL() && src.isUMatVector() && dst.isUMatVector() &&
ocl_mixChannels(src, dst, &fromTo[0], fromTo.size()>>1))
return;
bool src_is_mat = src.kind() != _InputArray::STD_VECTOR_MAT &&
src.kind() != _InputArray::STD_VECTOR_VECTOR;
src.kind() != _InputArray::STD_VECTOR_VECTOR &&
src.kind() != _InputArray::STD_VECTOR_UMAT;
bool dst_is_mat = dst.kind() != _InputArray::STD_VECTOR_MAT &&
dst.kind() != _InputArray::STD_VECTOR_VECTOR;
dst.kind() != _InputArray::STD_VECTOR_VECTOR &&
dst.kind() != _InputArray::STD_VECTOR_UMAT;
int i;
int nsrc = src_is_mat ? 1 : (int)src.total();
int ndst = dst_is_mat ? 1 : (int)dst.total();
@ -1161,10 +1263,49 @@ static BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)
return cvtScaleTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)];
}
static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta )
{
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if (!doubleSupport && depth == CV_64F)
return false;
char cvt[2][50];
int wdepth = std::max(depth, CV_32F);
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=uchar -D srcT1=%s"
" -D workT=%s -D convertToWT1=%s -D convertToDT=%s%s",
ocl::typeToStr(depth), ocl::typeToStr(wdepth),
ocl::convertTypeStr(depth, wdepth, 1, cvt[0]),
ocl::convertTypeStr(wdepth, CV_8U, 1, cvt[1]),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
if (k.empty())
return false;
_dst.createSameSize(_src, CV_8UC(cn));
UMat src = _src.getUMat(), dst = _dst.getUMat();
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
dstarg = ocl::KernelArg::WriteOnly(dst, cn);
if (wdepth == CV_32F)
k.args(srcarg, dstarg, (float)alpha, (float)beta);
else if (wdepth == CV_64F)
k.args(srcarg, dstarg, alpha, beta);
size_t globalsize[2] = { src.cols * cn, src.rows };
return k.run(2, globalsize, NULL, false);
}
}
void cv::convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta )
{
if (ocl::useOpenCL() && _src.dims() <= 2 && _dst.isUMat() &&
ocl_convertScaleAbs(_src, _dst, alpha, beta))
return;
Mat src = _src.getMat();
int cn = src.channels();
double scale[] = {alpha, beta};

View File

@ -1726,8 +1726,8 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags)
void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
{
#ifdef HAVE_CLAMDFFT
if (ocl::useOpenCL() && ocl::haveAmdFft() && _dst.isUMat() && _src0.dims() <= 2
&& nonzero_rows == 0 && ocl_dft(_src0, _dst, flags))
if (ocl::useOpenCL() && ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
_dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0 && ocl_dft(_src0, _dst, flags))
return;
#endif
@ -2577,7 +2577,7 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2];
if( (flags & DFT_ROWS) || src.rows == 1 ||
if( (flags & DCT_ROWS) || src.rows == 1 ||
(src.cols == 1 && (src.isContinuous() && dst.isContinuous())))
{
stage = end_stage = 0;
@ -2597,7 +2597,7 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
{
len = src.cols;
count = src.rows;
if( len == 1 && !(flags & DFT_ROWS) )
if( len == 1 && !(flags & DCT_ROWS) )
{
len = src.rows;
count = 1;

View File

@ -2364,12 +2364,31 @@ bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double ma
return badPt.x < 0;
}
static bool ocl_patchNaNs( InputOutputArray _a, float value )
{
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=int"));
if (k.empty())
return false;
UMat a = _a.getUMat();
int cn = a.channels();
k.args(ocl::KernelArg::ReadOnlyNoSize(a),
ocl::KernelArg::WriteOnly(a), (float)value);
size_t globalsize[2] = { a.cols * cn, a.rows };
return k.run(2, globalsize, NULL, false);
}
void patchNaNs( InputOutputArray _a, double _val )
{
Mat a = _a.getMat();
CV_Assert( a.depth() == CV_32F );
CV_Assert( _a.depth() == CV_32F );
if (ocl::useOpenCL() && _a.isUMat() && _a.dims() <= 2 && ocl_patchNaNs(_a, (float)_val))
return;
Mat a = _a.getMat();
const Mat* arrays[] = {&a, 0};
int* ptrs[1];
NAryMatIterator it(arrays, (uchar**)ptrs);

View File

@ -41,6 +41,7 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
#ifdef HAVE_IPP
@ -724,7 +725,7 @@ static bool ocl_gemm( InputArray matA, InputArray matB, double alpha,
UMat A = matA.getUMat(), B = matB.getUMat(), D = matD.getUMat();
if (haveC)
ctrans ? transpose(matC, D) : matC.getMat().copyTo(D); // TODO fix it as soon as .copyTo works as expected
ctrans ? transpose(matC, D) : matC.copyTo(D);
else
D.setTo(Scalar::all(0));
@ -2154,20 +2155,61 @@ static void scaleAdd_64f(const double* src1, const double* src2, double* dst,
typedef void (*ScaleAddFunc)(const uchar* src1, const uchar* src2, uchar* dst, int len, const void* alpha);
static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst, int type )
{
int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), wdepth = std::max(depth, CV_32F);
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
Size size = _src1.size();
if ( (!doubleSupport && depth == CV_64F) || size != _src2.size() )
return false;
char cvt[2][50];
ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D convertToWT1=%s"
" -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s%s", ocl::typeToStr(depth),
ocl::typeToStr(wdepth), ocl::convertTypeStr(depth, wdepth, 1, cvt[0]),
ocl::convertTypeStr(wdepth, depth, 1, cvt[1]),
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
if (k.empty())
return false;
_dst.create(size, type);
UMat src1 = _src1.getUMat(), src2 = _src2.getUMat(), dst = _dst.getUMat();
ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
dstarg = ocl::KernelArg::WriteOnly(dst, cn);
if (wdepth == CV_32F)
k.args(src1arg, src2arg, dstarg, (float)alpha);
else
k.args(src1arg, src2arg, dstarg, alpha);
size_t globalsize[2] = { dst.cols * cn, dst.rows };
return k.run(2, globalsize, NULL, false);
}
}
void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst )
{
Mat src1 = _src1.getMat(), src2 = _src2.getMat();
int depth = src1.depth(), cn = src1.channels();
int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
CV_Assert( type == _src2.type() );
if (ocl::useOpenCL() && _src1.dims() <= 2 && _src2.dims() <= 2 && _dst.isUMat() &&
ocl_scaleAdd(_src1, alpha, _src2, _dst, type))
return;
CV_Assert( src1.type() == src2.type() );
if( depth < CV_32F )
{
addWeighted(_src1, alpha, _src2, 1, 0, _dst, depth);
return;
}
Mat src1 = _src1.getMat(), src2 = _src2.getMat();
CV_Assert(src1.size == src2.size);
_dst.create(src1.dims, src1.size, src1.type());
Mat dst = _dst.getMat();

View File

@ -1430,6 +1430,16 @@ Size _InputArray::size(int i) const
return vv[i].size();
}
if( k == STD_VECTOR_UMAT )
{
const std::vector<UMat>& vv = *(const std::vector<UMat>*)obj;
if( i < 0 )
return vv.empty() ? Size() : Size((int)vv.size(), 1);
CV_Assert( i < (int)vv.size() );
return vv[i].size();
}
if( k == OPENGL_BUFFER )
{
CV_Assert( i < 0 );
@ -2262,6 +2272,12 @@ void _OutputArray::release() const
return;
}
if( k == UMAT )
{
((UMat*)obj)->release();
return;
}
if( k == GPU_MAT )
{
((cuda::GpuMat*)obj)->release();
@ -2301,6 +2317,12 @@ void _OutputArray::release() const
return;
}
if( k == STD_VECTOR_UMAT )
{
((std::vector<UMat>*)obj)->clear();
return;
}
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
}
@ -2760,39 +2782,24 @@ void cv::transpose( InputArray _src, OutputArray _dst )
}
////////////////////////////////////// completeSymm /////////////////////////////////////////
void cv::completeSymm( InputOutputArray _m, bool LtoR )
{
Mat m = _m.getMat();
CV_Assert( m.dims <= 2 );
size_t step = m.step, esz = m.elemSize();
CV_Assert( m.dims <= 2 && m.rows == m.cols );
int i, j, nrows = m.rows, type = m.type();
int j0 = 0, j1 = nrows;
CV_Assert( m.rows == m.cols );
int rows = m.rows;
int j0 = 0, j1 = rows;
if( type == CV_32FC1 || type == CV_32SC1 )
uchar* data = m.data;
for( int i = 0; i < rows; i++ )
{
int* data = (int*)m.data;
size_t step = m.step/sizeof(data[0]);
for( i = 0; i < nrows; i++ )
{
if( !LtoR ) j1 = i; else j0 = i+1;
for( j = j0; j < j1; j++ )
data[i*step + j] = data[j*step + i];
}
if( !LtoR ) j1 = i; else j0 = i+1;
for( int j = j0; j < j1; j++ )
memcpy(data + (i*step + j*esz), data + (j*step + i*esz), esz);
}
else if( type == CV_64FC1 )
{
double* data = (double*)m.data;
size_t step = m.step/sizeof(data[0]);
for( i = 0; i < nrows; i++ )
{
if( !LtoR ) j1 = i; else j0 = i+1;
for( j = j0; j < j1; j++ )
data[i*step + j] = data[j*step + i];
}
}
else
CV_Error( CV_StsUnsupportedFormat, "" );
}

View File

@ -41,6 +41,9 @@
#include "precomp.hpp"
#include <map>
#include <string>
#include <sstream>
#include <iostream> // std::cerr
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
@ -1905,6 +1908,219 @@ const Device& Device::getDefault()
/////////////////////////////////////////////////////////////////////////////////////////
template <typename Functor, typename ObjectType>
inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
{
::size_t required;
cl_int err = f(obj, name, 0, NULL, &required);
if (err != CL_SUCCESS)
return err;
param.clear();
if (required > 0)
{
AutoBuffer<char> buf(required + 1);
char* ptr = (char*)buf; // cleanup is not needed
err = f(obj, name, required, ptr, NULL);
if (err != CL_SUCCESS)
return err;
param = ptr;
}
return CL_SUCCESS;
};
static void split(const std::string &s, char delim, std::vector<std::string> &elems) {
elems.clear();
if (s.size() == 0)
return;
std::istringstream ss(s);
std::string item;
while (!ss.eof())
{
std::getline(ss, item, delim);
elems.push_back(item);
}
}
// Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
// Sample: AMD:GPU:
// Sample: AMD:GPU:Tahiti
// Sample: :GPU|CPU: = '' = ':' = '::'
static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
{
std::vector<std::string> parts;
split(configurationStr, ':', parts);
if (parts.size() > 3)
{
std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
return false;
}
if (parts.size() > 2)
deviceNameOrID = parts[2];
if (parts.size() > 1)
{
split(parts[1], '|', deviceTypes);
}
if (parts.size() > 0)
{
platform = parts[0];
}
return true;
}
static cl_device_id selectOpenCLDevice()
{
std::string platform;
std::vector<std::string> deviceTypes;
std::string deviceName;
const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
if (configuration)
{
if (!parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName))
return NULL;
}
bool isID = false;
int deviceID = -1;
if (deviceName.length() == 1)
// We limit ID range to 0..9, because we want to write:
// - '2500' to mean i5-2500
// - '8350' to mean AMD FX-8350
// - '650' to mean GeForce 650
// To extend ID range change condition to '> 0'
{
isID = true;
for (size_t i = 0; i < deviceName.length(); i++)
{
if (!isdigit(deviceName[i]))
{
isID = false;
break;
}
}
if (isID)
{
deviceID = atoi(deviceName.c_str());
CV_Assert(deviceID >= 0);
}
}
cl_int status = CL_SUCCESS;
std::vector<cl_platform_id> platforms;
{
cl_uint numPlatforms = 0;
status = clGetPlatformIDs(0, NULL, &numPlatforms);
CV_Assert(status == CL_SUCCESS);
if (numPlatforms == 0)
return NULL;
platforms.resize((size_t)numPlatforms);
status = clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms);
CV_Assert(status == CL_SUCCESS);
platforms.resize(numPlatforms);
}
int selectedPlatform = -1;
if (platform.length() > 0)
{
for (size_t i = 0; i < platforms.size(); i++)
{
std::string name;
status = getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name);
CV_Assert(status == CL_SUCCESS);
if (name.find(platform) != std::string::npos)
{
selectedPlatform = (int)i;
break;
}
}
if (selectedPlatform == -1)
{
std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
goto not_found;
}
}
if (deviceTypes.size() == 0)
{
if (!isID)
{
deviceTypes.push_back("GPU");
deviceTypes.push_back("CPU");
}
else
{
deviceTypes.push_back("ALL");
}
}
for (size_t t = 0; t < deviceTypes.size(); t++)
{
int deviceType = 0;
if (deviceTypes[t] == "GPU")
{
deviceType = Device::TYPE_GPU;
}
else if (deviceTypes[t] == "CPU")
{
deviceType = Device::TYPE_CPU;
}
else if (deviceTypes[t] == "ACCELERATOR")
{
deviceType = Device::TYPE_ACCELERATOR;
}
else if (deviceTypes[t] == "ALL")
{
deviceType = Device::TYPE_ALL;
}
else
{
std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
goto not_found;
}
std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
(selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
i++)
{
cl_uint count = 0;
status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
CV_Assert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND);
if (count == 0)
continue;
size_t base = devices.size();
devices.resize(base + count);
status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
CV_Assert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND);
}
for (size_t i = (isID ? deviceID : 0);
(isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
i++)
{
std::string name;
status = getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name);
CV_Assert(status == CL_SUCCESS);
if (isID || name.find(deviceName) != std::string::npos)
{
// TODO check for OpenCL 1.1
return devices[i];
}
}
}
not_found:
std::cerr << "ERROR: Required OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl
<< " Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
<< " Device types: ";
for (size_t t = 0; t < deviceTypes.size(); t++)
{
std::cerr << deviceTypes[t] << " ";
}
std::cerr << std::endl << " Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
return NULL;
}
struct Context2::Impl
{
Impl()
@ -1913,6 +2129,42 @@ struct Context2::Impl
handle = 0;
}
void setDefault()
{
CV_Assert(handle == NULL);
cl_device_id d = selectOpenCLDevice();
if (d == NULL)
return;
cl_platform_id pl = NULL;
cl_int status = clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL);
CV_Assert(status == CL_SUCCESS);
cl_context_properties prop[] =
{
CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
0
};
// !!! in the current implementation force the number of devices to 1 !!!
int nd = 1;
handle = clCreateContext(prop, nd, &d, 0, 0, &status);
CV_Assert(status == CL_SUCCESS);
bool ok = handle != 0 && status >= 0;
if( ok )
{
devices.resize(nd);
devices[0].set(d);
}
else
{
handle = NULL;
}
}
Impl(int dtype0)
{
refcount = 1;
@ -2022,6 +2274,21 @@ Context2::Context2(int dtype)
create(dtype);
}
bool Context2::create()
{
if( !haveOpenCL() )
return false;
if(p)
p->release();
p = new Impl();
if(!p->handle)
{
delete p;
p = 0;
}
return p != 0;
}
bool Context2::create(int dtype0)
{
if( !haveOpenCL() )
@ -2039,7 +2306,11 @@ bool Context2::create(int dtype0)
Context2::~Context2()
{
p->release();
if (p)
{
p->release();
p = NULL;
}
}
Context2::Context2(const Context2& c)
@ -2062,7 +2333,7 @@ Context2& Context2::operator = (const Context2& c)
void* Context2::ptr() const
{
return p->handle;
return p == NULL ? NULL : p->handle;
}
size_t Context2::ndevices() const
@ -2081,23 +2352,16 @@ Context2& Context2::getDefault(bool initialize)
static Context2 ctx;
if(!ctx.p && haveOpenCL())
{
if (!ctx.p)
ctx.p = new Impl();
if (initialize)
{
// do not create new Context2 right away.
// First, try to retrieve existing context of the same type.
// In its turn, Platform::getContext() may call Context2::create()
// if there is no such context.
ctx.create(Device::TYPE_ACCELERATOR);
if(!ctx.p)
ctx.create(Device::TYPE_DGPU);
if(!ctx.p)
ctx.create(Device::TYPE_IGPU);
if(!ctx.p)
ctx.create(Device::TYPE_CPU);
}
else
{
ctx.p = new Impl();
if (ctx.p->handle == NULL)
ctx.p->setDefault();
}
}
@ -2553,6 +2817,16 @@ size_t Kernel::workGroupSize() const
sizeof(val), &val, &retsz) >= 0 ? val : 0;
}
size_t Kernel::preferedWorkGroupSizeMultiple() const
{
if(!p)
return 0;
size_t val = 0, retsz = 0;
cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
sizeof(val), &val, &retsz) >= 0 ? val : 0;
}
bool Kernel::compileWorkGroupSize(size_t wsz[]) const
{
if(!p || !wsz)
@ -2616,11 +2890,16 @@ struct Program::Impl
if( retval >= 0 )
{
errmsg = String(buf);
CV_Error_(Error::StsAssert, ("OpenCL program can not be built: %s", errmsg.c_str()));
printf("OpenCL program can not be built: %s", errmsg.c_str());
}
}
if( handle )
{
clReleaseProgram(handle);
handle = NULL;
}
}
CV_Assert(retval >= 0);
}
}

View File

@ -91,6 +91,9 @@
#else
#ifndef convertToWT2
#define convertToWT2 convertToWT1
#endif
#define srcelem1 convertToWT1(*(__global srcT1*)(srcptr1 + src1_index))
#define srcelem2 convertToWT2(*(__global srcT2*)(srcptr2 + src2_index))
@ -223,13 +226,17 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
#define convertToWT2
#define PROCESS_ELEM dstelem = convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0)
#elif defined OP_CONVERT
#define PROCESS_ELEM dstelem = convertToDT(srcelem1)
#elif defined OP_CONVERT_SCALE
#elif defined OP_CONVERT_SCALE_ABS
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT alpha, workT beta
#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + beta)
#define PROCESS_ELEM \
workT value = srcelem1 * alpha + beta; \
dstelem = convertToDT(value >= 0 ? value : -value)
#elif defined OP_SCALE_ADD
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , workT alpha
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * alpha + srcelem2)
#elif defined OP_CTP_AD || defined OP_CTP_AR
#ifdef OP_CTP_AD
@ -264,6 +271,13 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
dstelem = cos(alpha) * x; \
dstelem2 = sin(alpha) * x
#elif defined OP_PATCH_NANS
#undef EXTRA_PARAMS
#define EXTRA_PARAMS , int val
#define PROCESS_ELEM \
if (( srcelem1 & 0x7fffffff) > 0x7f800000 ) \
dstelem = val
#else
#error "unknown op type"
#endif

View File

@ -0,0 +1,89 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the copyright holders or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
__kernel void inrange(__global const uchar * src1ptr, int src1_step, int src1_offset,
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
#ifdef HAVE_SCALAR
__global const T * src2, __global const T * src3
#else
__global const uchar * src2ptr, int src2_step, int src2_offset,
__global const uchar * src3ptr, int src3_step, int src3_offset
#endif
)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < dst_cols && y < dst_rows)
{
int src1_index = mad24(y, src1_step, x*(int)sizeof(T)*cn + src1_offset);
int dst_index = mad24(y, dst_step, x + dst_offset);
__global const T * src1 = (__global const T *)(src1ptr + src1_index);
__global uchar * dst = dstptr + dst_index;
#ifndef HAVE_SCALAR
int src2_index = mad24(y, src2_step, x*(int)sizeof(T)*cn + src2_offset);
int src3_index = mad24(y, src3_step, x*(int)sizeof(T)*cn + src3_offset);
__global const T * src2 = (__global const T *)(src2ptr + src2_index);
__global const T * src3 = (__global const T *)(src3ptr + src3_index);
#endif
dst[0] = 255;
#pragma unroll
for (int c = 0; c < cn; ++c)
if ( src2[c] > src1[c] || src3[c] < src1[c] )
{
dst[0] = 0;
break;
}
}
}

View File

@ -0,0 +1,64 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the copyright holders or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#define DECLARE_INPUT_MAT(i) \
__global const uchar * src##i##ptr, int src##i##_step, int src##i##_offset,
#define DECLARE_OUTPUT_MAT(i) \
__global const uchar * dst##i##ptr, int dst##i##_step, int dst##i##_offset,
#define PROCESS_ELEM(i) \
int src##i##_index = mad24(src##i##_step, y, x * (int)sizeof(T) * scn##i + src##i##_offset); \
__global const T * src##i = (__global const T *)(src##i##ptr + src##i##_index); \
int dst##i##_index = mad24(dst##i##_step, y, x * (int)sizeof(T) * dcn##i + dst##i##_offset); \
__global T * dst##i = (__global T *)(dst##i##ptr + dst##i##_index); \
dst##i[0] = src##i[0];
__kernel void mixChannels(DECLARE_INPUT_MATS DECLARE_OUTPUT_MATS int rows, int cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < cols && y < rows)
{
PROCESS_ELEMS
}
}

View File

@ -107,7 +107,7 @@ std::wstring GetTempPathWinRT()
if (FAILED(WindowsCreateStringReference(RuntimeClass_Windows_Storage_ApplicationData,
(UINT32)wcslen(RuntimeClass_Windows_Storage_ApplicationData), &hstrHead, &str)))
return wstr;
if (FAILED(Windows::Foundation::GetActivationFactory(str, appdataFactory.ReleaseAndGetAddressOf())))
if (FAILED(RoGetActivationFactory(str, IID_PPV_ARGS(appdataFactory.ReleaseAndGetAddressOf()))))
return wstr;
if (FAILED(appdataFactory->get_Current(appdataRef.ReleaseAndGetAddressOf())))
return wstr;

View File

@ -42,6 +42,8 @@
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#include <cmath>
#ifdef HAVE_OPENCL
namespace cvtest {
@ -1234,13 +1236,186 @@ OCL_TEST_P(Normalize, Mat)
for (int i = 0, size = sizeof(modes) / sizeof(modes[0]); i < size; ++i)
{
OCL_OFF(cv::normalize(src1_roi, dst1_roi, 10, 110, modes[i], src1_roi.type(), mask_roi));
OCL_ON(cv::normalize(usrc1_roi, udst1_roi, 10, 110, modes[i], src1_roi.type(), umask_roi));
OCL_ON(cv::normalize(usrc1_roi, udst1_roi, 10, 110, modes[i], src1_roi.type(), umask_roi));
Near(1);
}
}
}
//////////////////////////////////////// InRange ///////////////////////////////////////////////
PARAM_TEST_CASE(InRange, MatDepth, Channels, bool /*Scalar or not*/, bool /*Roi*/)
{
int depth;
int cn;
bool scalars, use_roi;
cv::Scalar val1, val2;
TEST_DECLARE_INPUT_PARAMETER(src1)
TEST_DECLARE_INPUT_PARAMETER(src2)
TEST_DECLARE_INPUT_PARAMETER(src3)
TEST_DECLARE_OUTPUT_PARAMETER(dst)
virtual void SetUp()
{
depth = GET_PARAM(0);
cn = GET_PARAM(1);
scalars = GET_PARAM(2);
use_roi = GET_PARAM(3);
}
virtual void generateTestData()
{
const int type = CV_MAKE_TYPE(depth, cn);
Size roiSize = randomSize(1, MAX_VALUE);
Border src1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src1, src1_roi, roiSize, src1Border, type, -40, 40);
Border src2Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src2, src2_roi, roiSize, src2Border, type, -40, 40);
Border src3Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src3, src3_roi, roiSize, src3Border, type, -40, 40);
Border dstBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, roiSize, dstBorder, CV_8UC1, 5, 16);
val1 = cv::Scalar(rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0),
rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0));
val2 = cv::Scalar(rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0),
rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0));
UMAT_UPLOAD_INPUT_PARAMETER(src1)
UMAT_UPLOAD_INPUT_PARAMETER(src2)
UMAT_UPLOAD_INPUT_PARAMETER(src3)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
}
void Near()
{
OCL_EXPECT_MATS_NEAR(dst, 0)
}
};
OCL_TEST_P(InRange, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
OCL_OFF(cv::inRange(src1_roi, src2_roi, src3_roi, dst_roi));
OCL_ON(cv::inRange(usrc1_roi, usrc2_roi, usrc3_roi, udst_roi));
Near();
}
}
OCL_TEST_P(InRange, Scalar)
{
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
OCL_OFF(cv::inRange(src1_roi, val1, val2, dst_roi));
OCL_ON(cv::inRange(usrc1_roi, val1, val2, udst_roi));
Near();
}
}
//////////////////////////////// ConvertScaleAbs ////////////////////////////////////////////////
typedef ArithmTestBase ConvertScaleAbs;
OCL_TEST_P(ConvertScaleAbs, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
OCL_OFF(cv::convertScaleAbs(src1_roi, dst1_roi, val[0], val[1]));
OCL_ON(cv::convertScaleAbs(usrc1_roi, udst1_roi, val[0], val[1]));
Near(depth <= CV_32S ? 1 : 1e-6);
}
}
//////////////////////////////// ScaleAdd ////////////////////////////////////////////////
typedef ArithmTestBase ScaleAdd;
OCL_TEST_P(ScaleAdd, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
OCL_OFF(cv::scaleAdd(src1_roi, val[0], src2_roi, dst1_roi));
OCL_ON(cv::scaleAdd(usrc1_roi, val[0], usrc2_roi, udst1_roi));
Near(depth <= CV_32S ? 1 : 1e-6);
}
}
//////////////////////////////// PatchNans ////////////////////////////////////////////////
PARAM_TEST_CASE(PatchNaNs, Channels, bool)
{
int cn;
bool use_roi;
double value;
TEST_DECLARE_INPUT_PARAMETER(src)
virtual void SetUp()
{
cn = GET_PARAM(0);
use_roi = GET_PARAM(1);
}
virtual void generateTestData()
{
const int type = CV_MAKE_TYPE(CV_32F, cn);
Size roiSize = randomSize(1, 10);
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src, src_roi, roiSize, srcBorder, type, -40, 40);
// generating NaNs
roiSize.width *= cn;
for (int y = 0; y < roiSize.height; ++y)
{
float * const ptr = src_roi.ptr<float>(y);
for (int x = 0; x < roiSize.width; ++x)
ptr[x] = randomInt(-1, 1) == 0 ? std::numeric_limits<float>::quiet_NaN() : ptr[x];
}
value = randomDouble(-100, 100);
UMAT_UPLOAD_INPUT_PARAMETER(src)
}
void Near()
{
OCL_EXPECT_MATS_NEAR(src, 0)
}
};
OCL_TEST_P(PatchNaNs, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
generateTestData();
OCL_OFF(cv::patchNaNs(src_roi, value));
OCL_ON(cv::patchNaNs(usrc_roi, value));
Near();
}
}
//////////////////////////////////////// Instantiation /////////////////////////////////////////
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(::testing::Values(CV_8U, CV_8S), OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
@ -1276,7 +1451,10 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, MinMaxIdx_Mask, Combine(OCL_ALL_DEPTHS, ::te
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, ScaleAdd, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Arithm, PatchNaNs, Combine(OCL_ALL_CHANNELS, Bool()));
} } // namespace cvtest::ocl

View File

@ -52,7 +52,9 @@
namespace cvtest {
namespace ocl {
PARAM_TEST_CASE(MergeTestBase, MatDepth, Channels, bool)
//////////////////////////////////////// Merge ///////////////////////////////////////////////
PARAM_TEST_CASE(Merge, MatDepth, Channels, bool)
{
int depth, cn;
bool use_roi;
@ -75,7 +77,7 @@ PARAM_TEST_CASE(MergeTestBase, MatDepth, Channels, bool)
CV_Assert(cn >= 1 && cn <= 4);
}
void random_roi()
void generateTestData()
{
Size roiSize = randomSize(1, MAX_VALUE);
@ -117,13 +119,11 @@ PARAM_TEST_CASE(MergeTestBase, MatDepth, Channels, bool)
}
};
typedef MergeTestBase Merge;
OCL_TEST_P(Merge, Accuracy)
{
for(int j = 0; j < test_loop_times; j++)
{
random_roi();
generateTestData();
OCL_OFF(cv::merge(src_roi, dst_roi));
OCL_ON(cv::merge(usrc_roi, udst_roi));
@ -132,7 +132,9 @@ OCL_TEST_P(Merge, Accuracy)
}
}
PARAM_TEST_CASE(SplitTestBase, MatType, Channels, bool)
//////////////////////////////////////// Split ///////////////////////////////////////////////
PARAM_TEST_CASE(Split, MatType, Channels, bool)
{
int depth, cn;
bool use_roi;
@ -155,7 +157,7 @@ PARAM_TEST_CASE(SplitTestBase, MatType, Channels, bool)
CV_Assert(cn >= 1 && cn <= 4);
}
void random_roi()
void generateTestData()
{
Size roiSize = randomSize(1, MAX_VALUE);
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
@ -195,13 +197,11 @@ PARAM_TEST_CASE(SplitTestBase, MatType, Channels, bool)
}
};
typedef SplitTestBase Split;
OCL_TEST_P(Split, DISABLED_Accuracy)
{
for (int j = 0; j < test_loop_times; j++)
{
random_roi();
generateTestData();
OCL_OFF(cv::split(src_roi, dst_roi));
OCL_ON(cv::split(usrc_roi, udst_roi));
@ -214,8 +214,150 @@ OCL_TEST_P(Split, DISABLED_Accuracy)
}
}
OCL_INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(SplitMerge, Split, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
//////////////////////////////////////// MixChannels ///////////////////////////////////////////////
PARAM_TEST_CASE(MixChannels, MatType, bool)
{
int depth;
bool use_roi;
TEST_DECLARE_INPUT_PARAMETER(src1)
TEST_DECLARE_INPUT_PARAMETER(src2)
TEST_DECLARE_INPUT_PARAMETER(src3)
TEST_DECLARE_INPUT_PARAMETER(src4)
TEST_DECLARE_OUTPUT_PARAMETER(dst1)
TEST_DECLARE_OUTPUT_PARAMETER(dst2)
TEST_DECLARE_OUTPUT_PARAMETER(dst3)
TEST_DECLARE_OUTPUT_PARAMETER(dst4)
std::vector<Mat> src_roi, dst_roi, dst;
std::vector<UMat> usrc_roi, udst_roi, udst;
std::vector<int> fromTo;
virtual void SetUp()
{
depth = GET_PARAM(0);
use_roi = GET_PARAM(1);
}
// generate number of channels and create type
int type()
{
int cn = randomInt(1, 5);
return CV_MAKE_TYPE(depth, cn);
}
void generateTestData()
{
src_roi.clear();
dst_roi.clear();
dst.clear();
usrc_roi.clear();
udst_roi.clear();
udst.clear();
fromTo.clear();
Size roiSize = randomSize(1, MAX_VALUE);
{
Border src1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src1, src1_roi, roiSize, src1Border, type(), 2, 11);
Border src2Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src2, src2_roi, roiSize, src2Border, type(), -1540, 1740);
Border src3Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src3, src3_roi, roiSize, src3Border, type(), -1540, 1740);
Border src4Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(src4, src4_roi, roiSize, src4Border, type(), -1540, 1740);
}
{
Border dst1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(dst1, dst1_roi, roiSize, dst1Border, type(), 2, 11);
Border dst2Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(dst2, dst2_roi, roiSize, dst2Border, type(), -1540, 1740);
Border dst3Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(dst3, dst3_roi, roiSize, dst3Border, type(), -1540, 1740);
Border dst4Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
randomSubMat(dst4, dst4_roi, roiSize, dst4Border, type(), -1540, 1740);
}
UMAT_UPLOAD_INPUT_PARAMETER(src1)
UMAT_UPLOAD_INPUT_PARAMETER(src2)
UMAT_UPLOAD_INPUT_PARAMETER(src3)
UMAT_UPLOAD_INPUT_PARAMETER(src4)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst1)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst2)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst3)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst4)
int nsrc = randomInt(1, 5), ndst = randomInt(1, 5);
src_roi.push_back(src1_roi), usrc_roi.push_back(usrc1_roi);
if (nsrc >= 2)
src_roi.push_back(src2_roi), usrc_roi.push_back(usrc2_roi);
if (nsrc >= 3)
src_roi.push_back(src3_roi), usrc_roi.push_back(usrc3_roi);
if (nsrc >= 4)
src_roi.push_back(src4_roi), usrc_roi.push_back(usrc4_roi);
dst_roi.push_back(dst1_roi), udst_roi.push_back(udst1_roi),
dst.push_back(dst1), udst.push_back(udst1);
if (ndst >= 2)
dst_roi.push_back(dst2_roi), udst_roi.push_back(udst2_roi),
dst.push_back(dst2), udst.push_back(udst2);
if (ndst >= 3)
dst_roi.push_back(dst3_roi), udst_roi.push_back(udst3_roi),
dst.push_back(dst3), udst.push_back(udst3);
if (ndst >= 4)
dst_roi.push_back(dst4_roi), udst_roi.push_back(udst4_roi),
dst.push_back(dst4), udst.push_back(udst4);
int scntotal = 0, dcntotal = 0;
for (int i = 0; i < nsrc; ++i)
scntotal += src_roi[i].channels();
for (int i = 0; i < ndst; ++i)
dcntotal += dst_roi[i].channels();
int npairs = randomInt(1, std::min(scntotal, dcntotal) + 1);
fromTo.resize(npairs << 1);
for (int i = 0; i < npairs; ++i)
{
fromTo[i<<1] = randomInt(0, scntotal);
fromTo[(i<<1)+1] = randomInt(0, dcntotal);
}
}
};
OCL_TEST_P(MixChannels, Accuracy)
{
for (int j = 0; j < test_loop_times + 10; j++)
{
generateTestData();
OCL_OFF(cv::mixChannels(src_roi, dst_roi, fromTo));
OCL_ON(cv::mixChannels(usrc_roi, udst_roi, fromTo));
for (size_t i = 0, size = dst_roi.size(); i < size; ++i)
{
EXPECT_MAT_NEAR(dst[i], udst[i], 0.0);
EXPECT_MAT_NEAR(dst_roi[i], udst_roi[i], 0.0);
}
}
}
//////////////////////////////////////// Instantiation ///////////////////////////////////////////////
OCL_INSTANTIATE_TEST_CASE_P(Channels, Merge, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Channels, Split, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Channels, MixChannels, Combine(OCL_ALL_DEPTHS, Bool()));
} } // namespace cvtest::ocl

View File

@ -25,7 +25,7 @@ TEST(Core_Drawing, _914)
}
TEST(Core_OutputArraySreate, _1997)
TEST(Core_OutputArrayCreate, _1997)
{
struct local {
static void create(OutputArray arr, Size submatSize, int type)

View File

@ -281,7 +281,7 @@ CUDA_TEST_P(ConvertTo, WithOutScaling)
cv::Mat dst_gold;
src.convertTo(dst_gold, depth2);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
}
}

View File

@ -177,7 +177,7 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
};
// desabled while resize does not fixed
CUDA_TEST_P(HOG, Detect)
CUDA_TEST_P(HOG, DISABLED_Detect)
{
cv::Mat img_rgb = readImage("hog/road.png");
ASSERT_FALSE(img_rgb.empty());

View File

@ -49,6 +49,8 @@ using namespace perf;
//////////////////////////////////////////////////////////////////////
// GEMM
#ifdef HAVE_CUBLAS
CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T)
#define ALL_GEMM_FLAGS Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), \
GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
@ -98,6 +100,8 @@ PERF_TEST_P(Sz_Type_Flags, GEMM,
}
}
#endif
//////////////////////////////////////////////////////////////////////
// MulSpectrums

View File

@ -2514,7 +2514,7 @@ CUDA_TEST_P(AddWeighted, Accuracy)
cv::Mat dst_gold;
cv::addWeighted(src1, alpha, src2, beta, gamma, dst_gold, dst_depth);
EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 1.0 : 1e-3);
EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 2.0 : 1e-3);
}
}

View File

@ -734,7 +734,7 @@ CUDA_TEST_P(Normalize, WithOutMask)
cv::Mat dst_gold;
cv::normalize(src, dst_gold, alpha, beta, norm_type, type);
EXPECT_MAT_NEAR(dst_gold, dst, 1e-6);
EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
}
CUDA_TEST_P(Normalize, WithMask)

View File

@ -715,7 +715,7 @@ CUDA_TEST_P(CvtColor, BGR2YCrCb)
cv::Mat dst_gold;
cv::cvtColor(src, dst_gold, cv::COLOR_BGR2YCrCb);
EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
}
CUDA_TEST_P(CvtColor, RGB2YCrCb)
@ -728,7 +728,7 @@ CUDA_TEST_P(CvtColor, RGB2YCrCb)
cv::Mat dst_gold;
cv::cvtColor(src, dst_gold, cv::COLOR_RGB2YCrCb);
EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
}
CUDA_TEST_P(CvtColor, BGR2YCrCb4)
@ -749,7 +749,7 @@ CUDA_TEST_P(CvtColor, BGR2YCrCb4)
cv::split(h_dst, channels);
cv::merge(channels, 3, h_dst);
EXPECT_MAT_NEAR(dst_gold, h_dst, 1e-5);
EXPECT_MAT_NEAR(dst_gold, h_dst, 1.0);
}
CUDA_TEST_P(CvtColor, RGBA2YCrCb4)
@ -771,7 +771,7 @@ CUDA_TEST_P(CvtColor, RGBA2YCrCb4)
cv::split(h_dst, channels);
cv::merge(channels, 3, h_dst);
EXPECT_MAT_NEAR(dst_gold, h_dst, 1e-5);
EXPECT_MAT_NEAR(dst_gold, h_dst, 1.0);
}
CUDA_TEST_P(CvtColor, YCrCb2BGR)

View File

@ -444,7 +444,7 @@ PERF_TEST_P(ImagePair, OpticalFlowBM,
}
}
PERF_TEST_P(ImagePair, FastOpticalFlowBM,
PERF_TEST_P(ImagePair, DISABLED_FastOpticalFlowBM,
Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
declare.time(400);

View File

@ -297,6 +297,11 @@ public:
trees_ = get_param(params,"trees",4);
root = new NodePtr[trees_];
indices = new int*[trees_];
for (int i=0; i<trees_; ++i) {
root[i] = NULL;
indices[i] = NULL;
}
}
HierarchicalClusteringIndex(const HierarchicalClusteringIndex&);
@ -309,11 +314,34 @@ public:
*/
virtual ~HierarchicalClusteringIndex()
{
free_elements();
if (root!=NULL) {
delete[] root;
}
if (indices!=NULL) {
delete[] indices;
}
}
/**
* Release the inner elements of indices[]
*/
void free_elements()
{
if (indices!=NULL) {
for(int i=0; i<trees_; ++i) {
if (indices[i]!=NULL) {
delete[] indices[i];
indices[i] = NULL;
}
}
}
}
/**
* Returns size of index.
*/
@ -348,6 +376,9 @@ public:
if (branching_<2) {
throw FLANNException("Branching factor must be at least 2");
}
free_elements();
for (int i=0; i<trees_; ++i) {
indices[i] = new int[size_];
for (size_t j=0; j<size_; ++j) {
@ -387,6 +418,17 @@ public:
load_value(stream, centers_init_);
load_value(stream, leaf_size_);
load_value(stream, memoryCounter);
free_elements();
if (root!=NULL) {
delete[] root;
}
if (indices!=NULL) {
delete[] indices;
}
indices = new int*[trees_];
root = new NodePtr[trees_];
for (int i=0; i<trees_; ++i) {

View File

@ -222,6 +222,12 @@ elseif(HAVE_QTKIT)
list(APPEND HIGHGUI_LIBRARIES "-framework QTKit" "-framework QuartzCore" "-framework AppKit")
endif()
if(HAVE_INTELPERC)
list(APPEND highgui_srcs src/cap_intelperc.cpp)
ocv_include_directories(${INTELPERC_INCLUDE_DIR})
list(APPEND HIGHGUI_LIBRARIES ${INTELPERC_LIBRARIES})
endif(HAVE_INTELPERC)
if(IOS)
add_definitions(-DHAVE_IOS=1)
list(APPEND highgui_srcs src/ios_conversions.mm src/cap_ios_abstract_camera.mm src/cap_ios_photo_camera.mm src/cap_ios_video_camera.mm)

View File

@ -271,7 +271,8 @@ enum { CAP_ANY = 0, // autodetect
CAP_XIAPI = 1100, // XIMEA Camera API
CAP_AVFOUNDATION = 1200, // AVFoundation framework for iOS (OS X Lion will have the same API)
CAP_GIGANETIX = 1300, // Smartek Giganetix GigEVisionSDK
CAP_MSMF = 1400 // Microsoft Media Foundation (via videoInput)
CAP_MSMF = 1400, // Microsoft Media Foundation (via videoInput)
CAP_INTELPERC = 1500 // Intel Perceptual Computing SDK
};
// generic properties (based on DC1394 properties)
@ -496,6 +497,26 @@ enum { CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006
};
enum { CAP_PROP_INTELPERC_PROFILE_COUNT = 11001,
CAP_PROP_INTELPERC_PROFILE_IDX = 11002,
CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE = 11003,
CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE = 11004,
CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD = 11005,
CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ = 11006,
CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT = 11007
};
// Intel PerC streams
enum { CAP_INTELPERC_DEPTH_GENERATOR = 1 << 29,
CAP_INTELPERC_IMAGE_GENERATOR = 1 << 28,
CAP_INTELPERC_GENERATORS_MASK = CAP_INTELPERC_DEPTH_GENERATOR + CAP_INTELPERC_IMAGE_GENERATOR
};
enum { CAP_INTELPERC_DEPTH_MAP = 0, // Each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth.
CAP_INTELPERC_UVDEPTH_MAP = 1, // Each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates.
CAP_INTELPERC_IR_MAP = 2, // Each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam.
CAP_INTELPERC_IMAGE = 3
};
class CV_EXPORTS_W VideoCapture
{

View File

@ -313,7 +313,9 @@ enum
CV_CAP_AVFOUNDATION = 1200, // AVFoundation framework for iOS (OS X Lion will have the same API)
CV_CAP_GIGANETIX = 1300 // Smartek Giganetix GigEVisionSDK
CV_CAP_GIGANETIX = 1300, // Smartek Giganetix GigEVisionSDK
CV_CAP_INTELPERC = 1500 // Intel Perceptual Computing SDK
};
/* start capturing frames from camera: index = camera_index + domain_offset (CV_CAP_*) */
@ -459,16 +461,29 @@ enum
CV_CAP_PROP_IOS_DEVICE_EXPOSURE = 9002,
CV_CAP_PROP_IOS_DEVICE_FLASH = 9003,
CV_CAP_PROP_IOS_DEVICE_WHITEBALANCE = 9004,
CV_CAP_PROP_IOS_DEVICE_TORCH = 9005
CV_CAP_PROP_IOS_DEVICE_TORCH = 9005,
// Properties of cameras available through Smartek Giganetix Ethernet Vision interface
/* --- Vladimir Litvinenko (litvinenko.vladimir@gmail.com) --- */
,CV_CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
CV_CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
CV_CAP_PROP_GIGA_FRAME_OFFSET_Y = 10002,
CV_CAP_PROP_GIGA_FRAME_WIDTH_MAX = 10003,
CV_CAP_PROP_GIGA_FRAME_HEIGH_MAX = 10004,
CV_CAP_PROP_GIGA_FRAME_SENS_WIDTH = 10005,
CV_CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006
CV_CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006,
CV_CAP_PROP_INTELPERC_PROFILE_COUNT = 11001,
CV_CAP_PROP_INTELPERC_PROFILE_IDX = 11002,
CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE = 11003,
CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE = 11004,
CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD = 11005,
CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ = 11006,
CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT = 11007,
// Intel PerC streams
CV_CAP_INTELPERC_DEPTH_GENERATOR = 1 << 29,
CV_CAP_INTELPERC_IMAGE_GENERATOR = 1 << 28,
CV_CAP_INTELPERC_GENERATORS_MASK = CV_CAP_INTELPERC_DEPTH_GENERATOR + CV_CAP_INTELPERC_IMAGE_GENERATOR
};
enum
@ -549,6 +564,14 @@ enum
CV_CAP_ANDROID_ANTIBANDING_OFF
};
enum
{
CV_CAP_INTELPERC_DEPTH_MAP = 0, // Each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth.
CV_CAP_INTELPERC_UVDEPTH_MAP = 1, // Each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates.
CV_CAP_INTELPERC_IR_MAP = 2, // Each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam.
CV_CAP_INTELPERC_IMAGE = 3
};
/* retrieve or set capture properties */
CVAPI(double) cvGetCaptureProperty( CvCapture* capture, int property_id );
CVAPI(int) cvSetCaptureProperty( CvCapture* capture, int property_id, double value );

View File

@ -155,6 +155,9 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
#endif
#ifdef HAVE_GIGE_API
CV_CAP_GIGANETIX,
#endif
#ifdef HAVE_INTELPERC
CV_CAP_INTELPERC,
#endif
-1
};
@ -193,6 +196,7 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
defined(HAVE_AVFOUNDATION) || \
defined(HAVE_ANDROID_NATIVE_CAMERA) || \
defined(HAVE_GIGE_API) || \
defined(HAVE_INTELPERC) || \
(0)
// local variable to memorize the captured device
CvCapture *capture;
@ -342,6 +346,13 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
break; // CV_CAP_GIGANETIX
#endif
#ifdef HAVE_INTELPERC
case CV_CAP_INTELPERC:
capture = cvCreateCameraCapture_IntelPerC(index);
if (capture)
return capture;
break; // CV_CAP_INTEL_PERC
#endif
}
}

View File

@ -0,0 +1,714 @@
#include "precomp.hpp"
#ifdef HAVE_INTELPERC
#include "pxcsession.h"
#include "pxcsmartptr.h"
#include "pxccapture.h"
class CvIntelPerCStreamBase
{
protected:
struct FrameInternal
{
IplImage* retrieveFrame()
{
if (m_mat.empty())
return NULL;
m_iplHeader = IplImage(m_mat);
return &m_iplHeader;
}
cv::Mat m_mat;
private:
IplImage m_iplHeader;
};
public:
CvIntelPerCStreamBase()
: m_profileIdx(-1)
, m_frameIdx(0)
, m_timeStampStartNS(0)
{
}
virtual ~CvIntelPerCStreamBase()
{
}
bool isValid()
{
return (m_device.IsValid() && m_stream.IsValid());
}
bool grabFrame()
{
if (!m_stream.IsValid())
return false;
if (-1 == m_profileIdx)
{
if (!setProperty(CV_CAP_PROP_INTELPERC_PROFILE_IDX, 0))
return false;
}
PXCSmartPtr<PXCImage> pxcImage; PXCSmartSP sp;
if (PXC_STATUS_NO_ERROR > m_stream->ReadStreamAsync(&pxcImage, &sp))
return false;
if (PXC_STATUS_NO_ERROR > sp->Synchronize())
return false;
if (0 == m_timeStampStartNS)
m_timeStampStartNS = pxcImage->QueryTimeStamp();
m_timeStamp = (double)((pxcImage->QueryTimeStamp() - m_timeStampStartNS) / 10000);
m_frameIdx++;
return prepareIplImage(pxcImage);
}
int getProfileIDX() const
{
return m_profileIdx;
}
public:
virtual bool initStream(PXCSession *session) = 0;
virtual double getProperty(int propIdx)
{
double ret = 0.0;
switch (propIdx)
{
case CV_CAP_PROP_INTELPERC_PROFILE_COUNT:
ret = (double)m_profiles.size();
break;
case CV_CAP_PROP_FRAME_WIDTH :
if ((0 <= m_profileIdx) && (m_profileIdx < m_profiles.size()))
ret = (double)m_profiles[m_profileIdx].imageInfo.width;
break;
case CV_CAP_PROP_FRAME_HEIGHT :
if ((0 <= m_profileIdx) && (m_profileIdx < m_profiles.size()))
ret = (double)m_profiles[m_profileIdx].imageInfo.height;
break;
case CV_CAP_PROP_FPS :
if ((0 <= m_profileIdx) && (m_profileIdx < m_profiles.size()))
{
ret = ((double)m_profiles[m_profileIdx].frameRateMin.numerator / (double)m_profiles[m_profileIdx].frameRateMin.denominator
+ (double)m_profiles[m_profileIdx].frameRateMax.numerator / (double)m_profiles[m_profileIdx].frameRateMax.denominator) / 2.0;
}
break;
case CV_CAP_PROP_POS_FRAMES:
ret = (double)m_frameIdx;
break;
case CV_CAP_PROP_POS_MSEC:
ret = m_timeStamp;
break;
};
return ret;
}
virtual bool setProperty(int propIdx, double propVal)
{
bool isSet = false;
switch (propIdx)
{
case CV_CAP_PROP_INTELPERC_PROFILE_IDX:
{
int propValInt = (int)propVal;
if ((0 <= propValInt) && (propValInt < m_profiles.size()))
{
if (m_profileIdx != propValInt)
{
m_profileIdx = propValInt;
if (m_stream.IsValid())
m_stream->SetProfile(&m_profiles[m_profileIdx]);
m_frameIdx = 0;
m_timeStampStartNS = 0;
}
isSet = true;
}
}
break;
};
return isSet;
}
protected:
PXCSmartPtr<PXCCapture::Device> m_device;
bool initDevice(PXCSession *session)
{
if (NULL == session)
return false;
pxcStatus sts = PXC_STATUS_NO_ERROR;
PXCSession::ImplDesc templat;
memset(&templat,0,sizeof(templat));
templat.group = PXCSession::IMPL_GROUP_SENSOR;
templat.subgroup= PXCSession::IMPL_SUBGROUP_VIDEO_CAPTURE;
for (int modidx = 0; PXC_STATUS_NO_ERROR <= sts; modidx++)
{
PXCSession::ImplDesc desc;
sts = session->QueryImpl(&templat, modidx, &desc);
if (PXC_STATUS_NO_ERROR > sts)
break;
PXCSmartPtr<PXCCapture> capture;
sts = session->CreateImpl<PXCCapture>(&desc, &capture);
if (!capture.IsValid())
continue;
/* enumerate devices */
for (int devidx = 0; PXC_STATUS_NO_ERROR <= sts; devidx++)
{
PXCSmartPtr<PXCCapture::Device> device;
sts = capture->CreateDevice(devidx, &device);
if (PXC_STATUS_NO_ERROR <= sts)
{
m_device = device.ReleasePtr();
return true;
}
}
}
return false;
}
PXCSmartPtr<PXCCapture::VideoStream> m_stream;
void initStreamImpl(PXCImage::ImageType type)
{
if (!m_device.IsValid())
return;
pxcStatus sts = PXC_STATUS_NO_ERROR;
/* enumerate streams */
for (int streamidx = 0; PXC_STATUS_NO_ERROR <= sts; streamidx++)
{
PXCCapture::Device::StreamInfo sinfo;
sts = m_device->QueryStream(streamidx, &sinfo);
if (PXC_STATUS_NO_ERROR > sts)
break;
if (PXCCapture::VideoStream::CUID != sinfo.cuid)
continue;
if (type != sinfo.imageType)
continue;
sts = m_device->CreateStream<PXCCapture::VideoStream>(streamidx, &m_stream);
if (PXC_STATUS_NO_ERROR == sts)
break;
m_stream.ReleaseRef();
}
}
protected:
std::vector<PXCCapture::VideoStream::ProfileInfo> m_profiles;
int m_profileIdx;
int m_frameIdx;
pxcU64 m_timeStampStartNS;
double m_timeStamp;
virtual bool validProfile(const PXCCapture::VideoStream::ProfileInfo& /*pinfo*/)
{
return true;
}
void enumProfiles()
{
m_profiles.clear();
if (!m_stream.IsValid())
return;
pxcStatus sts = PXC_STATUS_NO_ERROR;
for (int profidx = 0; PXC_STATUS_NO_ERROR <= sts; profidx++)
{
PXCCapture::VideoStream::ProfileInfo pinfo;
sts = m_stream->QueryProfile(profidx, &pinfo);
if (PXC_STATUS_NO_ERROR > sts)
break;
if (validProfile(pinfo))
m_profiles.push_back(pinfo);
}
}
virtual bool prepareIplImage(PXCImage *pxcImage) = 0;
};
class CvIntelPerCStreamImage
: public CvIntelPerCStreamBase
{
public:
CvIntelPerCStreamImage()
{
}
virtual ~CvIntelPerCStreamImage()
{
}
virtual bool initStream(PXCSession *session)
{
if (!initDevice(session))
return false;
initStreamImpl(PXCImage::IMAGE_TYPE_COLOR);
if (!m_stream.IsValid())
return false;
enumProfiles();
return true;
}
virtual double getProperty(int propIdx)
{
switch (propIdx)
{
case CV_CAP_PROP_BRIGHTNESS:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_BRIGHTNESS, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_CONTRAST:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_CONTRAST, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_SATURATION:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_SATURATION, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_HUE:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_HUE, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_GAMMA:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_GAMMA, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_SHARPNESS:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_SHARPNESS, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_GAIN:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_GAIN, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_BACKLIGHT:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_BACK_LIGHT_COMPENSATION, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_EXPOSURE:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_EXPOSURE, &fret))
return (double)fret;
return 0.0;
}
break;
//Add image stream specific properties
}
return CvIntelPerCStreamBase::getProperty(propIdx);
}
virtual bool setProperty(int propIdx, double propVal)
{
switch (propIdx)
{
case CV_CAP_PROP_BRIGHTNESS:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_BRIGHTNESS, (float)propVal));
}
break;
case CV_CAP_PROP_CONTRAST:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_CONTRAST, (float)propVal));
}
break;
case CV_CAP_PROP_SATURATION:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_SATURATION, (float)propVal));
}
break;
case CV_CAP_PROP_HUE:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_HUE, (float)propVal));
}
break;
case CV_CAP_PROP_GAMMA:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_GAMMA, (float)propVal));
}
break;
case CV_CAP_PROP_SHARPNESS:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_SHARPNESS, (float)propVal));
}
break;
case CV_CAP_PROP_GAIN:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_GAIN, (float)propVal));
}
break;
case CV_CAP_PROP_BACKLIGHT:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_BACK_LIGHT_COMPENSATION, (float)propVal));
}
break;
case CV_CAP_PROP_EXPOSURE:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_EXPOSURE, (float)propVal));
}
break;
//Add image stream specific properties
}
return CvIntelPerCStreamBase::setProperty(propIdx, propVal);
}
public:
IplImage* retrieveFrame()
{
return m_frame.retrieveFrame();
}
protected:
FrameInternal m_frame;
bool prepareIplImage(PXCImage *pxcImage)
{
if (NULL == pxcImage)
return false;
PXCImage::ImageInfo info;
pxcImage->QueryInfo(&info);
PXCImage::ImageData data;
pxcImage->AcquireAccess(PXCImage::ACCESS_READ, PXCImage::COLOR_FORMAT_RGB24, &data);
if (PXCImage::SURFACE_TYPE_SYSTEM_MEMORY != data.type)
return false;
cv::Mat temp(info.height, info.width, CV_8UC3, data.planes[0], data.pitches[0]);
temp.copyTo(m_frame.m_mat);
pxcImage->ReleaseAccess(&data);
return true;
}
};
class CvIntelPerCStreamDepth
: public CvIntelPerCStreamBase
{
public:
CvIntelPerCStreamDepth()
{
}
virtual ~CvIntelPerCStreamDepth()
{
}
virtual bool initStream(PXCSession *session)
{
if (!initDevice(session))
return false;
initStreamImpl(PXCImage::IMAGE_TYPE_DEPTH);
if (!m_stream.IsValid())
return false;
enumProfiles();
return true;
}
virtual double getProperty(int propIdx)
{
switch (propIdx)
{
case CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_LOW_CONFIDENCE_VALUE, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_SATURATION_VALUE, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD:
{
if (!m_device.IsValid())
return 0.0;
float fret = 0.0f;
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_CONFIDENCE_THRESHOLD, &fret))
return (double)fret;
return 0.0;
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ:
{
if (!m_device.IsValid())
return 0.0f;
PXCPointF32 ptf;
if (PXC_STATUS_NO_ERROR == m_device->QueryPropertyAsPoint(PXCCapture::Device::PROPERTY_DEPTH_FOCAL_LENGTH, &ptf))
return (double)ptf.x;
return 0.0;
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT:
{
if (!m_device.IsValid())
return 0.0f;
PXCPointF32 ptf;
if (PXC_STATUS_NO_ERROR == m_device->QueryPropertyAsPoint(PXCCapture::Device::PROPERTY_DEPTH_FOCAL_LENGTH, &ptf))
return (double)ptf.y;
return 0.0;
}
break;
//Add depth stream sepcific properties
}
return CvIntelPerCStreamBase::getProperty(propIdx);
}
virtual bool setProperty(int propIdx, double propVal)
{
switch (propIdx)
{
case CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_LOW_CONFIDENCE_VALUE, (float)propVal));
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_SATURATION_VALUE, (float)propVal));
}
break;
case CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD:
{
if (!m_device.IsValid())
return false;
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_CONFIDENCE_THRESHOLD, (float)propVal));
}
break;
//Add depth stream sepcific properties
}
return CvIntelPerCStreamBase::setProperty(propIdx, propVal);
}
public:
IplImage* retrieveDepthFrame()
{
return m_frameDepth.retrieveFrame();
}
IplImage* retrieveIRFrame()
{
return m_frameIR.retrieveFrame();
}
IplImage* retrieveUVFrame()
{
return m_frameUV.retrieveFrame();
}
protected:
virtual bool validProfile(const PXCCapture::VideoStream::ProfileInfo& pinfo)
{
return (PXCImage::COLOR_FORMAT_DEPTH == pinfo.imageInfo.format);
}
protected:
FrameInternal m_frameDepth;
FrameInternal m_frameIR;
FrameInternal m_frameUV;
bool prepareIplImage(PXCImage *pxcImage)
{
if (NULL == pxcImage)
return false;
PXCImage::ImageInfo info;
pxcImage->QueryInfo(&info);
PXCImage::ImageData data;
pxcImage->AcquireAccess(PXCImage::ACCESS_READ, &data);
if (PXCImage::SURFACE_TYPE_SYSTEM_MEMORY != data.type)
return false;
if (PXCImage::COLOR_FORMAT_DEPTH != data.format)
return false;
{
cv::Mat temp(info.height, info.width, CV_16SC1, data.planes[0], data.pitches[0]);
temp.copyTo(m_frameDepth.m_mat);
}
{
cv::Mat temp(info.height, info.width, CV_16SC1, data.planes[1], data.pitches[1]);
temp.copyTo(m_frameIR.m_mat);
}
{
cv::Mat temp(info.height, info.width, CV_32FC2, data.planes[2], data.pitches[2]);
temp.copyTo(m_frameUV.m_mat);
}
pxcImage->ReleaseAccess(&data);
return true;
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
class CvCapture_IntelPerC : public CvCapture
{
public:
CvCapture_IntelPerC(int /*index*/)
: m_contextOpened(false)
{
pxcStatus sts = PXCSession_Create(&m_session);
if (PXC_STATUS_NO_ERROR > sts)
return;
m_contextOpened = m_imageStream.initStream(m_session);
m_contextOpened &= m_depthStream.initStream(m_session);
}
virtual ~CvCapture_IntelPerC(){}
virtual double getProperty(int propIdx)
{
double propValue = 0;
int purePropIdx = propIdx & ~CV_CAP_INTELPERC_GENERATORS_MASK;
if (CV_CAP_INTELPERC_IMAGE_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
{
propValue = m_imageStream.getProperty(purePropIdx);
}
else if (CV_CAP_INTELPERC_DEPTH_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
{
propValue = m_depthStream.getProperty(purePropIdx);
}
else
{
propValue = m_depthStream.getProperty(purePropIdx);
}
return propValue;
}
virtual bool setProperty(int propIdx, double propVal)
{
bool isSet = false;
int purePropIdx = propIdx & ~CV_CAP_INTELPERC_GENERATORS_MASK;
if (CV_CAP_INTELPERC_IMAGE_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
{
isSet = m_imageStream.setProperty(purePropIdx, propVal);
}
else if (CV_CAP_INTELPERC_DEPTH_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
{
isSet = m_depthStream.setProperty(purePropIdx, propVal);
}
else
{
isSet = m_depthStream.setProperty(purePropIdx, propVal);
}
return isSet;
}
bool grabFrame()
{
if (!isOpened())
return false;
bool isGrabbed = false;
if (m_depthStream.isValid())
isGrabbed = m_depthStream.grabFrame();
if ((m_imageStream.isValid()) && (-1 != m_imageStream.getProfileIDX()))
isGrabbed &= m_imageStream.grabFrame();
return isGrabbed;
}
virtual IplImage* retrieveFrame(int outputType)
{
IplImage* image = 0;
switch (outputType)
{
case CV_CAP_INTELPERC_DEPTH_MAP:
image = m_depthStream.retrieveDepthFrame();
break;
case CV_CAP_INTELPERC_UVDEPTH_MAP:
image = m_depthStream.retrieveUVFrame();
break;
case CV_CAP_INTELPERC_IR_MAP:
image = m_depthStream.retrieveIRFrame();
break;
case CV_CAP_INTELPERC_IMAGE:
image = m_imageStream.retrieveFrame();
break;
}
CV_Assert(NULL != image);
return image;
}
bool isOpened() const
{
return m_contextOpened;
}
protected:
bool m_contextOpened;
PXCSmartPtr<PXCSession> m_session;
CvIntelPerCStreamImage m_imageStream;
CvIntelPerCStreamDepth m_depthStream;
};
CvCapture* cvCreateCameraCapture_IntelPerC(int index)
{
CvCapture_IntelPerC* capture = new CvCapture_IntelPerC(index);
if( capture->isOpened() )
return capture;
delete capture;
return 0;
}
#endif //HAVE_INTELPERC

View File

@ -128,6 +128,7 @@ CvCapture* cvCreateFileCapture_OpenNI( const char* filename );
CvCapture* cvCreateCameraCapture_Android( int index );
CvCapture* cvCreateCameraCapture_XIMEA( int index );
CvCapture* cvCreateCameraCapture_AVFoundation(int index);
CvCapture* cvCreateCameraCapture_IntelPerC(int index);
CVAPI(int) cvHaveImageReader(const char* filename);

View File

@ -61,7 +61,6 @@
#ifdef __GNUC__
# pragma GCC diagnostic ignored "-Wmissing-declarations"
#endif
#include <MultiMon.h>
#include <commctrl.h>
#include <winuser.h>

View File

@ -35,6 +35,7 @@
defined(HAVE_XIMEA) || \
defined(HAVE_AVFOUNDATION) || \
defined(HAVE_GIGE_API) || \
defined(HAVE_INTELPERC) || \
(0)
//defined(HAVE_ANDROID_NATIVE_CAMERA) || - enable after #1193
# define BUILD_WITH_CAMERA_SUPPORT 1

View File

@ -34,7 +34,7 @@ http://en.wikipedia.org/wiki/Canny_edge_detector
* An example on using the canny edge detector can be found at opencv_source_code/samples/cpp/edge.cpp
* (Python) An example on using the canny edge detector can be found at opencv_source_code/samples/cpp/edge.py
* (Python) An example on using the canny edge detector can be found at opencv_source_code/samples/python/edge.py
cornerEigenValsAndVecs
----------------------

View File

@ -364,7 +364,7 @@ CV_INLINE double cvContourPerimeter( const void* contour )
}
/* Calculates contour boundning rectangle (update=1) or
/* Calculates contour bounding rectangle (update=1) or
just retrieves pre-calculated rectangle (update=0) */
CVAPI(CvRect) cvBoundingRect( CvArr* points, int update CV_DEFAULT(0) );

View File

@ -41,12 +41,13 @@
#include "precomp.hpp"
/*
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
#define USE_IPP_CANNY 1
#else
#undef USE_IPP_CANNY
#endif
*/
#ifdef USE_IPP_CANNY
namespace cv
{

View File

@ -40,10 +40,90 @@
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
// ----------------------------------------------------------------------
// CLAHE
namespace clahe
{
static bool calcLut(cv::InputArray _src, cv::OutputArray _dst,
const int tilesX, const int tilesY, const cv::Size tileSize,
const int clipLimit, const float lutScale)
{
cv::ocl::Kernel _k("calcLut", cv::ocl::imgproc::clahe_oclsrc);
bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
cv::String opts;
if(is_cpu)
opts = "-D CPU ";
else
opts = cv::format("-D WAVE_SIZE=%d", _k.preferedWorkGroupSizeMultiple());
cv::ocl::Kernel k("calcLut", cv::ocl::imgproc::clahe_oclsrc, opts);
if(k.empty())
return false;
cv::UMat src = _src.getUMat();
_dst.create(tilesX * tilesY, 256, CV_8UC1);
cv::UMat dst = _dst.getUMat();
int tile_size[2];
tile_size[0] = tileSize.width;
tile_size[1] = tileSize.height;
size_t localThreads[3] = { 32, 8, 1 };
size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 };
int idx = 0;
idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src));
idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst));
idx = k.set(idx, tile_size);
idx = k.set(idx, tilesX);
idx = k.set(idx, clipLimit);
idx = k.set(idx, lutScale);
if (!k.run(2, globalThreads, localThreads, false))
return false;
return true;
}
static bool transform(const cv::InputArray _src, cv::OutputArray _dst, const cv::InputArray _lut,
const int tilesX, const int tilesY, const cv::Size & tileSize)
{
cv::ocl::Kernel k("transform", cv::ocl::imgproc::clahe_oclsrc);
if(k.empty())
return false;
int tile_size[2];
tile_size[0] = tileSize.width;
tile_size[1] = tileSize.height;
cv::UMat src = _src.getUMat();
_dst.create(src.size(), src.type());
cv::UMat dst = _dst.getUMat();
cv::UMat lut = _lut.getUMat();
size_t localThreads[3] = { 32, 8, 1 };
size_t globalThreads[3] = { src.cols, src.rows, 1 };
int idx = 0;
idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src));
idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst));
idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(lut));
idx = k.set(idx, src.cols);
idx = k.set(idx, src.rows);
idx = k.set(idx, tile_size);
idx = k.set(idx, tilesX);
idx = k.set(idx, tilesY);
if (!k.run(2, globalThreads, localThreads, false))
return false;
return true;
}
}
namespace
{
class CLAHE_CalcLut_Body : public cv::ParallelLoopBody
@ -241,7 +321,9 @@ namespace
int tilesY_;
cv::Mat srcExt_;
cv::UMat usrcExt_;
cv::Mat lut_;
cv::UMat ulut_;
};
CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
@ -256,31 +338,34 @@ namespace
void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
{
cv::Mat src = _src.getMat();
CV_Assert( _src.type() == CV_8UC1 );
CV_Assert( src.type() == CV_8UC1 );
_dst.create( src.size(), src.type() );
cv::Mat dst = _dst.getMat();
bool useOpenCL = cv::ocl::useOpenCL() && _src.isUMat() && _src.dims()<=2;
const int histSize = 256;
lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1);
cv::Size tileSize;
cv::Mat srcForLut;
cv::_InputArray _srcForLut;
if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
if (_src.size().width % tilesX_ == 0 && _src.size().height % tilesY_ == 0)
{
tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
srcForLut = src;
tileSize = cv::Size(_src.size().width / tilesX_, _src.size().height / tilesY_);
_srcForLut = _src;
}
else
{
cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101);
tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
srcForLut = srcExt_;
if(useOpenCL)
{
cv::copyMakeBorder(_src, usrcExt_, 0, tilesY_ - (_src.size().height % tilesY_), 0, tilesX_ - (_src.size().width % tilesX_), cv::BORDER_REFLECT_101);
tileSize = cv::Size(usrcExt_.size().width / tilesX_, usrcExt_.size().height / tilesY_);
_srcForLut = usrcExt_;
}
else
{
cv::copyMakeBorder(_src, srcExt_, 0, tilesY_ - (_src.size().height % tilesY_), 0, tilesX_ - (_src.size().width % tilesX_), cv::BORDER_REFLECT_101);
tileSize = cv::Size(srcExt_.size().width / tilesX_, srcExt_.size().height / tilesY_);
_srcForLut = srcExt_;
}
}
const int tileSizeTotal = tileSize.area();
@ -293,6 +378,16 @@ namespace
clipLimit = std::max(clipLimit, 1);
}
if(useOpenCL && clahe::calcLut(_srcForLut, ulut_, tilesX_, tilesY_, tileSize, clipLimit, lutScale) )
if( clahe::transform(_src, _dst, ulut_, tilesX_, tilesY_, tileSize) )
return;
cv::Mat src = _src.getMat();
_dst.create( src.size(), src.type() );
cv::Mat dst = _dst.getMat();
cv::Mat srcForLut = _srcForLut.getMat();
lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1);
CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale);
cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody);
@ -325,6 +420,8 @@ namespace
{
srcExt_.release();
lut_.release();
usrcExt_.release();
ulut_.release();
}
}

View File

@ -3151,7 +3151,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
CV_Assert( scn == 3 || scn == 4 );
_dst.create(sz, CV_MAKETYPE(depth, 1));
dst = _dst.getMat();
/*
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
if( code == CV_BGR2GRAY )
{
@ -3174,7 +3174,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
return;
}
#endif
*/
bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
if( depth == CV_8U )

View File

@ -42,6 +42,7 @@
#include "precomp.hpp"
#include "opencl_kernels.hpp"
#include <sstream>
/****************************************************************************************\
Base Image Filter
@ -3314,6 +3315,246 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
}
return kernel.run(2, globalsize, localsize, true);
}
static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, int borderType, bool sync)
{
int type = src.type();
int cn = CV_MAT_CN(type);
int sdepth = CV_MAT_DEPTH(type);
Size bufSize = buf.size();
#ifdef ANDROID
size_t localsize[2] = {16, 10};
#else
size_t localsize[2] = {16, 16};
#endif
size_t globalsize[2] = {DIVUP(bufSize.width, localsize[0]) * localsize[0], DIVUP(bufSize.height, localsize[1]) * localsize[1]};
if (CV_8U == sdepth)
{
switch (cn)
{
case 1:
globalsize[0] = DIVUP((bufSize.width + 3) >> 2, localsize[0]) * localsize[0];
break;
case 2:
globalsize[0] = DIVUP((bufSize.width + 1) >> 1, localsize[0]) * localsize[0];
break;
case 4:
globalsize[0] = DIVUP(bufSize.width, localsize[0]) * localsize[0];
break;
}
}
int radiusX = anchor;
int radiusY = (int)((buf.rows - src.rows) >> 1);
bool isIsolatedBorder = (borderType & BORDER_ISOLATED) != 0;
const char* btype = NULL;
switch (borderType & ~BORDER_ISOLATED)
{
case BORDER_CONSTANT:
btype = "BORDER_CONSTANT";
break;
case BORDER_REPLICATE:
btype = "BORDER_REPLICATE";
break;
case BORDER_REFLECT:
btype = "BORDER_REFLECT";
break;
case BORDER_WRAP:
btype = "BORDER_WRAP";
break;
case BORDER_REFLECT101:
btype = "BORDER_REFLECT_101";
break;
default:
return false;
}
bool extra_extrapolation = src.rows < (int)((-radiusY + globalsize[1]) >> 1) + 1;
extra_extrapolation |= src.rows < radiusY;
extra_extrapolation |= src.cols < (int)((-radiusX + globalsize[0] + 8 * localsize[0] + 3) >> 1) + 1;
extra_extrapolation |= src.cols < radiusX;
cv::String build_options = cv::format("-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D %s -D %s",
radiusX, (int)localsize[0], (int)localsize[1], cn,
btype,
extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED");
Size srcWholeSize; Point srcOffset;
src.locateROI(srcWholeSize, srcOffset);
std::stringstream strKernel;
strKernel << "row_filter";
if (-1 != cn)
strKernel << "_C" << cn;
if (-1 != sdepth)
strKernel << "_D" << sdepth;
ocl::Kernel kernelRow;
if (!kernelRow.create(strKernel.str().c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, build_options))
return false;
int idxArg = 0;
idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(src));
idxArg = kernelRow.set(idxArg, (int)(src.step / src.elemSize()));
idxArg = kernelRow.set(idxArg, srcOffset.x);
idxArg = kernelRow.set(idxArg, srcOffset.y);
idxArg = kernelRow.set(idxArg, src.cols);
idxArg = kernelRow.set(idxArg, src.rows);
idxArg = kernelRow.set(idxArg, srcWholeSize.width);
idxArg = kernelRow.set(idxArg, srcWholeSize.height);
idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrWriteOnly(buf));
idxArg = kernelRow.set(idxArg, (int)(buf.step / buf.elemSize()));
idxArg = kernelRow.set(idxArg, buf.cols);
idxArg = kernelRow.set(idxArg, buf.rows);
idxArg = kernelRow.set(idxArg, radiusY);
idxArg = kernelRow.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ)));
return kernelRow.run(2, globalsize, localsize, sync);
}
static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, bool sync)
{
#ifdef ANDROID
size_t localsize[2] = {16, 10};
#else
size_t localsize[2] = {16, 16};
#endif
size_t globalsize[2] = {0, 0};
int type = dst.type();
int cn = CV_MAT_CN(type);
int ddepth = CV_MAT_DEPTH(type);
Size sz = dst.size();
globalsize[1] = DIVUP(sz.height, localsize[1]) * localsize[1];
cv::String build_options;
if (CV_8U == ddepth)
{
switch (cn)
{
case 1:
globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
anchor, (int)localsize[0], (int)localsize[1], cn, "float", "uchar", "convert_uchar_sat");
break;
case 2:
globalsize[0] = DIVUP((sz.width + 1) / 2, localsize[0]) * localsize[0];
build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
anchor, (int)localsize[0], (int)localsize[1], cn, "float2", "uchar2", "convert_uchar2_sat");
break;
case 3:
case 4:
globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
anchor, (int)localsize[0], (int)localsize[1], cn, "float4", "uchar4", "convert_uchar4_sat");
break;
}
}
else
{
globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
switch (dst.type())
{
case CV_32SC1:
build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
anchor, (int)localsize[0], (int)localsize[1], cn, "float", "int", "convert_int_sat");
break;
case CV_32SC3:
case CV_32SC4:
build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
anchor, (int)localsize[0], (int)localsize[1], cn, "float4", "int4", "convert_int4_sat");
break;
case CV_32FC1:
build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
anchor, (int)localsize[0], (int)localsize[1], cn, "float", "float", "");
break;
case CV_32FC3:
case CV_32FC4:
build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
anchor, (int)localsize[0], (int)localsize[1], cn, "float4", "float4", "");
break;
}
}
ocl::Kernel kernelCol;
if (!kernelCol.create("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, build_options))
return false;
int idxArg = 0;
idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(buf));
idxArg = kernelCol.set(idxArg, (int)(buf.step / buf.elemSize()));
idxArg = kernelCol.set(idxArg, buf.cols);
idxArg = kernelCol.set(idxArg, buf.rows);
idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrWriteOnly(dst));
idxArg = kernelCol.set(idxArg, (int)(dst.offset / dst.elemSize()));
idxArg = kernelCol.set(idxArg, (int)(dst.step / dst.elemSize()));
idxArg = kernelCol.set(idxArg, dst.cols);
idxArg = kernelCol.set(idxArg, dst.rows);
idxArg = kernelCol.set(idxArg, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ)));
return kernelCol.run(2, globalsize, localsize, sync);
}
static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
InputArray _kernelX, InputArray _kernelY, Point anchor,
double delta, int borderType )
{
if (abs(delta)> FLT_MIN)
return false;
int type = _src.type();
if ((CV_8UC1 != type) && (CV_8UC4 == type) &&
(CV_32FC1 != type) && (CV_32FC4 == type))
return false;
int cn = CV_MAT_CN(type);
Mat kernelX = _kernelX.getMat().reshape(1, 1);
if (1 != (kernelX.cols % 2))
return false;
Mat kernelY = _kernelY.getMat().reshape(1, 1);
if (1 != (kernelY.cols % 2))
return false;
int sdepth = CV_MAT_DEPTH(type);
if( anchor.x < 0 )
anchor.x = kernelX.cols >> 1;
if( anchor.y < 0 )
anchor.y = kernelY.cols >> 1;
if( ddepth < 0 )
ddepth = sdepth;
else if (ddepth != sdepth)
return false;
UMat src = _src.getUMat();
Size srcWholeSize; Point srcOffset;
src.locateROI(srcWholeSize, srcOffset);
if ( (0 != (srcOffset.x % 4)) ||
(0 != (src.cols % 4)) ||
(0 != ((src.step / src.elemSize()) % 4))
)
{
return false;
}
Size srcSize = src.size();
Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1);
UMat buf; buf.create(bufSize, CV_MAKETYPE(CV_32F, cn));
if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, true))
return false;
_dst.create(srcSize, CV_MAKETYPE(ddepth, cn));
UMat dst = _dst.getUMat();
return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y, true);
}
}
cv::Ptr<cv::BaseFilter> cv::getLinearFilter(int srcType, int dstType,
@ -3481,6 +3722,10 @@ void cv::sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
InputArray _kernelX, InputArray _kernelY, Point anchor,
double delta, int borderType )
{
bool use_opencl = ocl::useOpenCL() && _dst.isUMat();
if( use_opencl && ocl_sepFilter2D(_src, _dst, ddepth, _kernelX, _kernelY, anchor, delta, borderType))
return;
Mat src = _src.getMat(), kernelX = _kernelX.getMat(), kernelY = _kernelY.getMat();
if( ddepth < 0 )

View File

@ -1930,13 +1930,159 @@ void cv::calcBackProject( const Mat* images, int nimages, const int* channels,
}
namespace cv {
static void getUMatIndex(const std::vector<UMat> & um, int cn, int & idx, int & cnidx)
{
int totalChannels = 0;
for (size_t i = 0, size = um.size(); i < size; ++i)
{
int ccn = um[i].channels();
totalChannels += ccn;
if (totalChannels == cn)
{
idx = (int)(i + 1);
cnidx = 0;
return;
}
else if (totalChannels > cn)
{
idx = (int)i;
cnidx = i == 0 ? cn : (cn - totalChannels + ccn);
return;
}
}
idx = cnidx = -1;
}
static bool ocl_calcBackProject( InputArrayOfArrays _images, std::vector<int> channels,
InputArray _hist, OutputArray _dst,
const std::vector<float>& ranges,
float scale, size_t histdims )
{
const std::vector<UMat> & images = *(const std::vector<UMat> *)_images.getObj();
size_t nimages = images.size(), totalcn = images[0].channels();
CV_Assert(nimages > 0);
Size size = images[0].size();
int depth = images[0].depth();
for (size_t i = 1; i < nimages; ++i)
{
const UMat & m = images[i];
totalcn += m.channels();
CV_Assert(size == m.size() && depth == m.depth());
}
std::sort(channels.begin(), channels.end());
for (size_t i = 0; i < histdims; ++i)
CV_Assert(channels[i] < (int)totalcn);
if (histdims == 1)
{
int idx, cnidx;
getUMatIndex(images, channels[0], idx, cnidx);
CV_Assert(idx >= 0);
UMat im = images[idx];
String opts = format("-D histdims=1 -D scn=%d", im.channels());
ocl::Kernel lutk("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts);
if (lutk.empty())
return false;
size_t lsize = 256;
UMat lut(1, (int)lsize, CV_32SC1), hist = _hist.getUMat(), uranges(ranges, true);
lutk.args(ocl::KernelArg::ReadOnlyNoSize(hist), hist.rows,
ocl::KernelArg::PtrWriteOnly(lut), scale, ocl::KernelArg::PtrReadOnly(uranges));
if (!lutk.run(1, &lsize, NULL, false))
return false;
ocl::Kernel mapk("LUT", ocl::imgproc::calc_back_project_oclsrc, opts);
if (mapk.empty())
return false;
_dst.create(size, depth);
UMat dst = _dst.getUMat();
im.offset += cnidx;
mapk.args(ocl::KernelArg::ReadOnlyNoSize(im), ocl::KernelArg::PtrReadOnly(lut),
ocl::KernelArg::WriteOnly(dst));
size_t globalsize[2] = { size.width, size.height };
return mapk.run(2, globalsize, NULL, false);
}
else if (histdims == 2)
{
int idx0, idx1, cnidx0, cnidx1;
getUMatIndex(images, channels[0], idx0, cnidx0);
getUMatIndex(images, channels[1], idx1, cnidx1);
CV_Assert(idx0 >= 0 && idx1 >= 0);
UMat im0 = images[idx0], im1 = images[idx1];
// Lut for the first dimension
String opts = format("-D histdims=2 -D scn1=%d -D scn2=%d", im0.channels(), im1.channels());
ocl::Kernel lutk1("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts);
if (lutk1.empty())
return false;
size_t lsize = 256;
UMat lut(1, (int)lsize<<1, CV_32SC1), uranges(ranges, true), hist = _hist.getUMat();
lutk1.args(hist.rows, ocl::KernelArg::PtrWriteOnly(lut), (int)0, ocl::KernelArg::PtrReadOnly(uranges), (int)0);
if (!lutk1.run(1, &lsize, NULL, false))
return false;
// lut for the second dimension
ocl::Kernel lutk2("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts);
if (lutk2.empty())
return false;
lut.offset += lsize * sizeof(int);
lutk2.args(hist.cols, ocl::KernelArg::PtrWriteOnly(lut), (int)256, ocl::KernelArg::PtrReadOnly(uranges), (int)2);
if (!lutk2.run(1, &lsize, NULL, false))
return false;
// perform lut
ocl::Kernel mapk("LUT", ocl::imgproc::calc_back_project_oclsrc, opts);
if (mapk.empty())
return false;
_dst.create(size, depth);
UMat dst = _dst.getUMat();
im0.offset += cnidx0;
im1.offset += cnidx1;
mapk.args(ocl::KernelArg::ReadOnlyNoSize(im0), ocl::KernelArg::ReadOnlyNoSize(im1),
ocl::KernelArg::ReadOnlyNoSize(hist), ocl::KernelArg::PtrReadOnly(lut), scale, ocl::KernelArg::WriteOnly(dst));
size_t globalsize[2] = { size.width, size.height };
return mapk.run(2, globalsize, NULL, false);
}
return false;
}
}
void cv::calcBackProject( InputArrayOfArrays images, const std::vector<int>& channels,
InputArray hist, OutputArray dst,
const std::vector<float>& ranges,
double scale )
{
Size histSize = hist.size();
bool _1D = histSize.height == 1 || histSize.width == 1;
size_t histdims = _1D ? 1 : hist.dims();
if (ocl::useOpenCL() && images.isUMatVector() && dst.isUMat() && hist.type() == CV_32FC1 &&
histdims <= 2 && ranges.size() == histdims * 2 && histdims == channels.size() &&
ocl_calcBackProject(images, channels, hist, dst, ranges, (float)scale, histdims))
return;
Mat H0 = hist.getMat(), H;
int hcn = H0.channels();
if( hcn > 1 )
{
CV_Assert( H0.isContinuous() );
@ -1947,12 +2093,15 @@ void cv::calcBackProject( InputArrayOfArrays images, const std::vector<int>& cha
}
else
H = H0;
bool _1d = H.rows == 1 || H.cols == 1;
int i, dims = H.dims, rsz = (int)ranges.size(), csz = (int)channels.size();
int nimages = (int)images.total();
CV_Assert(nimages > 0);
CV_Assert(rsz == dims*2 || (rsz == 2 && _1d) || (rsz == 0 && images.depth(0) == CV_8U));
CV_Assert(csz == 0 || csz == dims || (csz == 1 && _1d));
float* _ranges[CV_MAX_DIM];
if( rsz > 0 )
{
@ -3169,7 +3318,7 @@ static bool ocl_calcHist(InputArray _src, OutputArray _hist)
static bool ocl_equalizeHist(InputArray _src, OutputArray _dst)
{
size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
size_t wgs = std::min<size_t>(ocl::Device::getDefault().maxWorkGroupSize(), BINS);
// calculation of histogram
UMat hist;

View File

@ -2212,7 +2212,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
int depth = src.depth(), cn = src.channels();
double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y;
int k, sx, sy, dx, dy;
/*
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
int mode = interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : 0;
int type = src.type();
@ -2240,7 +2240,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
return;
}
#endif
*/
if( interpolation == INTER_NEAREST )
{
resizeNN( src, dst, inv_scale_x, inv_scale_y );
@ -3299,7 +3299,10 @@ public:
if( m1->type() == CV_16SC2 && (m2->type() == CV_16UC1 || m2->type() == CV_16SC1) )
{
bufxy = (*m1)(Rect(x, y, bcols, brows));
bufa = (*m2)(Rect(x, y, bcols, brows));
const ushort* sA = (const ushort*)(m2->data + m2->step*(y+y1)) + x;
for( x1 = 0; x1 < bcols; x1++ )
A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2-1));
}
else if( planar_input )
{
@ -3680,7 +3683,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
{
for( x = 0; x < size.width; x++ )
{
int fxy = src2 ? src2[x] : 0;
int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1) : 0;
dst1f[x] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
dst2f[x] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
}
@ -3689,7 +3692,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
{
for( x = 0; x < size.width; x++ )
{
int fxy = src2 ? src2[x] : 0;
int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1): 0;
dst1f[x*2] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
dst1f[x*2+1] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
}
@ -4000,7 +4003,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
const int AB_BITS = MAX(10, (int)INTER_BITS);
const int AB_SCALE = 1 << AB_BITS;
/*
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
int depth = src.depth();
int channels = src.channels();
@ -4044,7 +4047,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
}
}
#endif
*/
for( x = 0; x < dst.cols; x++ )
{
adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
@ -4231,7 +4234,7 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
if( !(flags & WARP_INVERSE_MAP) )
invert(matM, matM);
/*
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
int depth = src.depth();
int channels = src.channels();
@ -4275,7 +4278,7 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
}
}
#endif
*/
Range range(0, dst.rows);
warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
parallel_for_(range, invoker, dst.total()/(double)(1<<16));

View File

@ -39,6 +39,7 @@
//
//M*/
#include "precomp.hpp"
#include "opencl_kernels.hpp"
namespace cv
{
@ -362,106 +363,175 @@ Moments::Moments( double _m00, double _m10, double _m01, double _m20, double _m1
nu30 = mu30*s3; nu21 = mu21*s3; nu12 = mu12*s3; nu03 = mu03*s3;
}
static bool ocl_moments( InputArray _src, Moments& m)
{
const int TILE_SIZE = 32;
const int K = 10;
ocl::Kernel k("moments", ocl::imgproc::moments_oclsrc, format("-D TILE_SIZE=%d", TILE_SIZE));
if( k.empty() )
return false;
UMat src = _src.getUMat();
Size sz = src.size();
int xtiles = (sz.width + TILE_SIZE-1)/TILE_SIZE;
int ytiles = (sz.height + TILE_SIZE-1)/TILE_SIZE;
int ntiles = xtiles*ytiles;
UMat umbuf(1, ntiles*K, CV_32S);
size_t globalsize[] = {xtiles, sz.height}, localsize[] = {1, TILE_SIZE};
bool ok = k.args(ocl::KernelArg::ReadOnly(src),
ocl::KernelArg::PtrWriteOnly(umbuf),
xtiles).run(2, globalsize, localsize, true);
if(!ok)
return false;
Mat mbuf = umbuf.getMat(ACCESS_READ);
for( int i = 0; i < ntiles; i++ )
{
double x = (i % xtiles)*TILE_SIZE, y = (i / xtiles)*TILE_SIZE;
const int* mom = mbuf.ptr<int>() + i*K;
double xm = x * mom[0], ym = y * mom[0];
// accumulate moments computed in each tile
// + m00 ( = m00' )
m.m00 += mom[0];
// + m10 ( = m10' + x*m00' )
m.m10 += mom[1] + xm;
// + m01 ( = m01' + y*m00' )
m.m01 += mom[2] + ym;
// + m20 ( = m20' + 2*x*m10' + x*x*m00' )
m.m20 += mom[3] + x * (mom[1] * 2 + xm);
// + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1];
// + m02 ( = m02' + 2*y*m01' + y*y*m00' )
m.m02 += mom[5] + y * (mom[2] * 2 + ym);
// + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
// + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
// + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
// + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
}
return true;
}
}
cv::Moments cv::moments( InputArray _src, bool binary )
{
const int TILE_SIZE = 32;
Mat mat = _src.getMat();
MomentsInTileFunc func = 0;
uchar nzbuf[TILE_SIZE*TILE_SIZE];
Moments m;
int type = mat.type();
int type = _src.type();
int depth = CV_MAT_DEPTH( type );
int cn = CV_MAT_CN( type );
if( mat.checkVector(2) >= 0 && (depth == CV_32F || depth == CV_32S))
return contourMoments(mat);
Size size = mat.size();
if( cn > 1 )
CV_Error( CV_StsBadArg, "Invalid image type" );
Size size = _src.size();
if( size.width <= 0 || size.height <= 0 )
return m;
if( binary || depth == CV_8U )
func = momentsInTile<uchar, int, int>;
else if( depth == CV_16U )
func = momentsInTile<ushort, int, int64>;
else if( depth == CV_16S )
func = momentsInTile<short, int, int64>;
else if( depth == CV_32F )
func = momentsInTile<float, double, double>;
else if( depth == CV_64F )
func = momentsInTile<double, double, double>;
if( ocl::useOpenCL() && type == CV_8UC1 && !binary &&
_src.isUMat() && ocl_moments(_src, m) )
;
else
CV_Error( CV_StsUnsupportedFormat, "" );
Mat src0(mat);
for( int y = 0; y < size.height; y += TILE_SIZE )
{
Size tileSize;
tileSize.height = std::min(TILE_SIZE, size.height - y);
Mat mat = _src.getMat();
if( mat.checkVector(2) >= 0 && (depth == CV_32F || depth == CV_32S))
return contourMoments(mat);
for( int x = 0; x < size.width; x += TILE_SIZE )
if( cn > 1 )
CV_Error( CV_StsBadArg, "Invalid image type (must be single-channel)" );
if( binary || depth == CV_8U )
func = momentsInTile<uchar, int, int>;
else if( depth == CV_16U )
func = momentsInTile<ushort, int, int64>;
else if( depth == CV_16S )
func = momentsInTile<short, int, int64>;
else if( depth == CV_32F )
func = momentsInTile<float, double, double>;
else if( depth == CV_64F )
func = momentsInTile<double, double, double>;
else
CV_Error( CV_StsUnsupportedFormat, "" );
Mat src0(mat);
for( int y = 0; y < size.height; y += TILE_SIZE )
{
tileSize.width = std::min(TILE_SIZE, size.width - x);
Mat src(src0, cv::Rect(x, y, tileSize.width, tileSize.height));
Size tileSize;
tileSize.height = std::min(TILE_SIZE, size.height - y);
if( binary )
for( int x = 0; x < size.width; x += TILE_SIZE )
{
cv::Mat tmp(tileSize, CV_8U, nzbuf);
cv::compare( src, 0, tmp, CV_CMP_NE );
src = tmp;
tileSize.width = std::min(TILE_SIZE, size.width - x);
Mat src(src0, cv::Rect(x, y, tileSize.width, tileSize.height));
if( binary )
{
cv::Mat tmp(tileSize, CV_8U, nzbuf);
cv::compare( src, 0, tmp, CV_CMP_NE );
src = tmp;
}
double mom[10];
func( src, mom );
if(binary)
{
double s = 1./255;
for( int k = 0; k < 10; k++ )
mom[k] *= s;
}
double xm = x * mom[0], ym = y * mom[0];
// accumulate moments computed in each tile
// + m00 ( = m00' )
m.m00 += mom[0];
// + m10 ( = m10' + x*m00' )
m.m10 += mom[1] + xm;
// + m01 ( = m01' + y*m00' )
m.m01 += mom[2] + ym;
// + m20 ( = m20' + 2*x*m10' + x*x*m00' )
m.m20 += mom[3] + x * (mom[1] * 2 + xm);
// + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1];
// + m02 ( = m02' + 2*y*m01' + y*y*m00' )
m.m02 += mom[5] + y * (mom[2] * 2 + ym);
// + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
// + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
// + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
// + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
}
double mom[10];
func( src, mom );
if(binary)
{
double s = 1./255;
for( int k = 0; k < 10; k++ )
mom[k] *= s;
}
double xm = x * mom[0], ym = y * mom[0];
// accumulate moments computed in each tile
// + m00 ( = m00' )
m.m00 += mom[0];
// + m10 ( = m10' + x*m00' )
m.m10 += mom[1] + xm;
// + m01 ( = m01' + y*m00' )
m.m01 += mom[2] + ym;
// + m20 ( = m20' + 2*x*m10' + x*x*m00' )
m.m20 += mom[3] + x * (mom[1] * 2 + xm);
// + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1];
// + m02 ( = m02' + 2*y*m01' + y*y*m00' )
m.m02 += mom[5] + y * (mom[2] * 2 + ym);
// + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
// + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
// + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
// + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
}
}

View File

@ -43,6 +43,7 @@
#include "precomp.hpp"
#include <limits.h>
#include <stdio.h>
#include "opencl_kernels.hpp"
/****************************************************************************************\
Basic Morphological Operations: Erosion & Dilation
@ -1283,11 +1284,132 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
}
#endif
static const char* op2str[] = {"ERODE", "DILATE"};
static bool ocl_morphology_op(InputArray _src, OutputArray _dst, InputArray _kernel, Size &ksize, const Point anchor, int iterations, int op)
{
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
if (_src.depth() == CV_64F && !doubleSupport)
return false;
UMat kernel8U;
_kernel.getUMat().convertTo(kernel8U, CV_8U);
UMat kernel = kernel8U.reshape(1, 1);
bool rectKernel = true;
for(int i = 0; i < kernel.rows * kernel.cols; ++i)
if(kernel.getMat(ACCESS_READ).at<uchar>(i) != 1)
rectKernel = false;
UMat src = _src.getUMat();
#ifdef ANDROID
size_t localThreads[3] = {16, 8, 1};
#else
size_t localThreads[3] = {16, 16, 1};
#endif
size_t globalThreads[3] = {(src.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0], (src.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1};
if(localThreads[0]*localThreads[1] * 2 < (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1))
return false;
char compile_option[128];
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D %s %s %s -D GENTYPE=%s -D DEPTH_%d",
anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], op2str[op], doubleSupport?"-D DOUBLE_SUPPORT" :"", rectKernel?"-D RECTKERNEL":"",
ocl::typeToStr(_src.type()), _src.depth() );
std::vector<ocl::Kernel> kernels;
for(int i = 0; i<iterations; i++)
{
ocl::Kernel k( "morph", ocl::imgproc::morph_oclsrc, compile_option);
if (k.empty())
return false;
kernels.push_back(k);
}
_dst.create(src.size(), src.type());
UMat dst = _dst.getUMat();
if( iterations== 1 && src.u != dst.u)
{
Size wholesize;
Point ofs;
src.locateROI(wholesize, ofs);
int wholecols = wholesize.width, wholerows = wholesize.height;
int idxArg = 0;
idxArg = kernels[0].set(idxArg, ocl::KernelArg::ReadOnlyNoSize(src));
idxArg = kernels[0].set(idxArg, ocl::KernelArg::WriteOnlyNoSize(dst));
idxArg = kernels[0].set(idxArg, ofs.x);
idxArg = kernels[0].set(idxArg, ofs.y);
idxArg = kernels[0].set(idxArg, src.cols);
idxArg = kernels[0].set(idxArg, src.rows);
idxArg = kernels[0].set(idxArg, ocl::KernelArg::PtrReadOnly(kernel));
idxArg = kernels[0].set(idxArg, wholecols);
idxArg = kernels[0].set(idxArg, wholerows);
return kernels[0].run(2, globalThreads, localThreads, false);
}
for(int i = 0; i< iterations; i++)
{
UMat source;
Size wholesize;
Point ofs;
if( i == 0)
{
int cols = src.cols, rows = src.rows;
src.locateROI(wholesize,ofs);
src.adjustROI(ofs.y, wholesize.height - rows - ofs.y, ofs.x, wholesize.width - cols - ofs.x);
src.copyTo(source);
src.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
source.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
}
else
{
int cols = dst.cols, rows = dst.rows;
dst.locateROI(wholesize,ofs);
dst.adjustROI(ofs.y, wholesize.height - rows - ofs.y, ofs.x, wholesize.width - cols - ofs.x);
dst.copyTo(source);
dst.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
source.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
}
source.locateROI(wholesize, ofs);
int wholecols = wholesize.width, wholerows = wholesize.height;
int idxArg = 0;
idxArg = kernels[i].set(idxArg, ocl::KernelArg::ReadOnlyNoSize(source));
idxArg = kernels[i].set(idxArg, ocl::KernelArg::WriteOnlyNoSize(dst));
idxArg = kernels[i].set(idxArg, ofs.x);
idxArg = kernels[i].set(idxArg, ofs.y);
idxArg = kernels[i].set(idxArg, source.cols);
idxArg = kernels[i].set(idxArg, source.rows);
idxArg = kernels[i].set(idxArg, ocl::KernelArg::PtrReadOnly(kernel));
idxArg = kernels[i].set(idxArg, wholecols);
idxArg = kernels[i].set(idxArg, wholerows);
if (!kernels[i].run(2, globalThreads, localThreads, false))
return false;
}
return true;
}
static void morphOp( int op, InputArray _src, OutputArray _dst,
InputArray _kernel,
Point anchor, int iterations,
int borderType, const Scalar& borderValue )
{
int src_type = _src.type(), dst_type = _dst.type(),
src_cn = CV_MAT_CN(src_type), src_depth = CV_MAT_DEPTH(src_type);
bool useOpenCL = cv::ocl::useOpenCL() && _src.isUMat() && _src.size() == _dst.size() && src_type == dst_type &&
_src.dims()<=2 && (src_cn == 1 || src_cn == 4) && (anchor.x == -1) && (anchor.y == -1) &&
(src_depth == CV_8U || src_depth == CV_32F || src_depth == CV_64F ) &&
(borderType == cv::BORDER_CONSTANT) && (borderValue == morphologyDefaultBorderValue()) &&
(op == MORPH_ERODE || op == MORPH_DILATE);
Mat kernel = _kernel.getMat();
Size ksize = kernel.data ? kernel.size() : Size(3,3);
anchor = normalizeAnchor(anchor, ksize);
@ -1299,13 +1421,11 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
return;
#endif
Mat src = _src.getMat();
_dst.create( src.size(), src.type() );
Mat dst = _dst.getMat();
if( iterations == 0 || kernel.rows*kernel.cols == 1 )
{
Mat src = _src.getMat();
_dst.create( src.size(), src.type() );
Mat dst = _dst.getMat();
src.copyTo(dst);
return;
}
@ -1326,6 +1446,14 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
iterations = 1;
}
if (useOpenCL && ocl_morphology_op(_src, _dst, kernel, ksize, anchor, iterations, op) )
return;
Mat src = _src.getMat();
_dst.create( src.size(), src.type() );
Mat dst = _dst.getMat();
int nStripes = 1;
#if defined HAVE_TEGRA_OPTIMIZATION
if (src.data != dst.data && iterations == 1 && //NOTE: threads are not used for inplace processing
@ -1362,49 +1490,97 @@ void cv::dilate( InputArray src, OutputArray dst, InputArray kernel,
morphOp( MORPH_DILATE, src, dst, kernel, anchor, iterations, borderType, borderValue );
}
void cv::morphologyEx( InputArray _src, OutputArray _dst, int op,
InputArray kernel, Point anchor, int iterations,
int borderType, const Scalar& borderValue )
{
Mat src = _src.getMat(), temp;
_dst.create(src.size(), src.type());
Mat dst = _dst.getMat();
int src_type = _src.type(), dst_type = _dst.type(),
src_cn = CV_MAT_CN(src_type), src_depth = CV_MAT_DEPTH(src_type);
bool use_opencl = cv::ocl::useOpenCL() && _src.isUMat() && _src.size() == _dst.size() && src_type == dst_type &&
_src.dims()<=2 && (src_cn == 1 || src_cn == 4) && (anchor.x == -1) && (anchor.y == -1) &&
(src_depth == CV_8U || src_depth == CV_32F || src_depth == CV_64F ) &&
(borderType == cv::BORDER_CONSTANT) && (borderValue == morphologyDefaultBorderValue());
_dst.create(_src.size(), _src.type());
Mat src, dst, temp;
UMat usrc, udst, utemp;
switch( op )
{
case MORPH_ERODE:
erode( src, dst, kernel, anchor, iterations, borderType, borderValue );
erode( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
break;
case MORPH_DILATE:
dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
dilate( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
break;
case MORPH_OPEN:
erode( src, dst, kernel, anchor, iterations, borderType, borderValue );
dilate( dst, dst, kernel, anchor, iterations, borderType, borderValue );
erode( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
dilate( _dst, _dst, kernel, anchor, iterations, borderType, borderValue );
break;
case CV_MOP_CLOSE:
dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
erode( dst, dst, kernel, anchor, iterations, borderType, borderValue );
dilate( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
erode( _dst, _dst, kernel, anchor, iterations, borderType, borderValue );
break;
case CV_MOP_GRADIENT:
erode( src, temp, kernel, anchor, iterations, borderType, borderValue );
dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
dst -= temp;
erode( _src, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel, anchor, iterations, borderType, borderValue );
dilate( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
if(use_opencl)
{
udst = _dst.getUMat();
subtract(udst, utemp, udst);
}
else
{
dst = _dst.getMat();
dst -= temp;
}
break;
case CV_MOP_TOPHAT:
if( src.data != dst.data )
temp = dst;
erode( src, temp, kernel, anchor, iterations, borderType, borderValue );
dilate( temp, temp, kernel, anchor, iterations, borderType, borderValue );
dst = src - temp;
if(use_opencl)
{
usrc = _src.getUMat();
udst = _dst.getUMat();
if( usrc.u != udst.u )
utemp = udst;
}
else
{
src = _src.getMat();
dst = _dst.getMat();
if( src.data != dst.data )
temp = dst;
}
erode( _src, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel, anchor, iterations, borderType, borderValue );
dilate( use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel,
anchor, iterations, borderType, borderValue );
if(use_opencl)
subtract(usrc, utemp, udst);
else
dst = src - temp;
break;
case CV_MOP_BLACKHAT:
if( src.data != dst.data )
temp = dst;
dilate( src, temp, kernel, anchor, iterations, borderType, borderValue );
erode( temp, temp, kernel, anchor, iterations, borderType, borderValue );
dst = temp - src;
if(use_opencl)
{
usrc = _src.getUMat();
udst = _dst.getUMat();
if( usrc.u != udst.u )
utemp = udst;
}
else
{
src = _src.getMat();
dst = _dst.getMat();
if( src.data != dst.data )
temp = dst;
}
dilate( _src, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel, anchor, iterations, borderType, borderValue );
erode( use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel,
anchor, iterations, borderType, borderValue );
if(use_opencl)
subtract(utemp, usrc, udst);
else
dst = temp - src;
break;
default:
CV_Error( CV_StsBadArg, "unknown morphological operation" );

View File

@ -0,0 +1,135 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Niko Li, newlife20080214@gmail.com
// Jia Haipeng, jiahaipeng95@gmail.com
// Xu Pang, pangxu010@163.com
// Wenju He, wenju@multicorewareinc.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//
#define OUT_OF_RANGE -1
#if histdims == 1
__kernel void calcLUT(__global const uchar * histptr, int hist_step, int hist_offset, int hist_bins,
__global int * lut, float scale, __constant float * ranges)
{
int x = get_global_id(0);
float value = convert_float(x);
if (value > ranges[1] || value < ranges[0])
lut[x] = OUT_OF_RANGE;
else
{
float lb = ranges[0], ub = ranges[1], gap = (ub - lb) / hist_bins;
value -= lb;
int bin = convert_int_sat_rtn(value / gap);
if (bin >= hist_bins)
lut[x] = OUT_OF_RANGE;
else
{
int hist_index = mad24(hist_step, bin, hist_offset);
__global const float * hist = (__global const float *)(histptr + hist_index);
lut[x] = (int)convert_uchar_sat_rte(hist[0] * scale);
}
}
}
__kernel void LUT(__global const uchar * src, int src_step, int src_offset,
__constant int * lut,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < dst_cols && y < dst_rows)
{
int src_index = mad24(y, src_step, src_offset + x * scn);
int dst_index = mad24(y, dst_step, dst_offset + x);
int value = lut[src[src_index]];
dst[dst_index] = value == OUT_OF_RANGE ? 0 : convert_uchar(value);
}
}
#elif histdims == 2
__kernel void calcLUT(int hist_bins, __global int * lut, int lut_offset,
__constant float * ranges, int roffset)
{
int x = get_global_id(0);
float value = convert_float(x);
ranges += roffset;
lut += lut_offset;
if (value > ranges[1] || value < ranges[0])
lut[x] = OUT_OF_RANGE;
else
{
float lb = ranges[0], ub = ranges[1], gap = (ub - lb) / hist_bins;
value -= lb;
int bin = convert_int_sat_rtn(value / gap);
lut[x] = bin >= hist_bins ? OUT_OF_RANGE : bin;
}
}
__kernel void LUT(__global const uchar * src1, int src1_step, int src1_offset,
__global const uchar * src2, int src2_step, int src2_offset,
__global const uchar * histptr, int hist_step, int hist_offset,
__constant int * lut, float scale,
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
int x = get_global_id(0);
int y = get_global_id(1);
if (x < dst_cols && y < dst_rows)
{
int src1_index = mad24(y, src1_step, src1_offset + x * scn1);
int src2_index = mad24(y, src2_step, src2_offset + x * scn2);
int dst_index = mad24(y, dst_step, dst_offset + x);
int bin1 = lut[src1[src1_index]];
int bin2 = lut[src2[src2_index] + 256];
dst[dst_index] = bin1 == OUT_OF_RANGE || bin2 == OUT_OF_RANGE ? 0 :
convert_uchar_sat_rte(*(__global const float *)(histptr +
mad24(hist_step, bin1, hist_offset + bin2 * (int)sizeof(float))) * scale);
}
}
#else
#error "(nimages <= 2) should be true"
#endif

View File

@ -0,0 +1,252 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Sen Liu, swjtuls1987@126.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef WAVE_SIZE
#define WAVE_SIZE 1
#endif
inline int calc_lut(__local int* smem, int val, int tid)
{
smem[tid] = val;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid == 0)
for (int i = 1; i < 256; ++i)
smem[i] += smem[i - 1];
barrier(CLK_LOCAL_MEM_FENCE);
return smem[tid];
}
#ifdef CPU
inline void reduce(volatile __local int* smem, int val, int tid)
{
smem[tid] = val;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 128)
smem[tid] = val += smem[tid + 128];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64)
smem[tid] = val += smem[tid + 64];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32)
smem[tid] += smem[tid + 32];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
smem[tid] += smem[tid + 16];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
smem[tid] += smem[tid + 8];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
smem[tid] += smem[tid + 4];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 2)
smem[tid] += smem[tid + 2];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 1)
smem[256] = smem[tid] + smem[tid + 1];
barrier(CLK_LOCAL_MEM_FENCE);
}
#else
inline void reduce(__local volatile int* smem, int val, int tid)
{
smem[tid] = val;
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 128)
smem[tid] = val += smem[tid + 128];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 64)
smem[tid] = val += smem[tid + 64];
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 32)
{
smem[tid] += smem[tid + 32];
#if WAVE_SIZE < 32
} barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 16)
{
#endif
smem[tid] += smem[tid + 16];
#if WAVE_SIZE < 16
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 8)
{
#endif
smem[tid] += smem[tid + 8];
smem[tid] += smem[tid + 4];
smem[tid] += smem[tid + 2];
smem[tid] += smem[tid + 1];
}
}
#endif
__kernel void calcLut(__global __const uchar * src, const int srcStep,
const int src_offset, __global uchar * lut,
const int dstStep, const int dst_offset,
const int2 tileSize, const int tilesX,
const int clipLimit, const float lutScale)
{
__local int smem[512];
int tx = get_group_id(0);
int ty = get_group_id(1);
int tid = get_local_id(1) * get_local_size(0)
+ get_local_id(0);
smem[tid] = 0;
barrier(CLK_LOCAL_MEM_FENCE);
for (int i = get_local_id(1); i < tileSize.y; i += get_local_size(1))
{
__global const uchar* srcPtr = src + mad24(ty * tileSize.y + i, srcStep, tx * tileSize.x + src_offset);
for (int j = get_local_id(0); j < tileSize.x; j += get_local_size(0))
{
const int data = srcPtr[j];
atomic_inc(&smem[data]);
}
}
barrier(CLK_LOCAL_MEM_FENCE);
int tHistVal = smem[tid];
barrier(CLK_LOCAL_MEM_FENCE);
if (clipLimit > 0)
{
// clip histogram bar
int clipped = 0;
if (tHistVal > clipLimit)
{
clipped = tHistVal - clipLimit;
tHistVal = clipLimit;
}
// find number of overall clipped samples
reduce(smem, clipped, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
clipped = smem[256];
#else
clipped = smem[0];
#endif
// broadcast evaluated value
__local int totalClipped;
if (tid == 0)
totalClipped = clipped;
barrier(CLK_LOCAL_MEM_FENCE);
// redistribute clipped samples evenly
int redistBatch = totalClipped / 256;
tHistVal += redistBatch;
int residual = totalClipped - redistBatch * 256;
if (tid < residual)
++tHistVal;
}
const int lutVal = calc_lut(smem, tHistVal, tid);
uint ires = (uint)convert_int_rte(lutScale * lutVal);
lut[(ty * tilesX + tx) * dstStep + tid + dst_offset] =
convert_uchar(clamp(ires, (uint)0, (uint)255));
}
__kernel void transform(__global __const uchar * src, const int srcStep, const int src_offset,
__global uchar * dst, const int dstStep, const int dst_offset,
__global uchar * lut, const int lutStep, int lut_offset,
const int cols, const int rows,
const int2 tileSize,
const int tilesX, const int tilesY)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
if (x >= cols || y >= rows)
return;
const float tyf = (convert_float(y) / tileSize.y) - 0.5f;
int ty1 = convert_int_rtn(tyf);
int ty2 = ty1 + 1;
const float ya = tyf - ty1;
ty1 = max(ty1, 0);
ty2 = min(ty2, tilesY - 1);
const float txf = (convert_float(x) / tileSize.x) - 0.5f;
int tx1 = convert_int_rtn(txf);
int tx2 = tx1 + 1;
const float xa = txf - tx1;
tx1 = max(tx1, 0);
tx2 = min(tx2, tilesX - 1);
const int srcVal = src[mad24(y, srcStep, x + src_offset)];
float res = 0;
res += lut[mad24(ty1 * tilesX + tx1, lutStep, srcVal + lut_offset)] * ((1.0f - xa) * (1.0f - ya));
res += lut[mad24(ty1 * tilesX + tx2, lutStep, srcVal + lut_offset)] * ((xa) * (1.0f - ya));
res += lut[mad24(ty2 * tilesX + tx1, lutStep, srcVal + lut_offset)] * ((1.0f - xa) * (ya));
res += lut[mad24(ty2 * tilesX + tx2, lutStep, srcVal + lut_offset)] * ((xa) * (ya));
uint ires = (uint)convert_int_rte(res);
dst[mad24(y, dstStep, x + dst_offset)] = convert_uchar(clamp(ires, (uint)0, (uint)255));
}

View File

@ -0,0 +1,116 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Niko Li, newlife20080214@gmail.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//
#define READ_TIMES_COL ((2*(RADIUSY+LSIZE1)-1)/LSIZE1)
#define RADIUS 1
#if CN ==1
#define ALIGN (((RADIUS)+3)>>2<<2)
#elif CN==2
#define ALIGN (((RADIUS)+1)>>1<<1)
#elif CN==3
#define ALIGN (((RADIUS)+3)>>2<<2)
#elif CN==4
#define ALIGN (RADIUS)
#define READ_TIMES_ROW ((2*(RADIUS+LSIZE0)-1)/LSIZE0)
#endif
/**********************************************************************************
These kernels are written for separable filters such as Sobel, Scharr, GaussianBlur.
Now(6/29/2011) the kernels only support 8U data type and the anchor of the convovle
kernel must be in the center. ROI is not supported either.
Each kernels read 4 elements(not 4 pixels), save them to LDS and read the data needed
from LDS to calculate the result.
The length of the convovle kernel supported is only related to the MAX size of LDS,
which is HW related.
Niko
6/29/2011
The info above maybe obsolete.
***********************************************************************************/
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
(__global const GENTYPE_SRC * restrict src,
const int src_step_in_pixel,
const int src_whole_cols,
const int src_whole_rows,
__global GENTYPE_DST * dst,
const int dst_offset_in_pixel,
const int dst_step_in_pixel,
const int dst_cols,
const int dst_rows,
__constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSY+1)))))
{
int x = get_global_id(0);
int y = get_global_id(1);
int l_x = get_local_id(0);
int l_y = get_local_id(1);
int start_addr = mad24(y, src_step_in_pixel, x);
int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);
int i;
GENTYPE_SRC sum, temp[READ_TIMES_COL];
__local GENTYPE_SRC LDS_DAT[LSIZE1 * READ_TIMES_COL][LSIZE0 + 1];
//read pixels from src
for(i = 0;i<READ_TIMES_COL;i++)
{
int current_addr = start_addr+i*LSIZE1*src_step_in_pixel;
current_addr = current_addr < end_addr ? current_addr : 0;
temp[i] = src[current_addr];
}
//save pixels to lds
for(i = 0;i<READ_TIMES_COL;i++)
{
LDS_DAT[l_y+i*LSIZE1][l_x] = temp[i];
}
barrier(CLK_LOCAL_MEM_FENCE);
//read pixels from lds and calculate the result
sum = LDS_DAT[l_y+RADIUSY][l_x]*mat_kernel[RADIUSY];
for(i=1;i<=RADIUSY;i++)
{
temp[0]=LDS_DAT[l_y+RADIUSY-i][l_x];
temp[1]=LDS_DAT[l_y+RADIUSY+i][l_x];
sum += temp[0] * mat_kernel[RADIUSY-i]+temp[1] * mat_kernel[RADIUSY+i];
}
//write the result to dst
if((x<dst_cols) & (y<dst_rows))
{
start_addr = mad24(y, dst_step_in_pixel, x + dst_offset_in_pixel);
dst[start_addr] = convert_to_DST(sum);
}
}

View File

@ -0,0 +1,570 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Niko Li, newlife20080214@gmail.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//
#define READ_TIMES_ROW ((2*(RADIUSX+LSIZE0)-1)/LSIZE0) //for c4 only
#define READ_TIMES_COL ((2*(RADIUSY+LSIZE1)-1)/LSIZE1)
//#pragma OPENCL EXTENSION cl_amd_printf : enable
#define RADIUS 1
#if CN ==1
#define ALIGN (((RADIUS)+3)>>2<<2)
#elif CN==2
#define ALIGN (((RADIUS)+1)>>1<<1)
#elif CN==3
#define ALIGN (((RADIUS)+3)>>2<<2)
#elif CN==4
#define ALIGN (RADIUS)
#endif
#ifdef BORDER_REPLICATE
//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (l_edge) : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (r_edge)-1 : (addr))
#endif
#ifdef BORDER_REFLECT
//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i)-1 : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr))
#endif
#ifdef BORDER_REFLECT_101
//BORDER_REFLECT_101: gfedcb|abcdefgh|gfedcba
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i) : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
#endif
//blur function does not support BORDER_WRAP
#ifdef BORDER_WRAP
//BORDER_WRAP: cdefgh|abcdefgh|abcdefg
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (i)+(r_edge) : (i))
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
#endif
#ifdef EXTRA_EXTRAPOLATION // border > src image size
#ifdef BORDER_CONSTANT
#define ELEM(i,l_edge,r_edge,elem1,elem2) (i)<(l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
#elif defined BORDER_REPLICATE
#define EXTRAPOLATE(t, minT, maxT) \
{ \
t = max(min(t, (maxT) - 1), (minT)); \
}
#elif defined BORDER_WRAP
#define EXTRAPOLATE(x, minT, maxT) \
{ \
if (t < (minT)) \
t -= ((t - (maxT) + 1) / (maxT)) * (maxT); \
if (t >= (maxT)) \
t %= (maxT); \
}
#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
#define EXTRAPOLATE_(t, minT, maxT, delta) \
{ \
if ((maxT) - (minT) == 1) \
t = (minT); \
else \
do \
{ \
if (t < (minT)) \
t = (minT) - (t - (minT)) - 1 + delta; \
else \
t = (maxT) - 1 - (t - (maxT)) - delta; \
} \
while (t >= (maxT) || t < (minT)); \
\
}
#ifdef BORDER_REFLECT
#define EXTRAPOLATE(t, minT, maxT) EXTRAPOLATE_(t, minT, maxT, 0)
#elif defined(BORDER_REFLECT_101)
#define EXTRAPOLATE(t, minT, maxT) EXTRAPOLATE_(t, minT, maxT, 1)
#endif
#else
#error No extrapolation method
#endif //BORDER_....
#else //EXTRA_EXTRAPOLATION
#ifdef BORDER_CONSTANT
#define ELEM(i,l_edge,r_edge,elem1,elem2) (i)<(l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
#else
#define EXTRAPOLATE(t, minT, maxT) \
{ \
int _delta = t - (minT); \
_delta = ADDR_L(_delta, 0, (maxT) - (minT)); \
_delta = ADDR_R(_delta, (maxT) - (minT), _delta); \
t = _delta + (minT); \
}
#endif //BORDER_CONSTANT
#endif //EXTRA_EXTRAPOLATION
/**********************************************************************************
These kernels are written for separable filters such as Sobel, Scharr, GaussianBlur.
Now(6/29/2011) the kernels only support 8U data type and the anchor of the convovle
kernel must be in the center. ROI is not supported either.
For channels =1,2,4, each kernels read 4 elements(not 4 pixels), and for channels =3,
the kernel read 4 pixels, save them to LDS and read the data needed from LDS to
calculate the result.
The length of the convovle kernel supported is related to the LSIZE0 and the MAX size
of LDS, which is HW related.
For channels = 1,3 the RADIUS is no more than LSIZE0*2
For channels = 2, the RADIUS is no more than LSIZE0
For channels = 4, arbitary RADIUS is supported unless the LDS is not enough
Niko
6/29/2011
The info above maybe obsolete.
***********************************************************************************/
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D0
(__global uchar * restrict src,
int src_step_in_pixel,
int src_offset_x, int src_offset_y,
int src_cols, int src_rows,
int src_whole_cols, int src_whole_rows,
__global float * dst,
int dst_step_in_pixel,
int dst_cols, int dst_rows,
int radiusy,
__constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
{
int x = get_global_id(0)<<2;
int y = get_global_id(1);
int l_x = get_local_id(0);
int l_y = get_local_id(1);
int start_x = x+src_offset_x - RADIUSX & 0xfffffffc;
int offset = src_offset_x - RADIUSX & 3;
int start_y = y + src_offset_y - radiusy;
int start_addr = mad24(start_y, src_step_in_pixel, start_x);
int i;
float4 sum;
uchar4 temp[READ_TIMES_ROW];
__local uchar4 LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];
#ifdef BORDER_CONSTANT
int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);
// read pixels from src
for (i = 0; i < READ_TIMES_ROW; i++)
{
int current_addr = start_addr+i*LSIZE0*4;
current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
temp[i] = *(__global uchar4*)&src[current_addr];
}
// judge if read out of boundary
#ifdef BORDER_ISOLATED
for (i = 0; i<READ_TIMES_ROW; i++)
{
temp[i].x = ELEM(start_x+i*LSIZE0*4, src_offset_x, src_offset_x + src_cols, 0, temp[i].x);
temp[i].y = ELEM(start_x+i*LSIZE0*4+1, src_offset_x, src_offset_x + src_cols, 0, temp[i].y);
temp[i].z = ELEM(start_x+i*LSIZE0*4+2, src_offset_x, src_offset_x + src_cols, 0, temp[i].z);
temp[i].w = ELEM(start_x+i*LSIZE0*4+3, src_offset_x, src_offset_x + src_cols, 0, temp[i].w);
temp[i] = ELEM(start_y, src_offset_y, src_offset_y + src_rows, (uchar4)0, temp[i]);
}
#else
for (i = 0; i<READ_TIMES_ROW; i++)
{
temp[i].x = ELEM(start_x+i*LSIZE0*4, 0, src_whole_cols, 0, temp[i].x);
temp[i].y = ELEM(start_x+i*LSIZE0*4+1, 0, src_whole_cols, 0, temp[i].y);
temp[i].z = ELEM(start_x+i*LSIZE0*4+2, 0, src_whole_cols, 0, temp[i].z);
temp[i].w = ELEM(start_x+i*LSIZE0*4+3, 0, src_whole_cols, 0, temp[i].w);
temp[i] = ELEM(start_y, 0, src_whole_rows, (uchar4)0, temp[i]);
}
#endif
#else // BORDER_CONSTANT
#ifdef BORDER_ISOLATED
int not_all_in_range = (start_x<src_offset_x) | (start_x + READ_TIMES_ROW*LSIZE0*4+4>src_offset_x + src_cols)| (start_y<src_offset_y) | (start_y >= src_offset_y + src_rows);
#else
int not_all_in_range = (start_x<0) | (start_x + READ_TIMES_ROW*LSIZE0*4+4>src_whole_cols)| (start_y<0) | (start_y >= src_whole_rows);
#endif
int4 index[READ_TIMES_ROW];
int4 addr;
int s_y;
if (not_all_in_range)
{
// judge if read out of boundary
for (i = 0; i < READ_TIMES_ROW; i++)
{
index[i] = (int4)(start_x+i*LSIZE0*4) + (int4)(0, 1, 2, 3);
#ifdef BORDER_ISOLATED
EXTRAPOLATE(index[i].x, src_offset_x, src_offset_x + src_cols);
EXTRAPOLATE(index[i].y, src_offset_x, src_offset_x + src_cols);
EXTRAPOLATE(index[i].z, src_offset_x, src_offset_x + src_cols);
EXTRAPOLATE(index[i].w, src_offset_x, src_offset_x + src_cols);
#else
EXTRAPOLATE(index[i].x, 0, src_whole_cols);
EXTRAPOLATE(index[i].y, 0, src_whole_cols);
EXTRAPOLATE(index[i].z, 0, src_whole_cols);
EXTRAPOLATE(index[i].w, 0, src_whole_cols);
#endif
}
s_y = start_y;
#ifdef BORDER_ISOLATED
EXTRAPOLATE(s_y, src_offset_y, src_offset_y + src_rows);
#else
EXTRAPOLATE(s_y, 0, src_whole_rows);
#endif
// read pixels from src
for (i = 0; i<READ_TIMES_ROW; i++)
{
addr = mad24((int4)s_y,(int4)src_step_in_pixel,index[i]);
temp[i].x = src[addr.x];
temp[i].y = src[addr.y];
temp[i].z = src[addr.z];
temp[i].w = src[addr.w];
}
}
else
{
// read pixels from src
for (i = 0; i<READ_TIMES_ROW; i++)
temp[i] = *(__global uchar4*)&src[start_addr+i*LSIZE0*4];
}
#endif //BORDER_CONSTANT
// save pixels to lds
for (i = 0; i<READ_TIMES_ROW; i++)
LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i];
barrier(CLK_LOCAL_MEM_FENCE);
// read pixels from lds and calculate the result
sum =convert_float4(vload4(0,(__local uchar*)&LDS_DAT[l_y][l_x]+RADIUSX+offset))*mat_kernel[RADIUSX];
for (i=1; i<=RADIUSX; i++)
{
temp[0] = vload4(0, (__local uchar*)&LDS_DAT[l_y][l_x] + RADIUSX + offset - i);
temp[1] = vload4(0, (__local uchar*)&LDS_DAT[l_y][l_x] + RADIUSX + offset + i);
sum += convert_float4(temp[0]) * mat_kernel[RADIUSX-i] + convert_float4(temp[1]) * mat_kernel[RADIUSX+i];
}
start_addr = mad24(y,dst_step_in_pixel,x);
// write the result to dst
if ((x+3<dst_cols) & (y<dst_rows))
*(__global float4*)&dst[start_addr] = sum;
else if ((x+2<dst_cols) && (y<dst_rows))
{
dst[start_addr] = sum.x;
dst[start_addr+1] = sum.y;
dst[start_addr+2] = sum.z;
}
else if ((x+1<dst_cols) && (y<dst_rows))
{
dst[start_addr] = sum.x;
dst[start_addr+1] = sum.y;
}
else if (x<dst_cols && y<dst_rows)
dst[start_addr] = sum.x;
}
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C4_D0
(__global uchar4 * restrict src,
int src_step_in_pixel,
int src_offset_x, int src_offset_y,
int src_cols, int src_rows,
int src_whole_cols, int src_whole_rows,
__global float4 * dst,
int dst_step_in_pixel,
int dst_cols, int dst_rows,
int radiusy,
__constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
{
int x = get_global_id(0);
int y = get_global_id(1);
int l_x = get_local_id(0);
int l_y = get_local_id(1);
int start_x = x+src_offset_x-RADIUSX;
int start_y = y+src_offset_y-radiusy;
int start_addr = mad24(start_y,src_step_in_pixel,start_x);
int i;
float4 sum;
uchar4 temp[READ_TIMES_ROW];
__local uchar4 LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];
#ifdef BORDER_CONSTANT
int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
// read pixels from src
for (i = 0; i<READ_TIMES_ROW; i++)
{
int current_addr = start_addr+i*LSIZE0;
current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
temp[i] = src[current_addr];
}
//judge if read out of boundary
#ifdef BORDER_ISOLATED
for (i = 0; i<READ_TIMES_ROW; i++)
{
temp[i]= ELEM(start_x+i*LSIZE0, src_offset_x, src_offset_x + src_cols, (uchar4)0, temp[i]);
temp[i]= ELEM(start_y, src_offset_y, src_offset_y + src_rows, (uchar4)0, temp[i]);
}
#else
for (i = 0; i<READ_TIMES_ROW; i++)
{
temp[i]= ELEM(start_x+i*LSIZE0, 0, src_whole_cols, (uchar4)0, temp[i]);
temp[i]= ELEM(start_y, 0, src_whole_rows, (uchar4)0, temp[i]);
}
#endif
#else
int index[READ_TIMES_ROW];
int s_x,s_y;
// judge if read out of boundary
for (i = 0; i<READ_TIMES_ROW; i++)
{
s_x = start_x+i*LSIZE0;
s_y = start_y;
#ifdef BORDER_ISOLATED
EXTRAPOLATE(s_x, src_offset_x, src_offset_x + src_cols);
EXTRAPOLATE(s_y, src_offset_y, src_offset_y + src_rows);
#else
EXTRAPOLATE(s_x, 0, src_whole_cols);
EXTRAPOLATE(s_y, 0, src_whole_rows);
#endif
index[i]=mad24(s_y, src_step_in_pixel, s_x);
}
//read pixels from src
for (i = 0; i<READ_TIMES_ROW; i++)
temp[i] = src[index[i]];
#endif //BORDER_CONSTANT
//save pixels to lds
for (i = 0; i<READ_TIMES_ROW; i++)
LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i];
barrier(CLK_LOCAL_MEM_FENCE);
//read pixels from lds and calculate the result
sum =convert_float4(LDS_DAT[l_y][l_x+RADIUSX])*mat_kernel[RADIUSX];
for (i=1; i<=RADIUSX; i++)
{
temp[0]=LDS_DAT[l_y][l_x+RADIUSX-i];
temp[1]=LDS_DAT[l_y][l_x+RADIUSX+i];
sum += convert_float4(temp[0])*mat_kernel[RADIUSX-i]+convert_float4(temp[1])*mat_kernel[RADIUSX+i];
}
//write the result to dst
if (x<dst_cols && y<dst_rows)
{
start_addr = mad24(y,dst_step_in_pixel,x);
dst[start_addr] = sum;
}
}
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D5
(__global float * restrict src,
int src_step_in_pixel,
int src_offset_x, int src_offset_y,
int src_cols, int src_rows,
int src_whole_cols, int src_whole_rows,
__global float * dst,
int dst_step_in_pixel,
int dst_cols, int dst_rows,
int radiusy,
__constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
{
int x = get_global_id(0);
int y = get_global_id(1);
int l_x = get_local_id(0);
int l_y = get_local_id(1);
int start_x = x+src_offset_x-RADIUSX;
int start_y = y+src_offset_y-radiusy;
int start_addr = mad24(start_y,src_step_in_pixel,start_x);
int i;
float sum;
float temp[READ_TIMES_ROW];
__local float LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];
#ifdef BORDER_CONSTANT
int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
// read pixels from src
for (i = 0; i<READ_TIMES_ROW; i++)
{
int current_addr = start_addr+i*LSIZE0;
current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
temp[i] = src[current_addr];
}
// judge if read out of boundary
#ifdef BORDER_ISOLATED
for (i = 0; i<READ_TIMES_ROW; i++)
{
temp[i]= ELEM(start_x+i*LSIZE0, src_offset_x, src_offset_x + src_cols, (float)0,temp[i]);
temp[i]= ELEM(start_y, src_offset_y, src_offset_y + src_rows, (float)0,temp[i]);
}
#else
for (i = 0; i<READ_TIMES_ROW; i++)
{
temp[i]= ELEM(start_x+i*LSIZE0, 0, src_whole_cols, (float)0,temp[i]);
temp[i]= ELEM(start_y, 0, src_whole_rows, (float)0,temp[i]);
}
#endif
#else // BORDER_CONSTANT
int index[READ_TIMES_ROW];
int s_x,s_y;
// judge if read out of boundary
for (i = 0; i<READ_TIMES_ROW; i++)
{
s_x = start_x + i*LSIZE0, s_y = start_y;
#ifdef BORDER_ISOLATED
EXTRAPOLATE(s_x, src_offset_x, src_offset_x + src_cols);
EXTRAPOLATE(s_y, src_offset_y, src_offset_y + src_rows);
#else
EXTRAPOLATE(s_x, 0, src_whole_cols);
EXTRAPOLATE(s_y, 0, src_whole_rows);
#endif
index[i]=mad24(s_y, src_step_in_pixel, s_x);
}
// read pixels from src
for (i = 0; i<READ_TIMES_ROW; i++)
temp[i] = src[index[i]];
#endif// BORDER_CONSTANT
//save pixels to lds
for (i = 0; i<READ_TIMES_ROW; i++)
LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i];
barrier(CLK_LOCAL_MEM_FENCE);
// read pixels from lds and calculate the result
sum =LDS_DAT[l_y][l_x+RADIUSX]*mat_kernel[RADIUSX];
for (i=1; i<=RADIUSX; i++)
{
temp[0]=LDS_DAT[l_y][l_x+RADIUSX-i];
temp[1]=LDS_DAT[l_y][l_x+RADIUSX+i];
sum += temp[0]*mat_kernel[RADIUSX-i]+temp[1]*mat_kernel[RADIUSX+i];
}
// write the result to dst
if (x<dst_cols && y<dst_rows)
{
start_addr = mad24(y,dst_step_in_pixel,x);
dst[start_addr] = sum;
}
}
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C4_D5
(__global float4 * restrict src,
int src_step_in_pixel,
int src_offset_x, int src_offset_y,
int src_cols, int src_rows,
int src_whole_cols, int src_whole_rows,
__global float4 * dst,
int dst_step_in_pixel,
int dst_cols, int dst_rows,
int radiusy,
__constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
{
int x = get_global_id(0);
int y = get_global_id(1);
int l_x = get_local_id(0);
int l_y = get_local_id(1);
int start_x = x+src_offset_x-RADIUSX;
int start_y = y+src_offset_y-radiusy;
int start_addr = mad24(start_y,src_step_in_pixel,start_x);
int i;
float4 sum;
float4 temp[READ_TIMES_ROW];
__local float4 LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];
#ifdef BORDER_CONSTANT
int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);
// read pixels from src
for (i = 0; i<READ_TIMES_ROW; i++)
{
int current_addr = start_addr+i*LSIZE0;
current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
temp[i] = src[current_addr];
}
// judge if read out of boundary
#ifdef BORDER_ISOLATED
for (i = 0; i<READ_TIMES_ROW; i++)
{
temp[i]= ELEM(start_x+i*LSIZE0, src_offset_x, src_offset_x + src_cols, (float4)0,temp[i]);
temp[i]= ELEM(start_y, src_offset_y, src_offset_y + src_rows, (float4)0,temp[i]);
}
#else
for (i = 0; i<READ_TIMES_ROW; i++)
{
temp[i]= ELEM(start_x+i*LSIZE0, 0, src_whole_cols, (float4)0,temp[i]);
temp[i]= ELEM(start_y, 0, src_whole_rows, (float4)0,temp[i]);
}
#endif
#else
int index[READ_TIMES_ROW];
int s_x,s_y;
// judge if read out of boundary
for (i = 0; i<READ_TIMES_ROW; i++)
{
s_x = start_x + i*LSIZE0, s_y = start_y;
#ifdef BORDER_ISOLATED
EXTRAPOLATE(s_x, src_offset_x, src_offset_x + src_cols);
EXTRAPOLATE(s_y, src_offset_y, src_offset_y + src_rows);
#else
EXTRAPOLATE(s_x, 0, src_whole_cols);
EXTRAPOLATE(s_y, 0, src_whole_rows);
#endif
index[i]=mad24(s_y,src_step_in_pixel,s_x);
}
// read pixels from src
for (i = 0; i<READ_TIMES_ROW; i++)
temp[i] = src[index[i]];
#endif
// save pixels to lds
for (i = 0; i<READ_TIMES_ROW; i++)
LDS_DAT[l_y][l_x+i*LSIZE0]=temp[i];
barrier(CLK_LOCAL_MEM_FENCE);
// read pixels from lds and calculate the result
sum =LDS_DAT[l_y][l_x+RADIUSX]*mat_kernel[RADIUSX];
for (i=1; i<=RADIUSX; i++)
{
temp[0]=LDS_DAT[l_y][l_x+RADIUSX-i];
temp[1]=LDS_DAT[l_y][l_x+RADIUSX+i];
sum += temp[0]*mat_kernel[RADIUSX-i]+temp[1]*mat_kernel[RADIUSX+i];
}
// write the result to dst
if (x<dst_cols && y<dst_rows)
{
start_addr = mad24(y,dst_step_in_pixel,x);
dst[start_addr] = sum;
}
}

View File

@ -0,0 +1,147 @@
/* See LICENSE file in the root OpenCV directory */
#if TILE_SIZE != 32
#error "TILE SIZE should be 32"
#endif
__kernel void moments(__global const uchar* src, int src_step, int src_offset,
int src_rows, int src_cols, __global int* mom0, int xtiles)
{
int x0 = get_global_id(0);
int y0 = get_group_id(1);
int x, y = get_local_id(1);
int x_min = x0*TILE_SIZE;
int ypix = y0*TILE_SIZE + y;
__local int mom[TILE_SIZE][10];
if( x_min < src_cols && y0*TILE_SIZE < src_rows )
{
if( ypix < src_rows )
{
int x_max = min(src_cols - x_min, TILE_SIZE);
__global const uchar* ptr = src + src_offset + ypix*src_step + x_min;
int4 S = (int4)(0,0,0,0), p;
#define SUM_ELEM(elem, ofs) \
(int4)(1, (ofs), (ofs)*(ofs), (ofs)*(ofs)*(ofs))*elem
x = x_max & -4;
if( x_max >= 4 )
{
p = convert_int4(vload4(0, ptr));
S += SUM_ELEM(p.s0, 0) + SUM_ELEM(p.s1, 1) + SUM_ELEM(p.s2, 2) + SUM_ELEM(p.s3, 3);
if( x_max >= 8 )
{
p = convert_int4(vload4(0, ptr+4));
S += SUM_ELEM(p.s0, 4) + SUM_ELEM(p.s1, 5) + SUM_ELEM(p.s2, 6) + SUM_ELEM(p.s3, 7);
if( x_max >= 12 )
{
p = convert_int4(vload4(0, ptr+8));
S += SUM_ELEM(p.s0, 8) + SUM_ELEM(p.s1, 9) + SUM_ELEM(p.s2, 10) + SUM_ELEM(p.s3, 11);
if( x_max >= 16 )
{
p = convert_int4(vload4(0, ptr+12));
S += SUM_ELEM(p.s0, 12) + SUM_ELEM(p.s1, 13) + SUM_ELEM(p.s2, 14) + SUM_ELEM(p.s3, 15);
}
}
}
}
if( x_max >= 20 )
{
p = convert_int4(vload4(0, ptr+16));
S += SUM_ELEM(p.s0, 16) + SUM_ELEM(p.s1, 17) + SUM_ELEM(p.s2, 18) + SUM_ELEM(p.s3, 19);
if( x_max >= 24 )
{
p = convert_int4(vload4(0, ptr+20));
S += SUM_ELEM(p.s0, 20) + SUM_ELEM(p.s1, 21) + SUM_ELEM(p.s2, 22) + SUM_ELEM(p.s3, 23);
if( x_max >= 28 )
{
p = convert_int4(vload4(0, ptr+24));
S += SUM_ELEM(p.s0, 24) + SUM_ELEM(p.s1, 25) + SUM_ELEM(p.s2, 26) + SUM_ELEM(p.s3, 27);
if( x_max >= 32 )
{
p = convert_int4(vload4(0, ptr+28));
S += SUM_ELEM(p.s0, 28) + SUM_ELEM(p.s1, 29) + SUM_ELEM(p.s2, 30) + SUM_ELEM(p.s3, 31);
}
}
}
}
if( x < x_max )
{
int ps = ptr[x];
S += SUM_ELEM(ps, x);
if( x+1 < x_max )
{
ps = ptr[x+1];
S += SUM_ELEM(ps, x+1);
if( x+2 < x_max )
{
ps = ptr[x+2];
S += SUM_ELEM(ps, x+2);
}
}
}
int sy = y*y;
mom[y][0] = S.s0;
mom[y][1] = S.s1;
mom[y][2] = y*S.s0;
mom[y][3] = S.s2;
mom[y][4] = y*S.s1;
mom[y][5] = sy*S.s0;
mom[y][6] = S.s3;
mom[y][7] = y*S.s2;
mom[y][8] = sy*S.s1;
mom[y][9] = y*sy*S.s0;
}
else
mom[y][0] = mom[y][1] = mom[y][2] = mom[y][3] = mom[y][4] =
mom[y][5] = mom[y][6] = mom[y][7] = mom[y][8] = mom[y][9] = 0;
barrier(CLK_LOCAL_MEM_FENCE);
#define REDUCE(d) \
if( y < d ) \
{ \
mom[y][0] += mom[y+d][0]; \
mom[y][1] += mom[y+d][1]; \
mom[y][2] += mom[y+d][2]; \
mom[y][3] += mom[y+d][3]; \
mom[y][4] += mom[y+d][4]; \
mom[y][5] += mom[y+d][5]; \
mom[y][6] += mom[y+d][6]; \
mom[y][7] += mom[y+d][7]; \
mom[y][8] += mom[y+d][8]; \
mom[y][9] += mom[y+d][9]; \
} \
barrier(CLK_LOCAL_MEM_FENCE)
REDUCE(16);
REDUCE(8);
REDUCE(4);
REDUCE(2);
if( y == 0 )
{
__global int* momout = mom0 + (y0*xtiles + x0)*10;
momout[0] = mom[0][0] + mom[1][0];
momout[1] = mom[0][1] + mom[1][1];
momout[2] = mom[0][2] + mom[1][2];
momout[3] = mom[0][3] + mom[1][3];
momout[4] = mom[0][4] + mom[1][4];
momout[5] = mom[0][5] + mom[1][5];
momout[6] = mom[0][6] + mom[1][6];
momout[7] = mom[0][7] + mom[1][7];
momout[8] = mom[0][8] + mom[1][8];
momout[9] = mom[0][9] + mom[1][9];
}
}
}

View File

@ -0,0 +1,152 @@
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Niko Li, newlife20080214@gmail.com
// Zero Lin, zero.lin@amd.com
// Yao Wang, bitwangyaoyao@gmail.com
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors as is and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//
#ifdef DOUBLE_SUPPORT
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64:enable
#elif defined (cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64:enable
#endif
#endif
#ifdef DEPTH_0
#ifdef ERODE
#define VAL 255
#endif
#ifdef DILATE
#define VAL 0
#endif
#endif
#ifdef DEPTH_5
#ifdef ERODE
#define VAL FLT_MAX
#endif
#ifdef DILATE
#define VAL -FLT_MAX
#endif
#endif
#ifdef DEPTH_6
#ifdef ERODE
#define VAL DBL_MAX
#endif
#ifdef DILATE
#define VAL -DBL_MAX
#endif
#endif
#ifdef ERODE
#define MORPH_OP(A,B) min((A),(B))
#endif
#ifdef DILATE
#define MORPH_OP(A,B) max((A),(B))
#endif
//BORDER_CONSTANT: iiiiii|abcdefgh|iiiiiii
#define ELEM(i,l_edge,r_edge,elem1,elem2) (i)<(l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
__kernel void morph(__global const uchar * restrict srcptr, int src_step, int src_offset,
__global uchar * dstptr, int dst_step, int dst_offset,
int src_offset_x, int src_offset_y,
int cols, int rows,
__constant uchar * mat_kernel,
int src_whole_cols, int src_whole_rows)
{
int l_x = get_local_id(0);
int l_y = get_local_id(1);
int x = get_group_id(0)*LSIZE0;
int y = get_group_id(1)*LSIZE1;
int start_x = x+src_offset_x-RADIUSX;
int end_x = x + src_offset_x+LSIZE0+RADIUSX;
int width = end_x -(x+src_offset_x-RADIUSX)+1;
int start_y = y+src_offset_y-RADIUSY;
int point1 = mad24(l_y,LSIZE0,l_x);
int point2 = point1 + LSIZE0*LSIZE1;
int tl_x = point1 % width;
int tl_y = point1 / width;
int tl_x2 = point2 % width;
int tl_y2 = point2 / width;
int cur_x = start_x + tl_x;
int cur_y = start_y + tl_y;
int cur_x2 = start_x + tl_x2;
int cur_y2 = start_y + tl_y2;
int start_addr = mad24(cur_y,src_step, cur_x*(int)sizeof(GENTYPE));
int start_addr2 = mad24(cur_y2,src_step, cur_x2*(int)sizeof(GENTYPE));
GENTYPE temp0,temp1;
__local GENTYPE LDS_DAT[2*LSIZE1*LSIZE0];
int end_addr = mad24(src_whole_rows - 1,src_step,src_whole_cols*(int)sizeof(GENTYPE));
//read pixels from src
start_addr = ((start_addr < end_addr) && (start_addr > 0)) ? start_addr : 0;
start_addr2 = ((start_addr2 < end_addr) && (start_addr2 > 0)) ? start_addr2 : 0;
__global const GENTYPE * src;
src = (__global const GENTYPE *)(srcptr+start_addr);
temp0 = src[0];
src = (__global const GENTYPE *)(srcptr+start_addr2);
temp1 = src[0];
//judge if read out of boundary
temp0= ELEM(cur_x,0,src_whole_cols,(GENTYPE)VAL,temp0);
temp0= ELEM(cur_y,0,src_whole_rows,(GENTYPE)VAL,temp0);
temp1= ELEM(cur_x2,0,src_whole_cols,(GENTYPE)VAL,temp1);
temp1= ELEM(cur_y2,0,src_whole_rows,(GENTYPE)VAL,temp1);
LDS_DAT[point1] = temp0;
LDS_DAT[point2] = temp1;
barrier(CLK_LOCAL_MEM_FENCE);
GENTYPE res = (GENTYPE)VAL;
for(int i=0; i<2*RADIUSY+1; i++)
for(int j=0; j<2*RADIUSX+1; j++)
{
res =
#ifndef RECTKERNEL
mat_kernel[i*(2*RADIUSX+1)+j] ?
#endif
MORPH_OP(res,LDS_DAT[mad24(l_y+i,width,l_x+j)])
#ifndef RECTKERNEL
:res
#endif
;
}
int gidx = get_global_id(0);
int gidy = get_global_id(1);
if(gidx<cols && gidy<rows)
{
int dst_index = mad24(gidy, dst_step, dst_offset + gidx * (int)sizeof(GENTYPE));
__global GENTYPE * dst = (__global GENTYPE *)(dstptr + dst_index);
dst[0] = res;
}
}

View File

@ -229,6 +229,75 @@ OCL_TEST_P(GaussianBlurTest, Mat)
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// Erode
typedef FilterTestBase Erode;
OCL_TEST_P(Erode, Mat)
{
Size kernelSize(ksize, ksize);
int iterations = (int)param;
for (int j = 0; j < test_loop_times; j++)
{
random_roi();
Mat kernel = randomMat(kernelSize, CV_8UC1, 0, 3);
OCL_OFF(cv::erode(src_roi, dst_roi, kernel, Point(-1,-1), iterations) );
OCL_ON(cv::erode(usrc_roi, udst_roi, kernel, Point(-1,-1), iterations) );
Near();
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// Dilate
typedef FilterTestBase Dilate;
OCL_TEST_P(Dilate, Mat)
{
Size kernelSize(ksize, ksize);
int iterations = (int)param;
for (int j = 0; j < test_loop_times; j++)
{
random_roi();
Mat kernel = randomMat(kernelSize, CV_8UC1, 0, 3);
OCL_OFF(cv::dilate(src_roi, dst_roi, kernel, Point(-1,-1), iterations) );
OCL_ON(cv::dilate(usrc_roi, udst_roi, kernel, Point(-1,-1), iterations) );
Near();
}
}
/////////////////////////////////////////////////////////////////////////////////////////////////
// MorphologyEx
typedef FilterTestBase MorphologyEx;
OCL_TEST_P(MorphologyEx, Mat)
{
Size kernelSize(ksize, ksize);
int iterations = (int)param;
int op = size.height;
for (int j = 0; j < test_loop_times; j++)
{
random_roi();
Mat kernel = randomMat(kernelSize, CV_8UC1, 0, 3);
OCL_OFF(cv::morphologyEx(src_roi, dst_roi, op, kernel, Point(-1,-1), iterations) );
OCL_ON(cv::morphologyEx(usrc_roi, udst_roi, op, kernel, Point(-1,-1), iterations) );
Near();
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define FILTER_BORDER_SET_NO_ISOLATED \
@ -285,6 +354,31 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, GaussianBlurTest, Combine(
Values(0.0), // not used
Bool()));
OCL_INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
Values(3, 5, 7),
Values(Size(0,0)),//not used
Values((BorderType)BORDER_CONSTANT),//not used
Values(1.0, 2.0, 3.0),
Bool() ) );
OCL_INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
Values(3, 5, 7),
Values(Size(0,0)),//not used
Values((BorderType)BORDER_CONSTANT),//not used
Values(1.0, 2.0, 3.0),
Bool() ) );
OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx, Combine(
Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
Values(3, 5, 7),
Values(Size(0,0), Size(0,1), Size(0,2), Size(0,3), Size(0,4), Size(0,5),Size(0,6)),//uses as generator of operations
Values((BorderType)BORDER_CONSTANT),//not used
Values(1.0, 2.0, 3.0),
Bool() ) );
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL

View File

@ -0,0 +1,175 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
// Niko Li, newlife20080214@gmail.com
// Jia Haipeng, jiahaipeng95@gmail.com
// Shengen Yan, yanshengen@gmail.com
// Jiang Liyuan, lyuan001.good@163.com
// Rock Li, Rock.Li@amd.com
// Wu Zailong, bullet@yeah.net
// Xu Pang, pangxu010@163.com
// Sen Liu, swjtuls1987@126.com
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#include "cvconfig.h"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
///////////////////////////////////////////////////////////////////////////////
PARAM_TEST_CASE(CalcBackProject, MatDepth, int, bool)
{
int depth, N;
bool useRoi;
std::vector<float> ranges;
std::vector<int> channels;
double scale;
std::vector<Mat> images;
std::vector<Mat> images_roi;
std::vector<UMat> uimages;
std::vector<UMat> uimages_roi;
TEST_DECLARE_INPUT_PARAMETER(hist)
TEST_DECLARE_OUTPUT_PARAMETER(dst)
virtual void SetUp()
{
depth = GET_PARAM(0);
N = GET_PARAM(1);
useRoi = GET_PARAM(2);
ASSERT_GE(2, N);
images.resize(N);
images_roi.resize(N);
uimages.resize(N);
uimages_roi.resize(N);
}
virtual void random_roi()
{
Size roiSize = randomSize(1, MAX_VALUE);
int totalChannels = 0;
for (int i = 0; i < N; ++i)
{
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
int cn = randomInt(1, 5);
randomSubMat(images[i], images_roi[i], roiSize, srcBorder, CV_MAKE_TYPE(depth, cn), 0, 125);
ranges.push_back(10);
ranges.push_back(100);
channels.push_back(randomInt(0, cn) + totalChannels);
totalChannels += cn;
}
Mat tmpHist;
{
std::vector<int> hist_size(N);
for (int i = 0 ; i < N; ++i)
hist_size[i] = randomInt(10, 50);
cv::calcHist(images_roi, channels, noArray(), tmpHist, hist_size, ranges);
ASSERT_EQ(CV_32FC1, tmpHist.type());
}
Border histBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(hist, hist_roi, tmpHist.size(), histBorder, tmpHist.type(), 0, MAX_VALUE);
tmpHist.copyTo(hist_roi);
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, roiSize, dstBorder, CV_MAKE_TYPE(depth, 1), 5, 16);
for (int i = 0; i < N; ++i)
{
images[i].copyTo(uimages[i]);
Size _wholeSize;
Point ofs;
images_roi[i].locateROI(_wholeSize, ofs);
uimages_roi[i] = uimages[i](Rect(ofs.x, ofs.y, images_roi[i].cols, images_roi[i].rows));
}
UMAT_UPLOAD_INPUT_PARAMETER(hist)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
scale = randomDouble(0.1, 1);
}
void Near()
{
OCL_EXPECT_MATS_NEAR(dst, 0.0)
}
};
//////////////////////////////// CalcBackProject //////////////////////////////////////////////
OCL_TEST_P(CalcBackProject, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
random_roi();
OCL_OFF(cv::calcBackProject(images_roi, channels, hist_roi, dst_roi, ranges, scale));
OCL_ON(cv::calcBackProject(uimages_roi, channels, uhist_roi, udst_roi, ranges, scale));
Near();
}
}
/////////////////////////////////////////////////////////////////////////////////////
OCL_INSTANTIATE_TEST_CASE_P(Imgproc, CalcBackProject, Combine(Values((MatDepth)CV_8U), Values(1, 2), Bool()));
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL

View File

@ -0,0 +1,147 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "test_precomp.hpp"
#include "opencv2/ts/ocl_test.hpp"
#ifdef HAVE_OPENCL
namespace cvtest {
namespace ocl {
/////////////////////////////////////////////////////////////////////////////////////////////////
// sepFilter2D
PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
{
static const int kernelMinSize = 2;
static const int kernelMaxSize = 10;
int type;
Point anchor;
int borderType;
bool useRoi;
Mat kernelX, kernelY;
TEST_DECLARE_INPUT_PARAMETER(src)
TEST_DECLARE_OUTPUT_PARAMETER(dst)
virtual void SetUp()
{
type = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1));
borderType = GET_PARAM(2) | (GET_PARAM(3) ? BORDER_ISOLATED : 0);
useRoi = GET_PARAM(4);
}
void random_roi()
{
Size ksize = randomSize(kernelMinSize, kernelMaxSize);
if (1 != (ksize.width % 2))
ksize.width++;
if (1 != (ksize.height % 2))
ksize.height++;
Mat temp = randomMat(Size(ksize.width, 1), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE);
cv::normalize(temp, kernelX, 1.0, 0.0, NORM_L1);
temp = randomMat(Size(1, ksize.height), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE);
cv::normalize(temp, kernelY, 1.0, 0.0, NORM_L1);
Size roiSize = randomSize(ksize.width, MAX_VALUE, ksize.height, MAX_VALUE);
int rest = roiSize.width % 4;
if (0 != rest)
roiSize.width += (4 - rest);
Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
rest = srcBorder.lef % 4;
if (0 != rest)
srcBorder.lef += (4 - rest);
rest = srcBorder.rig % 4;
if (0 != rest)
srcBorder.rig += (4 - rest);
randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);
Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);
anchor.x = -1;
anchor.y = -1;
UMAT_UPLOAD_INPUT_PARAMETER(src)
UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
}
void Near(double threshold = 0.0)
{
OCL_EXPECT_MATS_NEAR(dst, threshold);
}
};
OCL_TEST_P(SepFilter2D, Mat)
{
for (int j = 0; j < test_loop_times; j++)
{
random_roi();
OCL_OFF(cv::sepFilter2D(src_roi, dst_roi, -1, kernelX, kernelY, anchor, 0.0, borderType));
OCL_ON(cv::sepFilter2D(usrc_roi, udst_roi, -1, kernelX, kernelY, anchor, 0.0, borderType));
Near(1.0);
}
}
OCL_INSTANTIATE_TEST_CASE_P(ImageProc, SepFilter2D,
Combine(
Values(CV_8U, CV_32F),
Values(1, 4),
Values(
(BorderType)BORDER_CONSTANT,
(BorderType)BORDER_REPLICATE,
(BorderType)BORDER_REFLECT,
(BorderType)BORDER_REFLECT_101),
Bool(), // BORDER_ISOLATED
Bool() // ROI
)
);
} } // namespace cvtest::ocl
#endif // HAVE_OPENCL

View File

@ -43,6 +43,13 @@
using namespace cv;
using namespace std;
#define OCL_TUNING_MODE 0
#if OCL_TUNING_MODE
#define OCL_TUNING_MODE_ONLY(code) code
#else
#define OCL_TUNING_MODE_ONLY(code)
#endif
// image moments
class CV_MomentsTest : public cvtest::ArrayTest
{
@ -60,6 +67,7 @@ protected:
void run_func();
int coi;
bool is_binary;
bool try_umat;
};
@ -70,6 +78,7 @@ CV_MomentsTest::CV_MomentsTest()
test_array[REF_OUTPUT].push_back(NULL);
coi = -1;
is_binary = false;
OCL_TUNING_MODE_ONLY(test_case_count = 10);
//element_wise_relative_error = false;
}
@ -96,25 +105,38 @@ void CV_MomentsTest::get_minmax_bounds( int i, int j, int type, Scalar& low, Sca
}
}
void CV_MomentsTest::get_test_array_types_and_sizes( int test_case_idx,
vector<vector<Size> >& sizes, vector<vector<int> >& types )
{
RNG& rng = ts->get_rng();
cvtest::ArrayTest::get_test_array_types_and_sizes( test_case_idx, sizes, types );
int cn = cvtest::randInt(rng) % 4 + 1;
int cn = (cvtest::randInt(rng) % 4) + 1;
int depth = cvtest::randInt(rng) % 4;
depth = depth == 0 ? CV_8U : depth == 1 ? CV_16U : depth == 2 ? CV_16S : CV_32F;
if( cn == 2 )
is_binary = cvtest::randInt(rng) % 2 != 0;
if( depth == 0 && !is_binary )
try_umat = cvtest::randInt(rng) % 5 != 0;
else
try_umat = cvtest::randInt(rng) % 2 != 0;
if( cn == 2 || try_umat )
cn = 1;
OCL_TUNING_MODE_ONLY(
cn = 1;
depth = CV_8U;
try_umat = true;
is_binary = false;
sizes[INPUT][0] = Size(1024,768)
);
types[INPUT][0] = CV_MAKETYPE(depth, cn);
types[OUTPUT][0] = types[REF_OUTPUT][0] = CV_64FC1;
sizes[OUTPUT][0] = sizes[REF_OUTPUT][0] = cvSize(MOMENT_COUNT,1);
if(CV_MAT_DEPTH(types[INPUT][0])>=CV_32S)
sizes[INPUT][0].width = MAX(sizes[INPUT][0].width, 3);
is_binary = cvtest::randInt(rng) % 2 != 0;
coi = 0;
cvmat_allowed = true;
if( cn > 1 )
@ -149,7 +171,25 @@ void CV_MomentsTest::run_func()
{
CvMoments* m = (CvMoments*)test_mat[OUTPUT][0].ptr<double>();
double* others = (double*)(m + 1);
cvMoments( test_array[INPUT][0], m, is_binary );
if( try_umat )
{
UMat u;
test_mat[INPUT][0].clone().copyTo(u);
OCL_TUNING_MODE_ONLY(
static double ttime = 0;
static int ncalls = 0;
moments(u, is_binary != 0);
double t = (double)getTickCount());
Moments new_m = moments(u, is_binary != 0);
OCL_TUNING_MODE_ONLY(
ttime += (double)getTickCount() - t;
ncalls++;
printf("%g\n", ttime/ncalls/u.total()));
*m = new_m;
}
else
cvMoments( test_array[INPUT][0], m, is_binary );
others[0] = cvGetNormalizedCentralMoment( m, 2, 0 );
others[1] = cvGetNormalizedCentralMoment( m, 1, 1 );
others[2] = cvGetNormalizedCentralMoment( m, 0, 2 );

View File

@ -18,6 +18,8 @@ class_ignore_list = (
const_ignore_list = (
"CV_CAP_OPENNI",
"CV_CAP_PROP_OPENNI_",
"CV_CAP_INTELPERC",
"CV_CAP_PROP_INTELPERC_"
"WINDOW_AUTOSIZE",
"CV_WND_PROP_",
"CV_WINDOW_",

View File

@ -37,6 +37,10 @@ public class OpenCVLoader
*/
public static final String OPENCV_VERSION_2_4_7 = "2.4.7";
/**
* OpenCV Library version 2.4.8.
*/
public static final String OPENCV_VERSION_2_4_8 = "2.4.8";
/**
* Loads and initializes OpenCV library from current application package. Roughly, it's an analog of system.loadLibrary("opencv_java").

View File

@ -12,6 +12,7 @@
//
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
// Copyright (C) 2013, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// @Authors
@ -66,8 +67,8 @@ uint read_sumTex(IMAGE_INT32 img, sampler_t sam, int2 coord, int rows, int cols,
uchar read_imgTex(IMAGE_INT8 img, sampler_t sam, float2 coord, int rows, int cols, int elemPerRow)
{
#ifdef DISABLE_IMAGE2D
int x = clamp(convert_int_rte(coord.x), 0, cols - 1);
int y = clamp(convert_int_rte(coord.y), 0, rows - 1);
int x = clamp(round(coord.x), 0, cols - 1);
int y = clamp(round(coord.y), 0, rows - 1);
return img[elemPerRow * y + x];
#else
return (uchar)read_imageui(img, sam, coord).x;
@ -98,6 +99,7 @@ __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAM
#define CV_PI_F 3.14159265f
#endif
// Use integral image to calculate haar wavelets.
// N = 2
// for simple haar paatern
@ -114,10 +116,10 @@ float icvCalcHaarPatternSum_2(
F d = 0;
int2 dx1 = convert_int2_rte(ratio * src[0]);
int2 dy1 = convert_int2_rte(ratio * src[1]);
int2 dx2 = convert_int2_rte(ratio * src[2]);
int2 dy2 = convert_int2_rte(ratio * src[3]);
int2 dx1 = convert_int2(round(ratio * src[0]));
int2 dy1 = convert_int2(round(ratio * src[1]));
int2 dx2 = convert_int2(round(ratio * src[2]));
int2 dy2 = convert_int2(round(ratio * src[3]));
F t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
@ -136,106 +138,9 @@ float icvCalcHaarPatternSum_2(
return (float)d;
}
// N = 3
float icvCalcHaarPatternSum_3(
IMAGE_INT32 sumTex,
__constant float4 *src,
int oldSize,
int newSize,
int y, int x,
int rows, int cols, int elemPerRow)
{
float ratio = (float)newSize / oldSize;
F d = 0;
int4 dx1 = convert_int4_rte(ratio * src[0]);
int4 dy1 = convert_int4_rte(ratio * src[1]);
int4 dx2 = convert_int4_rte(ratio * src[2]);
int4 dy2 = convert_int4_rte(ratio * src[3]);
F t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy2.x), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy1.x), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy2.x), rows, cols, elemPerRow );
d += t * src[4].x / ((dx2.x - dx1.x) * (dy2.x - dy1.x));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy1.y), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy2.y), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy1.y), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy2.y), rows, cols, elemPerRow );
d += t * src[4].y / ((dx2.y - dx1.y) * (dy2.y - dy1.y));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy1.z), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy2.z), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy1.z), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy2.z), rows, cols, elemPerRow );
d += t * src[4].z / ((dx2.z - dx1.z) * (dy2.z - dy1.z));
return (float)d;
}
// N = 4
float icvCalcHaarPatternSum_4(
IMAGE_INT32 sumTex,
__constant float4 *src,
int oldSize,
int newSize,
int y, int x,
int rows, int cols, int elemPerRow)
{
float ratio = (float)newSize / oldSize;
F d = 0;
int4 dx1 = convert_int4_rte(ratio * src[0]);
int4 dy1 = convert_int4_rte(ratio * src[1]);
int4 dx2 = convert_int4_rte(ratio * src[2]);
int4 dy2 = convert_int4_rte(ratio * src[3]);
F t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy2.x), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy1.x), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy2.x), rows, cols, elemPerRow );
d += t * src[4].x / ((dx2.x - dx1.x) * (dy2.x - dy1.x));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy1.y), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy2.y), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy1.y), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy2.y), rows, cols, elemPerRow );
d += t * src[4].y / ((dx2.y - dx1.y) * (dy2.y - dy1.y));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy1.z), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy2.z), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy1.z), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy2.z), rows, cols, elemPerRow );
d += t * src[4].z / ((dx2.z - dx1.z) * (dy2.z - dy1.z));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.w, y + dy1.w), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.w, y + dy2.w), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.w, y + dy1.w), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.w, y + dy2.w), rows, cols, elemPerRow );
d += t * src[4].w / ((dx2.w - dx1.w) * (dy2.w - dy1.w));
return (float)d;
}
////////////////////////////////////////////////////////////////////////
// Hessian
__constant float4 c_DX[5] = { (float4)(0, 3, 6, 0), (float4)(2, 2, 2, 0), (float4)(3, 6, 9, 0), (float4)(7, 7, 7, 0), (float4)(1, -2, 1, 0) };
__constant float4 c_DY[5] = { (float4)(2, 2, 2, 0), (float4)(0, 3, 6, 0), (float4)(7, 7, 7, 0), (float4)(3, 6, 9, 0), (float4)(1, -2, 1, 0) };
__constant float4 c_DXY[5] = { (float4)(1, 5, 1, 5), (float4)(1, 1, 5, 5), (float4)(4, 8, 4, 8), (float4)(4, 4, 8, 8), (float4)(1, -1, -1, 1) };// Use integral image to calculate haar wavelets.
__inline int calcSize(int octave, int layer)
{
/* Wavelet size at first layer of first octave. */
@ -250,6 +155,24 @@ __inline int calcSize(int octave, int layer)
return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
}
// Calculate a derivative in an axis-aligned direction (x or y). The "plus1"
// boxes contribute 1 * (area), and the "minus2" box contributes -2 * (area).
// So the final computation is plus1a + plus1b - 2 * minus2. The corners are
// labeled A, B, C, and D, with A being the top left, B being top right, C
// being bottom left, and D being bottom right.
F calcAxisAlignedDerivative(
int plus1a_A, int plus1a_B, int plus1a_C, int plus1a_D, F plus1a_scale,
int plus1b_A, int plus1b_B, int plus1b_C, int plus1b_D, F plus1b_scale,
int minus2_A, int minus2_B, int minus2_C, int minus2_D, F minus2_scale)
{
F plus1a = plus1a_A - plus1a_B - plus1a_C + plus1a_D;
F plus1b = plus1b_A - plus1b_B - plus1b_C + plus1b_D;
F minus2 = minus2_A - minus2_B - minus2_C + minus2_D;
return (plus1a / plus1a_scale -
2.0f * minus2 / minus2_scale +
plus1b / plus1b_scale);
}
//calculate targeted layer per-pixel determinant and trace with an integral image
__kernel void icvCalcLayerDetAndTrace(
@ -264,7 +187,7 @@ __kernel void icvCalcLayerDetAndTrace(
int c_octave,
int c_layer_rows,
int sumTex_step
)
)
{
det_step /= sizeof(*det);
trace_step /= sizeof(*trace);
@ -288,16 +211,103 @@ __kernel void icvCalcLayerDetAndTrace(
if (size <= c_img_rows && size <= c_img_cols && i < samples_i && j < samples_j)
{
const float dx = icvCalcHaarPatternSum_3(sumTex, c_DX , 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
const float dy = icvCalcHaarPatternSum_3(sumTex, c_DY , 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
const float dxy = icvCalcHaarPatternSum_4(sumTex, c_DXY, 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
int x = j << c_octave;
int y = i << c_octave;
float ratio = (float)size / 9;
// Precompute some commonly used values, which are used to offset
// texture coordinates in the integral image.
int r1 = round(ratio);
int r2 = round(ratio * 2.0f);
int r3 = round(ratio * 3.0f);
int r4 = round(ratio * 4.0f);
int r5 = round(ratio * 5.0f);
int r6 = round(ratio * 6.0f);
int r7 = round(ratio * 7.0f);
int r8 = round(ratio * 8.0f);
int r9 = round(ratio * 9.0f);
// Calculate the approximated derivative in the x-direction
F d = 0;
{
// Some of the pixels needed to compute the derivative are
// repeated, so we only don't duplicate the fetch here.
int t02 = read_sumTex( sumTex, sampler, (int2)(x, y + r2), c_img_rows, c_img_cols, sumTex_step );
int t07 = read_sumTex( sumTex, sampler, (int2)(x, y + r7), c_img_rows, c_img_cols, sumTex_step );
int t32 = read_sumTex( sumTex, sampler, (int2)(x + r3, y + r2), c_img_rows, c_img_cols, sumTex_step );
int t37 = read_sumTex( sumTex, sampler, (int2)(x + r3, y + r7), c_img_rows, c_img_cols, sumTex_step );
int t62 = read_sumTex( sumTex, sampler, (int2)(x + r6, y + r2), c_img_rows, c_img_cols, sumTex_step );
int t67 = read_sumTex( sumTex, sampler, (int2)(x + r6, y + r7), c_img_rows, c_img_cols, sumTex_step );
int t92 = read_sumTex( sumTex, sampler, (int2)(x + r9, y + r2), c_img_rows, c_img_cols, sumTex_step );
int t97 = read_sumTex( sumTex, sampler, (int2)(x + r9, y + r7), c_img_rows, c_img_cols, sumTex_step );
d = calcAxisAlignedDerivative(t02, t07, t32, t37, (r3) * (r7 - r2),
t62, t67, t92, t97, (r9 - r6) * (r7 - r2),
t32, t37, t62, t67, (r6 - r3) * (r7 - r2));
}
const float dx = (float)d;
// Calculate the approximated derivative in the y-direction
d = 0;
{
// Some of the pixels needed to compute the derivative are
// repeated, so we only don't duplicate the fetch here.
int t20 = read_sumTex( sumTex, sampler, (int2)(x + r2, y), c_img_rows, c_img_cols, sumTex_step );
int t23 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r3), c_img_rows, c_img_cols, sumTex_step );
int t70 = read_sumTex( sumTex, sampler, (int2)(x + r7, y), c_img_rows, c_img_cols, sumTex_step );
int t73 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r3), c_img_rows, c_img_cols, sumTex_step );
int t26 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r6), c_img_rows, c_img_cols, sumTex_step );
int t76 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r6), c_img_rows, c_img_cols, sumTex_step );
int t29 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r9), c_img_rows, c_img_cols, sumTex_step );
int t79 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r9), c_img_rows, c_img_cols, sumTex_step );
d = calcAxisAlignedDerivative(t20, t23, t70, t73, (r7 - r2) * (r3),
t26, t29, t76, t79, (r7 - r2) * (r9 - r6),
t23, t26, t73, t76, (r7 - r2) * (r6 - r3));
}
const float dy = (float)d;
// Calculate the approximated derivative in the xy-direction
d = 0;
{
// There's no saving us here, we just have to get all of the pixels in
// separate fetches
F t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + r1, y + r1), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r1, y + r4), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r4, y + r1), c_img_rows, c_img_cols, sumTex_step );
t += read_sumTex( sumTex, sampler, (int2)(x + r4, y + r4), c_img_rows, c_img_cols, sumTex_step );
d += t / ((r4 - r1) * (r4 - r1));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + r5, y + r1), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r5, y + r4), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r8, y + r1), c_img_rows, c_img_cols, sumTex_step );
t += read_sumTex( sumTex, sampler, (int2)(x + r8, y + r4), c_img_rows, c_img_cols, sumTex_step );
d -= t / ((r8 - r5) * (r4 - r1));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + r1, y + r5), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r1, y + r8), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r4, y + r5), c_img_rows, c_img_cols, sumTex_step );
t += read_sumTex( sumTex, sampler, (int2)(x + r4, y + r8), c_img_rows, c_img_cols, sumTex_step );
d -= t / ((r4 - r1) * (r8 - r5));
t = 0;
t += read_sumTex( sumTex, sampler, (int2)(x + r5, y + r5), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r5, y + r8), c_img_rows, c_img_cols, sumTex_step );
t -= read_sumTex( sumTex, sampler, (int2)(x + r8, y + r5), c_img_rows, c_img_cols, sumTex_step );
t += read_sumTex( sumTex, sampler, (int2)(x + r8, y + r8), c_img_rows, c_img_cols, sumTex_step );
d += t / ((r8 - r5) * (r8 - r5));
}
const float dxy = (float)d;
det [j + margin + det_step * (layer * c_layer_rows + i + margin)] = dx * dy - 0.81f * dxy * dxy;
trace[j + margin + trace_step * (layer * c_layer_rows + i + margin)] = dx + dy;
}
}
////////////////////////////////////////////////////////////////////////
// NONMAX
@ -309,10 +319,10 @@ bool within_check(IMAGE_INT32 maskSumTex, int sum_i, int sum_j, int size, int ro
float d = 0;
int dx1 = convert_int_rte(ratio * c_DM[0]);
int dy1 = convert_int_rte(ratio * c_DM[1]);
int dx2 = convert_int_rte(ratio * c_DM[2]);
int dy2 = convert_int_rte(ratio * c_DM[3]);
int dx1 = round(ratio * c_DM[0]);
int dy1 = round(ratio * c_DM[1]);
int dx2 = round(ratio * c_DM[2]);
int dy2 = round(ratio * c_DM[3]);
float t = 0;
@ -572,7 +582,7 @@ void icvFindMaximaInLayer(
}
// solve 3x3 linear system Ax=b for floating point input
inline bool solve3x3_float(volatile __local const float4 *A, volatile __local const float *b, volatile __local float *x)
inline bool solve3x3_float(const float4 *A, const float *b, float *x)
{
float det = A[0].x * (A[1].y * A[2].z - A[1].z * A[2].y)
- A[0].y * (A[1].x * A[2].z - A[1].z * A[2].x)
@ -651,7 +661,7 @@ void icvInterpolateKeypoint(
if (get_local_id(0) == 0 && get_local_id(1) == 0 && get_local_id(2) == 0)
{
volatile __local float dD[3];
float dD[3];
//dx
dD[0] = -0.5f * (N9[1][1][2] - N9[1][1][0]);
@ -660,7 +670,7 @@ void icvInterpolateKeypoint(
//ds
dD[2] = -0.5f * (N9[2][1][1] - N9[0][1][1]);
volatile __local float4 H[3];
float4 H[3];
//dxx
H[0].x = N9[1][1][0] - 2.0f * N9[1][1][1] + N9[1][1][2];
@ -681,7 +691,7 @@ void icvInterpolateKeypoint(
//dss
H[2].z = N9[0][1][1] - 2.0f * N9[1][1][1] + N9[2][1][1];
volatile __local float x[3];
float x[3];
if (solve3x3_float(H, dD, x))
{
@ -711,7 +721,7 @@ void icvInterpolateKeypoint(
sampled in a circle of radius 6s using wavelets of size 4s.
We ensure the gradient wavelet size is even to ensure the
wavelet pattern is balanced and symmetric around its center */
const int grad_wav_size = 2 * convert_int_rte(2.0f * s);
const int grad_wav_size = 2 * round(2.0f * s);
// check when grad_wav_size is too big
if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size)
@ -737,9 +747,12 @@ void icvInterpolateKeypoint(
////////////////////////////////////////////////////////////////////////
// Orientation
#define ORI_SEARCH_INC 5
#define ORI_WIN 60
#define ORI_SAMPLES 113
#define ORI_WIN 60
#define ORI_SAMPLES 113
// The distance between samples in the beginning of the the reduction
#define ORI_RESPONSE_REDUCTION_WIDTH 48
#define ORI_RESPONSE_ARRAY_SIZE (ORI_RESPONSE_REDUCTION_WIDTH * 2)
__constant float c_aptX[ORI_SAMPLES] = {-6, -5, -5, -5, -5, -5, -5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6};
__constant float c_aptY[ORI_SAMPLES] = {0, -3, -2, -1, 0, 1, 2, 3, -4, -3, -2, -1, 0, 1, 2, 3, 4, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -4, -3, -2, -1, 0, 1, 2, 3, 4, -3, -2, -1, 0, 1, 2, 3, 0};
@ -833,12 +846,15 @@ void icvCalcOrientation(
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
volatile __local float s_X[128];
volatile __local float s_Y[128];
volatile __local float s_angle[128];
__local float s_X[ORI_SAMPLES];
__local float s_Y[ORI_SAMPLES];
__local float s_angle[ORI_SAMPLES];
volatile __local float s_sumx[32 * 4];
volatile __local float s_sumy[32 * 4];
// Need to allocate enough to make the reduction work without accessing
// past the end of the array.
__local float s_sumx[ORI_RESPONSE_ARRAY_SIZE];
__local float s_sumy[ORI_RESPONSE_ARRAY_SIZE];
__local float s_mod[ORI_RESPONSE_ARRAY_SIZE];
/* The sampling intervals and wavelet sized for selecting an orientation
and building the keypoint descriptor are defined relative to 's' */
@ -849,28 +865,60 @@ void icvCalcOrientation(
sampled in a circle of radius 6s using wavelets of size 4s.
We ensure the gradient wavelet size is even to ensure the
wavelet pattern is balanced and symmetric around its center */
const int grad_wav_size = 2 * convert_int_rte(2.0f * s);
const int grad_wav_size = 2 * round(2.0f * s);
// check when grad_wav_size is too big
if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size)
return;
// Calc X, Y, angle and store it to shared memory
const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0);
const int tid = get_local_id(0);
// Initialize values that are only used as part of the reduction later.
if (tid < ORI_RESPONSE_ARRAY_SIZE - ORI_LOCAL_SIZE) {
s_mod[tid + ORI_LOCAL_SIZE] = 0.0f;
}
float X = 0.0f, Y = 0.0f, angle = 0.0f;
float ratio = (float)grad_wav_size / 4;
if (tid < ORI_SAMPLES)
int r2 = round(ratio * 2.0);
int r4 = round(ratio * 4.0);
for (int i = tid; i < ORI_SAMPLES; i += ORI_LOCAL_SIZE )
{
float X = 0.0f, Y = 0.0f, angle = 0.0f;
const float margin = (float)(grad_wav_size - 1) / 2.0f;
const int x = convert_int_rte(featureX[get_group_id(0)] + c_aptX[tid] * s - margin);
const int y = convert_int_rte(featureY[get_group_id(0)] + c_aptY[tid] * s - margin);
const int x = round(featureX[get_group_id(0)] + c_aptX[i] * s - margin);
const int y = round(featureY[get_group_id(0)] + c_aptY[i] * s - margin);
if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size &&
x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
{
X = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NX, 4, grad_wav_size, y, x, c_img_rows, c_img_cols, sum_step);
Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x, c_img_rows, c_img_cols, sum_step);
float apt = c_aptW[i];
// Compute the haar sum without fetching duplicate pixels.
float t00 = read_sumTex( sumTex, sampler, (int2)(x, y), c_img_rows, c_img_cols, sum_step);
float t02 = read_sumTex( sumTex, sampler, (int2)(x, y + r2), c_img_rows, c_img_cols, sum_step);
float t04 = read_sumTex( sumTex, sampler, (int2)(x, y + r4), c_img_rows, c_img_cols, sum_step);
float t20 = read_sumTex( sumTex, sampler, (int2)(x + r2, y), c_img_rows, c_img_cols, sum_step);
float t24 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r4), c_img_rows, c_img_cols, sum_step);
float t40 = read_sumTex( sumTex, sampler, (int2)(x + r4, y), c_img_rows, c_img_cols, sum_step);
float t42 = read_sumTex( sumTex, sampler, (int2)(x + r4, y + r2), c_img_rows, c_img_cols, sum_step);
float t44 = read_sumTex( sumTex, sampler, (int2)(x + r4, y + r4), c_img_rows, c_img_cols, sum_step);
F t = t00 - t04 - t20 + t24;
X -= t / ((r2) * (r4));
t = t20 - t24 - t40 + t44;
X += t / ((r4 - r2) * (r4));
t = t00 - t02 - t40 + t42;
Y += t / ((r2) * (r4));
t = t02 - t04 - t42 + t44;
Y -= t / ((r4) * (r4 - r2));
X = apt*X;
Y = apt*Y;
angle = atan2(Y, X);
@ -879,76 +927,61 @@ void icvCalcOrientation(
angle *= 180.0f / CV_PI_F;
}
s_X[i] = X;
s_Y[i] = Y;
s_angle[i] = angle;
}
s_X[tid] = X;
s_Y[tid] = Y;
s_angle[tid] = angle;
barrier(CLK_LOCAL_MEM_FENCE);
float bestx = 0, besty = 0, best_mod = 0;
float sumx = 0.0f, sumy = 0.0f;
const int dir = tid * ORI_SEARCH_INC;
#pragma unroll
for (int i = 0; i < ORI_SAMPLES; ++i) {
int angle = round(s_angle[i]);
#pragma unroll
for (int i = 0; i < 18; ++i)
{
const int dir = (i * 4 + get_local_id(1)) * ORI_SEARCH_INC;
int d = abs(angle - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[i];
sumy += s_Y[i];
}
}
s_sumx[tid] = sumx;
s_sumy[tid] = sumy;
s_mod[tid] = sumx*sumx + sumy*sumy;
barrier(CLK_LOCAL_MEM_FENCE);
volatile float sumx = 0.0f, sumy = 0.0f;
int d = abs(convert_int_rte(s_angle[get_local_id(0)]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx = s_X[get_local_id(0)];
sumy = s_Y[get_local_id(0)];
}
d = abs(convert_int_rte(s_angle[get_local_id(0) + 32]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[get_local_id(0) + 32];
sumy += s_Y[get_local_id(0) + 32];
}
d = abs(convert_int_rte(s_angle[get_local_id(0) + 64]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[get_local_id(0) + 64];
sumy += s_Y[get_local_id(0) + 64];
}
d = abs(convert_int_rte(s_angle[get_local_id(0) + 96]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
sumx += s_X[get_local_id(0) + 96];
sumy += s_Y[get_local_id(0) + 96];
}
reduce_32_sum(s_sumx + get_local_id(1) * 32, &sumx, get_local_id(0));
reduce_32_sum(s_sumy + get_local_id(1) * 32, &sumy, get_local_id(0));
const float temp_mod = sumx * sumx + sumy * sumy;
if (temp_mod > best_mod)
{
best_mod = temp_mod;
bestx = sumx;
besty = sumy;
// This reduction searches for the longest wavelet response vector. The first
// step uses all of the work items in the workgroup to narrow the search
// down to the three candidates. It requires s_mod to have a few more
// elements alocated past the work-group size, which are pre-initialized to
// 0.0f above.
for(int t = ORI_RESPONSE_REDUCTION_WIDTH; t >= 3; t /= 2) {
if (tid < t) {
if (s_mod[tid] < s_mod[tid + t]) {
s_mod[tid] = s_mod[tid + t];
s_sumx[tid] = s_sumx[tid + t];
s_sumy[tid] = s_sumy[tid + t];
}
}
barrier(CLK_LOCAL_MEM_FENCE);
}
if (get_local_id(0) == 0)
{
s_X[get_local_id(1)] = bestx;
s_Y[get_local_id(1)] = besty;
s_angle[get_local_id(1)] = best_mod;
}
barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(1) == 0 && get_local_id(0) == 0)
// Do the final reduction and write out the result.
if (tid == 0)
{
int bestIdx = 0;
if (s_angle[1] > s_angle[bestIdx])
// The loop above narrowed the search of the longest vector to three
// possibilities. Pick the best here.
if (s_mod[1] > s_mod[bestIdx])
bestIdx = 1;
if (s_angle[2] > s_angle[bestIdx])
if (s_mod[2] > s_mod[bestIdx])
bestIdx = 2;
if (s_angle[3] > s_angle[bestIdx])
bestIdx = 3;
float kp_dir = atan2(s_Y[bestIdx], s_X[bestIdx]);
float kp_dir = atan2(s_sumy[bestIdx], s_sumx[bestIdx]);
if (kp_dir < 0)
kp_dir += 2.0f * CV_PI_F;
kp_dir *= 180.0f / CV_PI_F;
@ -961,7 +994,6 @@ void icvCalcOrientation(
}
}
__kernel
void icvSetUpright(
__global float * keypoints,
@ -1035,8 +1067,8 @@ inline float linearFilter(
float out = 0.0f;
const int x1 = convert_int_rtn(x);
const int y1 = convert_int_rtn(y);
const int x1 = round(x);
const int y1 = round(y);
const int x2 = x1 + 1;
const int y2 = y1 + 1;

View File

@ -46,6 +46,7 @@
#ifdef HAVE_OPENCV_OCL
#include <cstdio>
#include <sstream>
#include "opencl_kernels.hpp"
using namespace cv;
@ -57,18 +58,25 @@ namespace cv
{
namespace ocl
{
// The number of degrees between orientation samples in calcOrientation
const static int ORI_SEARCH_INC = 5;
// The local size of the calcOrientation kernel
const static int ORI_LOCAL_SIZE = (360 / ORI_SEARCH_INC);
static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
{
char optBuf [100] = {0};
char * optBufPtr = optBuf;
std::stringstream optsStr;
optsStr << "-D ORI_LOCAL_SIZE=" << ORI_LOCAL_SIZE << " ";
optsStr << "-D ORI_SEARCH_INC=" << ORI_SEARCH_INC << " ";
cl_kernel kernel;
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr);
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optsStr.str().c_str());
size_t wave_size = queryWaveFrontSize(kernel);
CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr);
optsStr << "-D WAVE_SIZE=" << wave_size;
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optsStr.str().c_str());
}
}
}
@ -601,8 +609,8 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&surf_.sum.step));
size_t localThreads[3] = {32, 4, 1};
size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1};
size_t localThreads[3] = {ORI_LOCAL_SIZE, 1, 1};
size_t globalThreads[3] = {nFeatures * localThreads[0], 1, 1};
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
}

View File

@ -44,6 +44,12 @@ PERF_TEST_P(ImageName_MinSize, CascadeClassifierLBPFrontalFace,
cc.detectMultiScale(img, faces, 1.1, 3, 0, minSize);
stopTimer();
}
// for some reason OpenCL version detects the face, which CPU version does not detect, we just remove it
// TODO better solution: implement smart way of comparing two set of rectangles
if( filename == "cv/shared/1_itseez-0000492.png" && faces.size() == (size_t)3 )
{
faces.erase(faces.begin());
}
std::sort(faces.begin(), faces.end(), comparators::RectLess());
SANITY_CHECK(faces, 3.001 * faces.size());

View File

@ -654,6 +654,7 @@ bool LBPEvaluator::Feature :: read(const FileNode& node )
LBPEvaluator::LBPEvaluator()
{
features = makePtr<std::vector<Feature> >();
optfeatures = makePtr<std::vector<OptFeature> >();
}
LBPEvaluator::~LBPEvaluator()
{
@ -662,11 +663,12 @@ LBPEvaluator::~LBPEvaluator()
bool LBPEvaluator::read( const FileNode& node )
{
features->resize(node.size());
featuresPtr = &(*features)[0];
optfeaturesPtr = &(*optfeatures)[0];
FileNodeIterator it = node.begin(), it_end = node.end();
std::vector<Feature>& ff = *features;
for(int i = 0; it != it_end; ++it, i++)
{
if(!featuresPtr[i].read(*it))
if(!ff[i].read(*it))
return false;
}
return true;
@ -677,31 +679,58 @@ Ptr<FeatureEvaluator> LBPEvaluator::clone() const
Ptr<LBPEvaluator> ret = makePtr<LBPEvaluator>();
ret->origWinSize = origWinSize;
ret->features = features;
ret->featuresPtr = &(*ret->features)[0];
ret->optfeatures = optfeatures;
ret->optfeaturesPtr = ret->optfeatures.empty() ? 0 : &(*ret->optfeatures)[0];
ret->sum0 = sum0, ret->sum = sum;
ret->normrect = normrect;
ret->offset = offset;
ret->pwin = pwin;
return ret;
}
bool LBPEvaluator::setImage( InputArray _image, Size _origWinSize, Size )
bool LBPEvaluator::setImage( InputArray _image, Size _origWinSize, Size _sumSize )
{
Mat image = _image.getMat();
int rn = image.rows+1, cn = image.cols+1;
origWinSize = _origWinSize;
Size imgsz = _image.size();
int cols = imgsz.width, rows = imgsz.height;
if( image.cols < origWinSize.width || image.rows < origWinSize.height )
if (imgsz.width < origWinSize.width || imgsz.height < origWinSize.height)
return false;
if( sum0.rows < rn || sum0.cols < cn )
origWinSize = _origWinSize;
int rn = _sumSize.height, cn = _sumSize.width;
int sumStep;
CV_Assert(rn >= rows+1 && cn >= cols+1);
if( _image.isUMat() )
{
usum0.create(rn, cn, CV_32S);
usum = UMat(usum0, Rect(0, 0, cols+1, rows+1));
integral(_image, usum, noArray(), noArray(), CV_32S);
sumStep = (int)(usum.step/usum.elemSize());
}
else
{
sum0.create(rn, cn, CV_32S);
sum = Mat(rn, cn, CV_32S, sum0.data);
integral(image, sum);
sum = sum0(Rect(0, 0, cols+1, rows+1));
integral(_image, sum, noArray(), noArray(), CV_32S);
sumStep = (int)(sum.step/sum.elemSize());
}
size_t fi, nfeatures = features->size();
const std::vector<Feature>& ff = *features;
if( sumSize0 != _sumSize )
{
optfeatures->resize(nfeatures);
optfeaturesPtr = &(*optfeatures)[0];
for( fi = 0; fi < nfeatures; fi++ )
optfeaturesPtr[fi].setOffsets( ff[fi], sumStep );
}
if( _image.isUMat() && (sumSize0 != _sumSize || ufbuf.empty()) )
copyVectorToUMat(*optfeatures, ufbuf);
sumSize0 = _sumSize;
for( fi = 0; fi < nfeatures; fi++ )
featuresPtr[fi].updatePtrs( sum );
return true;
}
@ -711,10 +740,18 @@ bool LBPEvaluator::setWindow( Point pt )
pt.x + origWinSize.width >= sum.cols ||
pt.y + origWinSize.height >= sum.rows )
return false;
offset = pt.y * ((int)sum.step/sizeof(int)) + pt.x;
pwin = &sum.at<int>(pt);
return true;
}
void LBPEvaluator::getUMats(std::vector<UMat>& bufs)
{
bufs.clear();
bufs.push_back(usum);
bufs.push_back(ufbuf);
}
//---------------------------------------------- HOGEvaluator ---------------------------------------
bool HOGEvaluator::Feature :: read( const FileNode& node )
{
@ -1133,50 +1170,84 @@ bool CascadeClassifierImpl::detectSingleScale( InputArray _image, Size processin
bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size processingRectSize,
int yStep, double factor, Size sumSize0 )
{
const int VECTOR_SIZE = 1;
Ptr<HaarEvaluator> haar = featureEvaluator.dynamicCast<HaarEvaluator>();
if( haar.empty() )
return false;
haar->setImage(_image, data.origWinSize, sumSize0);
if( cascadeKernel.empty() )
{
cascadeKernel.create("runHaarClassifierStump", ocl::objdetect::cascadedetect_oclsrc,
format("-D VECTOR_SIZE=%d", VECTOR_SIZE));
if( cascadeKernel.empty() )
return false;
}
int featureType = getFeatureType();
std::vector<UMat> bufs;
size_t globalsize[] = { processingRectSize.width/yStep, processingRectSize.height/yStep };
bool ok = false;
if( ustages.empty() )
{
copyVectorToUMat(data.stages, ustages);
copyVectorToUMat(data.stumps, ustumps);
if( !data.subsets.empty() )
copyVectorToUMat(data.subsets, usubsets);
}
std::vector<UMat> bufs;
haar->getUMats(bufs);
CV_Assert(bufs.size() == 3);
if( featureType == FeatureEvaluator::HAAR )
{
Ptr<HaarEvaluator> haar = featureEvaluator.dynamicCast<HaarEvaluator>();
if( haar.empty() )
return false;
Rect normrect = haar->getNormRect();
haar->setImage(_image, data.origWinSize, sumSize0);
if( haarKernel.empty() )
{
haarKernel.create("runHaarClassifierStump", ocl::objdetect::cascadedetect_oclsrc, "");
if( haarKernel.empty() )
return false;
}
//processingRectSize = Size(yStep, yStep);
size_t globalsize[] = { (processingRectSize.width/yStep + VECTOR_SIZE-1)/VECTOR_SIZE, processingRectSize.height/yStep };
haar->getUMats(bufs);
Rect normrect = haar->getNormRect();
cascadeKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sqsum
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
haarKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sqsum
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
// cascade classifier
(int)data.stages.size(),
ocl::KernelArg::PtrReadOnly(ustages),
ocl::KernelArg::PtrReadOnly(ustumps),
// cascade classifier
(int)data.stages.size(),
ocl::KernelArg::PtrReadOnly(ustages),
ocl::KernelArg::PtrReadOnly(ustumps),
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
processingRectSize,
yStep, (float)factor,
normrect, data.origWinSize, MAX_FACES);
bool ok = cascadeKernel.run(2, globalsize, 0, true);
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
processingRectSize,
yStep, (float)factor,
normrect, data.origWinSize, MAX_FACES);
ok = haarKernel.run(2, globalsize, 0, true);
}
else if( featureType == FeatureEvaluator::LBP )
{
Ptr<LBPEvaluator> lbp = featureEvaluator.dynamicCast<LBPEvaluator>();
if( lbp.empty() )
return false;
lbp->setImage(_image, data.origWinSize, sumSize0);
if( lbpKernel.empty() )
{
lbpKernel.create("runLBPClassifierStump", ocl::objdetect::cascadedetect_oclsrc, "");
if( lbpKernel.empty() )
return false;
}
lbp->getUMats(bufs);
int subsetSize = (data.ncategories + 31)/32;
lbpKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
ocl::KernelArg::PtrReadOnly(bufs[1]), // optfeatures
// cascade classifier
(int)data.stages.size(),
ocl::KernelArg::PtrReadOnly(ustages),
ocl::KernelArg::PtrReadOnly(ustumps),
ocl::KernelArg::PtrReadOnly(usubsets),
subsetSize,
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
processingRectSize,
yStep, (float)factor,
data.origWinSize, MAX_FACES);
ok = lbpKernel.run(2, globalsize, 0, true);
}
//CV_Assert(ok);
return ok;
}
@ -1225,6 +1296,7 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
double scaleFactor, Size minObjectSize, Size maxObjectSize,
bool outputRejectLevels )
{
int featureType = getFeatureType();
Size imgsz = _image.size();
int imgtype = _image.type();
@ -1238,7 +1310,9 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
maxObjectSize = imgsz;
bool use_ocl = ocl::useOpenCL() &&
getFeatureType() == FeatureEvaluator::HAAR &&
(featureType == FeatureEvaluator::HAAR ||
featureType == FeatureEvaluator::LBP) &&
ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
!isOldFormatCascade() &&
data.isStumpBased() &&
maskGenerator.empty() &&
@ -1564,7 +1638,8 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
bool CascadeClassifierImpl::read_(const FileNode& root)
{
tryOpenCL = true;
cascadeKernel = ocl::Kernel();
haarKernel = ocl::Kernel();
lbpKernel = ocl::Kernel();
ustages.release();
ustumps.release();
if( !data.read(root) )

View File

@ -149,7 +149,7 @@ protected:
Ptr<MaskGenerator> maskGenerator;
UMat ugrayImage, uimageBuffer;
UMat ufacepos, ustages, ustumps, usubsets;
ocl::Kernel cascadeKernel;
ocl::Kernel haarKernel, lbpKernel;
bool tryOpenCL;
Mutex mtx;
@ -250,13 +250,11 @@ public:
struct Feature
{
Feature();
bool read( const FileNode& node );
bool tilted;
enum { RECT_NUM = 3 };
struct
{
Rect r;
@ -369,14 +367,20 @@ public:
{
Feature();
Feature( int x, int y, int _block_w, int _block_h ) :
rect(x, y, _block_w, _block_h) {}
rect(x, y, _block_w, _block_h) {}
int calc( int offset ) const;
void updatePtrs( const Mat& sum );
bool read(const FileNode& node );
Rect rect; // weight and height for block
const int* p[16]; // fast
};
struct OptFeature
{
OptFeature();
int calc( const int* pwin ) const;
void setOffsets( const Feature& _f, int step );
int ofs[16];
};
LBPEvaluator();
@ -388,55 +392,60 @@ public:
virtual bool setImage(InputArray image, Size _origWinSize, Size);
virtual bool setWindow(Point pt);
virtual void getUMats(std::vector<UMat>& bufs);
int operator()(int featureIdx) const
{ return featuresPtr[featureIdx].calc(offset); }
{ return optfeaturesPtr[featureIdx].calc(pwin); }
virtual int calcCat(int featureIdx) const
{ return (*this)(featureIdx); }
protected:
Size origWinSize;
Size origWinSize, sumSize0;
Ptr<std::vector<Feature> > features;
Feature* featuresPtr; // optimization
Mat sum0, sum;
Rect normrect;
Ptr<std::vector<OptFeature> > optfeatures;
OptFeature* optfeaturesPtr; // optimization
int offset;
Mat sum0, sum;
UMat usum0, usum, ufbuf;
const int* pwin;
};
inline LBPEvaluator::Feature :: Feature()
{
rect = Rect();
}
inline LBPEvaluator::OptFeature :: OptFeature()
{
for( int i = 0; i < 16; i++ )
p[i] = 0;
ofs[i] = 0;
}
inline int LBPEvaluator::Feature :: calc( int _offset ) const
inline int LBPEvaluator::OptFeature :: calc( const int* p ) const
{
int cval = CALC_SUM_( p[5], p[6], p[9], p[10], _offset );
int cval = CALC_SUM_OFS_( ofs[5], ofs[6], ofs[9], ofs[10], p );
return (CALC_SUM_( p[0], p[1], p[4], p[5], _offset ) >= cval ? 128 : 0) | // 0
(CALC_SUM_( p[1], p[2], p[5], p[6], _offset ) >= cval ? 64 : 0) | // 1
(CALC_SUM_( p[2], p[3], p[6], p[7], _offset ) >= cval ? 32 : 0) | // 2
(CALC_SUM_( p[6], p[7], p[10], p[11], _offset ) >= cval ? 16 : 0) | // 5
(CALC_SUM_( p[10], p[11], p[14], p[15], _offset ) >= cval ? 8 : 0)| // 8
(CALC_SUM_( p[9], p[10], p[13], p[14], _offset ) >= cval ? 4 : 0)| // 7
(CALC_SUM_( p[8], p[9], p[12], p[13], _offset ) >= cval ? 2 : 0)| // 6
(CALC_SUM_( p[4], p[5], p[8], p[9], _offset ) >= cval ? 1 : 0);
return (CALC_SUM_OFS_( ofs[0], ofs[1], ofs[4], ofs[5], p ) >= cval ? 128 : 0) | // 0
(CALC_SUM_OFS_( ofs[1], ofs[2], ofs[5], ofs[6], p ) >= cval ? 64 : 0) | // 1
(CALC_SUM_OFS_( ofs[2], ofs[3], ofs[6], ofs[7], p ) >= cval ? 32 : 0) | // 2
(CALC_SUM_OFS_( ofs[6], ofs[7], ofs[10], ofs[11], p ) >= cval ? 16 : 0) | // 5
(CALC_SUM_OFS_( ofs[10], ofs[11], ofs[14], ofs[15], p ) >= cval ? 8 : 0)| // 8
(CALC_SUM_OFS_( ofs[9], ofs[10], ofs[13], ofs[14], p ) >= cval ? 4 : 0)| // 7
(CALC_SUM_OFS_( ofs[8], ofs[9], ofs[12], ofs[13], p ) >= cval ? 2 : 0)| // 6
(CALC_SUM_OFS_( ofs[4], ofs[5], ofs[8], ofs[9], p ) >= cval ? 1 : 0);
}
inline void LBPEvaluator::Feature :: updatePtrs( const Mat& _sum )
inline void LBPEvaluator::OptFeature :: setOffsets( const Feature& _f, int step )
{
const int* ptr = (const int*)_sum.data;
size_t step = _sum.step/sizeof(ptr[0]);
Rect tr = rect;
CV_SUM_PTRS( p[0], p[1], p[4], p[5], ptr, tr, step );
tr.x += 2*rect.width;
CV_SUM_PTRS( p[2], p[3], p[6], p[7], ptr, tr, step );
tr.y += 2*rect.height;
CV_SUM_PTRS( p[10], p[11], p[14], p[15], ptr, tr, step );
tr.x -= 2*rect.width;
CV_SUM_PTRS( p[8], p[9], p[12], p[13], ptr, tr, step );
Rect tr = _f.rect;
CV_SUM_OFS( ofs[0], ofs[1], ofs[4], ofs[5], 0, tr, step );
tr.x += 2*_f.rect.width;
CV_SUM_OFS( ofs[2], ofs[3], ofs[6], ofs[7], 0, tr, step );
tr.y += 2*_f.rect.height;
CV_SUM_OFS( ofs[10], ofs[11], ofs[14], ofs[15], 0, tr, step );
tr.x -= 2*_f.rect.width;
CV_SUM_OFS( ofs[8], ofs[9], ofs[12], ofs[13], 0, tr, step );
}
//---------------------------------------------- HOGEvaluator -------------------------------------------

View File

@ -336,7 +336,7 @@ icvCreateHidHaarClassifierCascade( CvHaarClassifierCascade* cascade )
out->isStumpBased &= node_count == 1;
}
}
/*
#ifdef HAVE_IPP
int can_use_ipp = !out->has_tilted_features && !out->is_tree && out->isStumpBased;
@ -392,7 +392,7 @@ icvCreateHidHaarClassifierCascade( CvHaarClassifierCascade* cascade )
}
}
#endif
*/
cascade->hid_cascade = out;
assert( (char*)haar_node_ptr - (char*)out <= datasize );

View File

@ -1,19 +1,22 @@
///////////////////////////// OpenCL kernels for face detection //////////////////////////////
////////////////////////////// see the opencv/doc/license.txt ///////////////////////////////
typedef struct __attribute__((aligned(4))) OptFeature
typedef struct __attribute__((aligned(4))) OptHaarFeature
{
int4 ofs[3] __attribute__((aligned (4)));
float4 weight __attribute__((aligned (4)));
}
OptFeature;
OptHaarFeature;
typedef struct __attribute__((aligned(4))) OptLBPFeature
{
int16 ofs __attribute__((aligned (4)));
}
OptLBPFeature;
typedef struct __attribute__((aligned(4))) Stump
{
int featureIdx __attribute__((aligned (4)));
float threshold __attribute__((aligned (4))); // for ordered features only
float left __attribute__((aligned (4)));
float right __attribute__((aligned (4)));
float4 st __attribute__((aligned (4)));
}
Stump;
@ -30,7 +33,7 @@ __kernel void runHaarClassifierStump(
int sumstep, int sumoffset,
__global const int* sqsum,
int sqsumstep, int sqsumoffset,
__global const OptFeature* optfeatures,
__global const OptHaarFeature* optfeatures,
int nstages,
__global const Stage* stages,
@ -47,11 +50,8 @@ __kernel void runHaarClassifierStump(
if( ix < imgsize.x && iy < imgsize.y )
{
int ntrees;
int stageIdx, i;
float s = 0.f;
int stageIdx;
__global const Stump* stump = stumps;
__global const OptFeature* f;
__global const int* psum = sum + mad24(iy, sumstep, ix);
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x);
@ -61,20 +61,19 @@ __kernel void runHaarClassifierStump(
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
float4 weight, vsval;
int4 ofs, ofs0, ofs1, ofs2;
nf = nf > 0 ? nf : 1.f;
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
{
ntrees = stages[stageIdx].ntrees;
s = 0.f;
int i, ntrees = stages[stageIdx].ntrees;
float s = 0.f;
for( i = 0; i < ntrees; i++, stump++ )
{
f = optfeatures + stump->featureIdx;
weight = f->weight;
float4 st = stump->st;
__global const OptHaarFeature* f = optfeatures + as_int(st.x);
float4 weight = f->weight;
ofs = f->ofs[0];
int4 ofs = f->ofs[0];
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
ofs = f->ofs[1];
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
@ -84,7 +83,7 @@ __kernel void runHaarClassifierStump(
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
}
s += (sval < stump->threshold*nf) ? stump->left : stump->right;
s += (sval < st.y*nf) ? st.z : st.w;
}
if( s < stages[stageIdx].threshold )
@ -106,13 +105,11 @@ __kernel void runHaarClassifierStump(
}
}
#if 0
__kernel void runLBPClassifierStump(
__global const int* sum,
int sumstep, int sumoffset,
__global const int* sqsum,
int sqsumstep, int sqsumoffset,
__global const OptFeature* optfeatures,
__global const OptLBPFeature* optfeatures,
int nstages,
__global const Stage* stages,
@ -122,50 +119,48 @@ __kernel void runLBPClassifierStump(
volatile __global int* facepos,
int2 imgsize, int xyscale, float factor,
int4 normrect, int2 windowsize, int maxFaces)
int2 windowsize, int maxFaces)
{
int ix = get_global_id(0)*xyscale*VECTOR_SIZE;
int ix = get_global_id(0)*xyscale;
int iy = get_global_id(1)*xyscale;
sumstep /= sizeof(int);
sqsumstep /= sizeof(int);
if( ix < imgsize.x && iy < imgsize.y )
{
int ntrees;
int stageIdx, i;
float s = 0.f;
int stageIdx;
__global const Stump* stump = stumps;
__global const int* bitset = bitsets;
__global const OptFeature* f;
__global const int* psum = sum + mad24(iy, sumstep, ix);
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x);
int normarea = normrect.z * normrect.w;
float invarea = 1.f/normarea;
float sval = (pnsum[0] - pnsum[normrect.z] - pnsum[mul24(normrect.w, sumstep)] +
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
float4 weight;
int4 ofs;
nf = nf > 0 ? nf : 1.f;
__global const int* p = sum + mad24(iy, sumstep, ix);
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
{
ntrees = stages[stageIdx].ntrees;
s = 0.f;
for( i = 0; i < ntrees; i++, stump++, bitset += bitsetSize )
int i, ntrees = stages[stageIdx].ntrees;
float s = 0.f;
for( i = 0; i < ntrees; i++, stump++, bitsets += bitsetSize )
{
f = optfeatures + stump->featureIdx;
float4 st = stump->st;
__global const OptLBPFeature* f = optfeatures + as_int(st.x);
int16 ofs = f->ofs;
weight = f->weight;
#define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \
((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
// compute LBP feature to val
s += (bitset[val >> 5] & (1 << (val & 31))) ? stump->left : stump->right;
int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p );
int mask, idx = (CALC_SUM_OFS_( ofs.s0, ofs.s1, ofs.s4, ofs.s5, p ) >= cval ? 4 : 0); // 0
idx |= (CALC_SUM_OFS_( ofs.s1, ofs.s2, ofs.s5, ofs.s6, p ) >= cval ? 2 : 0); // 1
idx |= (CALC_SUM_OFS_( ofs.s2, ofs.s3, ofs.s6, ofs.s7, p ) >= cval ? 1 : 0); // 2
mask = (CALC_SUM_OFS_( ofs.s6, ofs.s7, ofs.sa, ofs.sb, p ) >= cval ? 16 : 0); // 5
mask |= (CALC_SUM_OFS_( ofs.sa, ofs.sb, ofs.se, ofs.sf, p ) >= cval ? 8 : 0); // 8
mask |= (CALC_SUM_OFS_( ofs.s9, ofs.sa, ofs.sd, ofs.se, p ) >= cval ? 4 : 0); // 7
mask |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0); // 6
mask |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0); // 7
s += (bitsets[idx] & (1 << mask)) ? st.z : st.w;
}
if( s < stages[stageIdx].threshold )
break;
break;
}
if( stageIdx == nstages )
@ -182,4 +177,3 @@ __kernel void runLBPClassifierStump(
}
}
}
#endif

View File

@ -699,3 +699,138 @@ Returns block descriptors computed for the whole image.
* **DESCR_FORMAT_COL_BY_COL** - Column-major order.
The function is mainly used to learn the classifier.
ocl::ORB_OCL
--------------
.. ocv:class:: ocl::ORB_OCL
Class for extracting ORB features and descriptors from an image. ::
class ORB_OCL
{
public:
enum
{
X_ROW = 0,
Y_ROW,
RESPONSE_ROW,
ANGLE_ROW,
OCTAVE_ROW,
SIZE_ROW,
ROWS_COUNT
};
enum
{
DEFAULT_FAST_THRESHOLD = 20
};
explicit ORB_OCL(int nFeatures = 500, float scaleFactor = 1.2f,
int nLevels = 8, int edgeThreshold = 31,
int firstLevel = 0, int WTA_K = 2,
int scoreType = 0, int patchSize = 31);
void operator()(const oclMat& image, const oclMat& mask,
std::vector<KeyPoint>& keypoints);
void operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints);
void operator()(const oclMat& image, const oclMat& mask,
std::vector<KeyPoint>& keypoints, oclMat& descriptors);
void operator()(const oclMat& image, const oclMat& mask,
oclMat& keypoints, oclMat& descriptors);
void downloadKeyPoints(oclMat& d_keypoints, std::vector<KeyPoint>& keypoints);
void convertKeyPoints(Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
int descriptorSize() const;
int descriptorType() const;
int defaultNorm() const;
void setFastParams(int threshold, bool nonmaxSupression = true);
void release();
bool blurForDescriptor;
};
The class implements ORB feature detection and description algorithm.
ocl::ORB_OCL::ORB_OCL
------------------------
Constructor.
.. ocv:function:: ocl::ORB_OCL::ORB_OCL(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31, int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31)
:param nfeatures: The maximum number of features to retain.
:param scaleFactor: Pyramid decimation ratio, greater than 1. ``scaleFactor==2`` means the classical pyramid, where each next level has 4x less pixels than the previous, but such a big scale factor will degrade feature matching scores dramatically. On the other hand, too close to 1 scale factor will mean that to cover certain scale range you will need more pyramid levels and so the speed will suffer.
:param nlevels: The number of pyramid levels. The smallest level will have linear size equal to ``input_image_linear_size/pow(scaleFactor, nlevels)``.
:param edgeThreshold: This is size of the border where the features are not detected. It should roughly match the ``patchSize`` parameter.
:param firstLevel: It should be 0 in the current implementation.
:param WTA_K: The number of points that produce each element of the oriented BRIEF descriptor. The default value 2 means the BRIEF where we take a random point pair and compare their brightnesses, so we get 0/1 response. Other possible values are 3 and 4. For example, 3 means that we take 3 random points (of course, those point coordinates are random, but they are generated from the pre-defined seed, so each element of BRIEF descriptor is computed deterministically from the pixel rectangle), find point of maximum brightness and output index of the winner (0, 1 or 2). Such output will occupy 2 bits, and therefore it will need a special variant of Hamming distance, denoted as ``NORM_HAMMING2`` (2 bits per bin). When ``WTA_K=4``, we take 4 random points to compute each bin (that will also occupy 2 bits with possible values 0, 1, 2 or 3).
:param scoreType: The default HARRIS_SCORE means that Harris algorithm is used to rank features (the score is written to ``KeyPoint::score`` and is used to retain best ``nfeatures`` features); FAST_SCORE is alternative value of the parameter that produces slightly less stable keypoints, but it is a little faster to compute.
:param patchSize: size of the patch used by the oriented BRIEF descriptor. Of course, on smaller pyramid layers the perceived image area covered by a feature will be larger.
ocl::ORB_OCL::operator()
--------------------------
Detects keypoints and computes descriptors for them.
.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints)
.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints)
.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints, oclMat& descriptors)
.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints, oclMat& descriptors)
:param image: Input 8-bit grayscale image.
:param mask: Optional input mask that marks the regions where we should detect features.
:param keypoints: The input/output vector of keypoints. Can be stored both in host and device memory. For device memory:
* ``X_ROW`` contains the horizontal coordinate of the i'th feature.
* ``Y_ROW`` contains the vertical coordinate of the i'th feature.
* ``RESPONSE_ROW`` contains the response of the i'th feature.
* ``ANGLE_ROW`` contains the orientation of the i'th feature.
* ``RESPONSE_ROW`` contains the octave of the i'th feature.
* ``ANGLE_ROW`` contains the size of the i'th feature.
:param descriptors: Computed descriptors. if ``blurForDescriptor`` is true, image will be blurred before descriptors calculation.
ocl::ORB_OCL::downloadKeyPoints
---------------------------------
Download keypoints from device to host memory.
.. ocv:function:: static void ocl::ORB_OCL::downloadKeyPoints( const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints )
ocl::ORB_OCL::convertKeyPoints
--------------------------------
Converts keypoints from OCL representation to vector of ``KeyPoint``.
.. ocv:function:: static void ocl::ORB_OCL::convertKeyPoints( const Mat& d_keypoints, std::vector<KeyPoint>& keypoints )
ocl::ORB_OCL::release
-----------------------
Releases inner buffer memory.
.. ocv:function:: void ocl::ORB_OCL::release()

View File

@ -287,7 +287,7 @@ ocl::createSeparableLinearFilter_GPU
----------------------------------------
Creates a separable linear filter engine.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT)
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
:param srcType: Source array type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported.
@ -303,6 +303,8 @@ Creates a separable linear filter engine.
:param bordertype: Pixel extrapolation method.
:param imgSize: Source image size to choose optimal method for processing.
.. seealso:: :ocv:func:`ocl::getLinearRowFilter_GPU`, :ocv:func:`ocl::getLinearColumnFilter_GPU`, :ocv:func:`createSeparableLinearFilter`
@ -334,7 +336,7 @@ ocl::createDerivFilter_GPU
------------------------------
Creates a filter engine for the generalized Sobel operator.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT )
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
:param srcType: Source image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported.
@ -348,6 +350,8 @@ Creates a filter engine for the generalized Sobel operator.
:param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.
:param imgSize: Source image size to choose optimal method for processing.
.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createDerivFilter`
@ -405,7 +409,7 @@ ocl::createGaussianFilter_GPU
---------------------------------
Creates a Gaussian filter engine.
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT)
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
:param type: Source and destination image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` are supported.
@ -417,6 +421,8 @@ Creates a Gaussian filter engine.
:param bordertype: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.
:param imgSize: Source image size to choose optimal method for processing.
.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createGaussianFilter`
ocl::GaussianBlur

View File

@ -695,17 +695,17 @@ namespace cv
//! returns the separable linear filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));
//! returns the separable filter engine with the specified filters
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
const Ptr<BaseColumnFilter_GPU> &columnFilter);
//! returns the Gaussian filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));
//! returns filter engine for the generalized Sobel operator
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) );
//! applies Laplacian operator to the image
// supports only ksize = 1 and ksize = 3
@ -1439,8 +1439,10 @@ namespace cv
oclMat Dx_;
oclMat Dy_;
oclMat eig_;
oclMat eig_minmax_;
oclMat minMaxbuf_;
oclMat tmpCorners_;
oclMat counter_;
};
inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
@ -1533,6 +1535,110 @@ namespace cv
int bytes;
};
////////////////////////////////// ORB Descriptor Extractor //////////////////////////////////
class CV_EXPORTS ORB_OCL
{
public:
enum
{
X_ROW = 0,
Y_ROW,
RESPONSE_ROW,
ANGLE_ROW,
OCTAVE_ROW,
SIZE_ROW,
ROWS_COUNT
};
enum
{
DEFAULT_FAST_THRESHOLD = 20
};
//! Constructor
explicit ORB_OCL(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31,
int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31);
//! Compute the ORB features on an image
//! image - the image to compute the features (supports only CV_8UC1 images)
//! mask - the mask to apply
//! keypoints - the resulting keypoints
void operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints);
void operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints);
//! Compute the ORB features and descriptors on an image
//! image - the image to compute the features (supports only CV_8UC1 images)
//! mask - the mask to apply
//! keypoints - the resulting keypoints
//! descriptors - descriptors array
void operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints, oclMat& descriptors);
void operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints, oclMat& descriptors);
//! download keypoints from device to host memory
static void downloadKeyPoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints);
//! convert keypoints to KeyPoint vector
static void convertKeyPoints(const Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
//! returns the descriptor size in bytes
inline int descriptorSize() const { return kBytes; }
inline int descriptorType() const { return CV_8U; }
inline int defaultNorm() const { return NORM_HAMMING; }
inline void setFastParams(int threshold, bool nonmaxSupression = true)
{
fastDetector_.threshold = threshold;
fastDetector_.nonmaxSupression = nonmaxSupression;
}
//! release temporary buffer's memory
void release();
//! if true, image will be blurred before descriptors calculation
bool blurForDescriptor;
private:
enum { kBytes = 32 };
void buildScalePyramids(const oclMat& image, const oclMat& mask);
void computeKeyPointsPyramid();
void computeDescriptors(oclMat& descriptors);
void mergeKeyPoints(oclMat& keypoints);
int nFeatures_;
float scaleFactor_;
int nLevels_;
int edgeThreshold_;
int firstLevel_;
int WTA_K_;
int scoreType_;
int patchSize_;
// The number of desired features per scale
std::vector<size_t> n_features_per_level_;
// Points to compute BRIEF descriptors from
oclMat pattern_;
std::vector<oclMat> imagePyr_;
std::vector<oclMat> maskPyr_;
oclMat buf_;
std::vector<oclMat> keyPointsPyr_;
std::vector<int> keyPointsCount_;
FAST_OCL fastDetector_;
Ptr<ocl::FilterEngine_GPU> blurFilter;
oclMat d_keypoints_;
oclMat uMax_;
};
/////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
class CV_EXPORTS PyrLKOpticalFlow

View File

@ -72,5 +72,5 @@ int main(int argc, char ** argv)
{
::perf::TestBase::setModulePerformanceStrategy(::perf::PERF_STRATEGY_SIMPLE);
CV_PERF_TEST_MAIN_INTERNALS(ocl, impls, dumpOpenCLDevice())
CV_PERF_TEST_MAIN_INTERNALS(ocl, impls, ::dumpOpenCLDevice())
}

View File

@ -0,0 +1,103 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
// Authors:
// * Peter Andreas Entschev, peter@entschev.com
//
//M*/
#include "perf_precomp.hpp"
using namespace perf;
/////////////////// ORB ///////////////////
typedef std::tr1::tuple<std::string, int> Image_NFeatures_t;
typedef perf::TestBaseWithParam<Image_NFeatures_t> Image_NFeatures;
PERF_TEST_P(Image_NFeatures, ORB,
testing::Combine(testing::Values<string>("gpu/perf/aloe.png"),
testing::Values(4000)))
{
declare.time(300.0);
const Image_NFeatures_t params = GetParam();
const std::string imgFile = std::tr1::get<0>(params);
const int nFeatures = std::tr1::get<1>(params);
const cv::Mat img = imread(getDataPath(imgFile), cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(img.empty());
if (RUN_OCL_IMPL)
{
cv::ocl::ORB_OCL d_orb(nFeatures);
const cv::ocl::oclMat d_img(img);
cv::ocl::oclMat d_keypoints, d_descriptors;
TEST_CYCLE() d_orb(d_img, cv::ocl::oclMat(), d_keypoints, d_descriptors);
std::vector<cv::KeyPoint> ocl_keypoints;
d_orb.downloadKeyPoints(d_keypoints, ocl_keypoints);
cv::Mat ocl_descriptors(d_descriptors);
ocl_keypoints.resize(10);
ocl_descriptors = ocl_descriptors.rowRange(0, 10);
sortKeyPoints(ocl_keypoints, ocl_descriptors);
SANITY_CHECK_KEYPOINTS(ocl_keypoints, 1e-4);
SANITY_CHECK(ocl_descriptors);
}
else if (RUN_PLAIN_IMPL)
{
cv::ORB orb(nFeatures);
std::vector<cv::KeyPoint> cpu_keypoints;
cv::Mat cpu_descriptors;
TEST_CYCLE() orb(img, cv::noArray(), cpu_keypoints, cpu_descriptors);
SANITY_CHECK_KEYPOINTS(cpu_keypoints);
SANITY_CHECK(cpu_descriptors);
}
else
OCL_PERF_ELSE;
}

View File

@ -59,6 +59,8 @@
# endif
#endif
#define CV_BUILD_OCL_MODULE
#include <iomanip>
#include <stdexcept>
#include <string>

View File

@ -56,8 +56,19 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
{
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
int pixels_per_work_item = 1;
String build_options = format("-D DEPTH_%d", src.depth());
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
{
if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
pixels_per_work_item = 4;
else if (src.cols % 2 == 0)
pixels_per_work_item = 2;
else
pixels_per_work_item = 1;
}
String build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), src.oclchannels(), bidx, pixels_per_work_item);
if (!additionalOptions.empty())
build_options = build_options + additionalOptions;
@ -66,7 +77,6 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -77,6 +87,73 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
if (!data2.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));
size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
static void toHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(),
const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
{
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d", src.depth(), src.oclchannels(), bidx);
if (!additionalOptions.empty())
build_options += additionalOptions;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data1.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data1.data ));
if (!data2.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
static void fromGray_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
if (!additionalOptions.empty())
build_options += additionalOptions;
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
@ -89,7 +166,50 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{
String build_options = format("-D DEPTH_%d -D dcn=%d", src.depth(), dst.channels());
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
int pixels_per_work_item = 1;
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
{
if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
pixels_per_work_item = 4;
else if (src.cols % 2 == 0)
pixels_per_work_item = 2;
else
pixels_per_work_item = 1;
}
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), dst.channels(), bidx, pixels_per_work_item);
if (!additionalOptions.empty())
build_options += additionalOptions;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
static void toRGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{
String build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
if (!additionalOptions.empty())
build_options = build_options + additionalOptions;
@ -101,7 +221,6 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -119,10 +238,13 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
static void fromHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
{
String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s", src.depth(),
dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
if (!additionalOptions.empty())
build_options += additionalOptions;
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
@ -136,6 +258,36 @@ static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
if (!data.empty())
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
#else
size_t lt[3] = { 16, 16, 1 };
#endif
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
}
static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
{
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s",
src.depth(), dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
size_t gt[3] = { dst.cols, dst.rows, 1 };
#ifdef ANDROID
size_t lt[3] = { 16, 10, 1 };
@ -147,8 +299,8 @@ static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
{
String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d",
src.depth(), greenbits, dst.channels());
String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d -D bidx=%d",
src.depth(), greenbits, dst.channels(), bidx);
int src_offset = src.offset >> 1, src_step = src.step >> 1;
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step / dst.elemSize1();
@ -157,7 +309,6 @@ static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int gree
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -174,8 +325,8 @@ static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int gree
static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
{
String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d",
src.depth(), greenbits, src.channels());
String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d -D bidx=%d",
src.depth(), greenbits, src.channels(), bidx);
int src_offset = (int)src.offset, src_step = (int)src.step;
int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1;
@ -184,7 +335,6 @@ static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenb
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
@ -272,7 +422,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
CV_Assert(scn == 1);
dcn = code == COLOR_GRAY2BGRA ? 4 : 3;
dst.create(sz, CV_MAKETYPE(depth, dcn));
toRGB_caller(src, dst, 0, "Gray2RGB");
fromGray_caller(src, dst, 0, "Gray2RGB");
break;
}
case COLOR_BGR2YUV: case COLOR_RGB2YUV:
@ -303,7 +453,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
Size dstSz(sz.width, sz.height * 2 / 3);
dst.create(dstSz, CV_MAKETYPE(depth, dcn));
toRGB_caller(src, dst, bidx, "YUV2RGBA_NV12");
toRGB_NV12_caller(src, dst, bidx, "YUV2RGBA_NV12");
break;
}
case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb:
@ -460,11 +610,11 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
initialized = true;
}
fromRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
toHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
return;
}
fromRGB_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
toHSV_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
break;
}
case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL:
@ -483,7 +633,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
dst.create(sz, CV_MAKETYPE(depth, dcn));
std::string kernelName = std::string(is_hsv ? "HSV" : "HLS") + "2RGB";
toRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
fromHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
break;
}
case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA:

View File

@ -169,7 +169,7 @@ void cv::ocl::fft_teardown()
// bake a new plan
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type)
: plHandle(0), dft_size(_dft_size), src_step(_src_step), depth(_depth), dst_step(_dst_step), flags(_flags), type(_type)
: plHandle(0), dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), depth(_depth), flags(_flags), type(_type)
{
fft_setup();

View File

@ -741,6 +741,135 @@ void cv::ocl::filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &ke
f->apply(src, dst);
}
const int optimizedSepFilterLocalSize = 16;
static void sepFilter2D_SinglePass(const oclMat &src, oclMat &dst,
const Mat &row_kernel, const Mat &col_kernel, int bordertype = BORDER_DEFAULT)
{
size_t lt2[3] = {optimizedSepFilterLocalSize, optimizedSepFilterLocalSize, 1};
size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};
unsigned int src_pitch = src.step;
unsigned int dst_pitch = dst.step;
int src_offset_x = (src.offset % src.step) / src.elemSize();
int src_offset_y = src.offset / src.step;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_x ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_y ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.offset ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholecols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholerows ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
String option = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d",(int)lt2[0], (int)lt2[1],
row_kernel.rows / 2, col_kernel.rows / 2 );
option += " -D KERNEL_MATRIX_X=";
for(int i=0; i<row_kernel.rows; i++)
option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &row_kernel.at<float>(i) ) );
option += "0x0";
option += " -D KERNEL_MATRIX_Y=";
for(int i=0; i<col_kernel.rows; i++)
option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &col_kernel.at<float>(i) ) );
option += "0x0";
switch(src.type())
{
case CV_8UC1:
option += " -D SRCTYPE=uchar -D CONVERT_SRCTYPE=convert_float -D WORKTYPE=float";
break;
case CV_32FC1:
option += " -D SRCTYPE=float -D CONVERT_SRCTYPE= -D WORKTYPE=float";
break;
case CV_8UC2:
option += " -D SRCTYPE=uchar2 -D CONVERT_SRCTYPE=convert_float2 -D WORKTYPE=float2";
break;
case CV_32FC2:
option += " -D SRCTYPE=float2 -D CONVERT_SRCTYPE= -D WORKTYPE=float2";
break;
case CV_8UC3:
option += " -D SRCTYPE=uchar3 -D CONVERT_SRCTYPE=convert_float3 -D WORKTYPE=float3";
break;
case CV_32FC3:
option += " -D SRCTYPE=float3 -D CONVERT_SRCTYPE= -D WORKTYPE=float3";
break;
case CV_8UC4:
option += " -D SRCTYPE=uchar4 -D CONVERT_SRCTYPE=convert_float4 -D WORKTYPE=float4";
break;
case CV_32FC4:
option += " -D SRCTYPE=float4 -D CONVERT_SRCTYPE= -D WORKTYPE=float4";
break;
default:
CV_Error(CV_StsUnsupportedFormat, "Image type is not supported!");
break;
}
switch(dst.type())
{
case CV_8UC1:
option += " -D DSTTYPE=uchar -D CONVERT_DSTTYPE=convert_uchar_sat";
break;
case CV_8UC2:
option += " -D DSTTYPE=uchar2 -D CONVERT_DSTTYPE=convert_uchar2_sat";
break;
case CV_8UC3:
option += " -D DSTTYPE=uchar3 -D CONVERT_DSTTYPE=convert_uchar3_sat";
break;
case CV_8UC4:
option += " -D DSTTYPE=uchar4 -D CONVERT_DSTTYPE=convert_uchar4_sat";
break;
case CV_32FC1:
option += " -D DSTTYPE=float -D CONVERT_DSTTYPE=";
break;
case CV_32FC2:
option += " -D DSTTYPE=float2 -D CONVERT_DSTTYPE=";
break;
case CV_32FC3:
option += " -D DSTTYPE=float3 -D CONVERT_DSTTYPE=";
break;
case CV_32FC4:
option += " -D DSTTYPE=float4 -D CONVERT_DSTTYPE=";
break;
default:
CV_Error(CV_StsUnsupportedFormat, "Image type is not supported!");
break;
}
switch(bordertype)
{
case cv::BORDER_CONSTANT:
option += " -D BORDER_CONSTANT";
break;
case cv::BORDER_REPLICATE:
option += " -D BORDER_REPLICATE";
break;
case cv::BORDER_REFLECT:
option += " -D BORDER_REFLECT";
break;
case cv::BORDER_REFLECT101:
option += " -D BORDER_REFLECT_101";
break;
case cv::BORDER_WRAP:
option += " -D BORDER_WRAP";
break;
default:
CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
break;
}
openCLExecuteKernel(src.clCxt, &filtering_sep_filter_singlepass, "sep_filter_singlepass", gt2, lt2, args,
-1, -1, option.c_str() );
}
////////////////////////////////////////////////////////////////////////////////////////////////////
// SeparableFilter
@ -790,6 +919,35 @@ Ptr<FilterEngine_GPU> cv::ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter
return makePtr<SeparableFilterEngine_GPU>(rowFilter, columnFilter);
}
namespace
{
class SingleStepSeparableFilterEngine_GPU : public FilterEngine_GPU
{
public:
SingleStepSeparableFilterEngine_GPU( const Mat &rowKernel_, const Mat &columnKernel_, const int btype )
{
bordertype = btype;
rowKernel = rowKernel_;
columnKernel = columnKernel_;
}
virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
{
normalizeROI(roi, Size(rowKernel.rows, columnKernel.rows), Point(-1,-1), src.size());
oclMat srcROI = src(roi);
oclMat dstROI = dst(roi);
sepFilter2D_SinglePass(src, dst, rowKernel, columnKernel, bordertype);
}
Mat rowKernel;
Mat columnKernel;
int bordertype;
};
}
static void GPUFilterBox(const oclMat &src, oclMat &dst,
Size &ksize, const Point anchor, const int borderType)
{
@ -1243,17 +1401,32 @@ Ptr<BaseColumnFilter_GPU> cv::ocl::getLinearColumnFilter_GPU(int /*bufType*/, in
}
Ptr<FilterEngine_GPU> cv::ocl::createSeparableLinearFilter_GPU(int srcType, int dstType,
const Mat &rowKernel, const Mat &columnKernel, const Point &anchor, double delta, int bordertype)
const Mat &rowKernel, const Mat &columnKernel, const Point &anchor, double delta, int bordertype, Size imgSize )
{
int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(dstType);
int cn = CV_MAT_CN(srcType);
int bdepth = std::max(std::max(sdepth, ddepth), CV_32F);
int bufType = CV_MAKETYPE(bdepth, cn);
Context* clCxt = Context::getContext();
Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, bordertype);
Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, bordertype, delta);
//if image size is non-degenerate and large enough
//and if filter support is reasonable to satisfy larger local memory requirements,
//then we can use single pass routine to avoid extra runtime calls overhead
if( clCxt && clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) &&
rowKernel.rows <= 21 && columnKernel.rows <= 21 &&
(rowKernel.rows & 1) == 1 && (columnKernel.rows & 1) == 1 &&
imgSize.width > optimizedSepFilterLocalSize + (rowKernel.rows>>1) &&
imgSize.height > optimizedSepFilterLocalSize + (columnKernel.rows>>1) )
{
return Ptr<FilterEngine_GPU>(new SingleStepSeparableFilterEngine_GPU(rowKernel, columnKernel, bordertype));
}
else
{
Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, bordertype);
Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, bordertype, delta);
return createSeparableFilter_GPU(rowFilter, columnFilter);
return createSeparableFilter_GPU(rowFilter, columnFilter);
}
}
void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor, double delta, int bordertype)
@ -1277,16 +1450,16 @@ void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat
dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, delta, bordertype);
Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, delta, bordertype, src.size());
f->apply(src, dst);
}
Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType)
Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType, Size imgSize )
{
Mat kx, ky;
getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
return createSeparableLinearFilter_GPU(srcType, dstType,
kx, ky, Point(-1, -1), 0, borderType);
kx, ky, Point(-1, -1), 0, borderType, imgSize);
}
////////////////////////////////////////////////////////////////////////////////////////////////////
@ -1356,7 +1529,7 @@ void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, d
////////////////////////////////////////////////////////////////////////////////////////////////////
// Gaussian Filter
Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int bordertype)
Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int bordertype, Size imgSize)
{
int depth = CV_MAT_DEPTH(type);
@ -1383,7 +1556,7 @@ Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, do
else
ky = getGaussianKernel(ksize.height, sigma2, std::max(depth, CV_32F));
return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1, -1), 0.0, bordertype);
return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1, -1), 0.0, bordertype, imgSize);
}
void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2, int bordertype)
@ -1419,7 +1592,7 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
dst.create(src.size(), src.type());
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype);
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype, src.size());
f->apply(src, dst);
}

View File

@ -48,154 +48,142 @@
using namespace cv;
using namespace cv::ocl;
// currently sort procedure on the host is more efficient
static bool use_cpu_sorter = true;
namespace
// compact structure for corners
struct DefCorner
{
enum SortMethod
float eig; //eigenvalue of corner
short x; //x coordinate of corner point
short y; //y coordinate of corner point
} ;
// compare procedure for corner
//it is used for sort on the host side
struct DefCornerCompare
{
CPU_STL,
BITONIC,
SELECTION
};
const int GROUP_SIZE = 256;
template<SortMethod method>
struct Sorter
{
//typedef EigType;
};
//TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed.
template<>
struct Sorter<CPU_STL>
{
typedef oclMat EigType;
static cv::Mutex cs;
static Mat mat_eig;
//prototype
static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2)
bool operator()(const DefCorner a, const DefCorner b) const
{
float v1 = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0]));
float v2 = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0]));
return v1 > v2;
}
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
{
cv::AutoLock lock(cs);
//temporarily use STL's sort function
Mat mat_corners = corners;
mat_eig = eig_tex;
std::sort(mat_corners.begin<cl_float2>(), mat_corners.begin<cl_float2>() + count, clfloat2Gt);
corners = mat_corners;
return a.eig > b.eig;
}
};
cv::Mutex Sorter<CPU_STL>::cs;
cv::Mat Sorter<CPU_STL>::mat_eig;
template<>
struct Sorter<BITONIC>
// sort corner point using opencl bitonicosrt implementation
static void sortCorners_caller(oclMat& corners, const int count)
{
typedef TextureCL EigType;
Context * cxt = Context::getContext();
int GS = count/2;
int LS = min(255,GS);
size_t globalThreads[3] = {GS, 1, 1};
size_t localThreads[3] = {LS, 1, 1};
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
// 2^numStages should be equal to count or the output is invalid
int numStages = 0;
for(int i = count; i > 1; i >>= 1)
{
Context * cxt = Context::getContext();
size_t globalThreads[3] = {count / 2, 1, 1};
size_t localThreads[3] = {GROUP_SIZE, 1, 1};
// 2^numStages should be equal to count or the output is invalid
int numStages = 0;
for(int i = count; i > 1; i >>= 1)
++numStages;
}
const int argc = 4;
std::vector< std::pair<size_t, const void *> > args(argc);
std::string kernelname = "sortCorners_bitonicSort";
args[0] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
args[1] = std::make_pair(sizeof(cl_int), (void *)&count);
for(int stage = 0; stage < numStages; ++stage)
{
args[2] = std::make_pair(sizeof(cl_int), (void *)&stage);
for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
{
++numStages;
}
const int argc = 5;
std::vector< std::pair<size_t, const void *> > args(argc);
String kernelname = "sortCorners_bitonicSort";
args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex);
args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
args[2] = std::make_pair(sizeof(cl_int), (void *)&count);
for(int stage = 0; stage < numStages; ++stage)
{
args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
{
args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
}
args[3] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
}
}
};
}
template<>
struct Sorter<SELECTION>
{
typedef TextureCL EigType;
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
{
Context * cxt = Context::getContext();
size_t globalThreads[3] = {count, 1, 1};
size_t localThreads[3] = {GROUP_SIZE, 1, 1};
std::vector< std::pair<size_t, const void *> > args;
//local
String kernelname = "sortCorners_selectionSortLocal";
int lds_size = GROUP_SIZE * sizeof(cl_float2);
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) );
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) );
args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
args.push_back( std::make_pair( lds_size, (void*)NULL) );
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
//final
kernelname = "sortCorners_selectionSortFinal";
args.pop_back();
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
}
};
int findCorners_caller(
const TextureCL& eig,
const float threshold,
const oclMat& mask,
oclMat& corners,
const int max_count)
// find corners on matrix and put it into array
static void findCorners_caller(
const oclMat& eig_mat, //input matrix worth eigenvalues
oclMat& eigMinMax, //input with min and max values of eigenvalues
const float qualityLevel,
const oclMat& mask,
oclMat& corners, //output array with detected corners
oclMat& counter) //output value with number of detected corners, have to be 0 before call
{
String opt;
std::vector<int> k;
Context * cxt = Context::getContext();
std::vector< std::pair<size_t, const void*> > args;
String kernelname = "findCorners";
const int mask_strip = mask.step / mask.elemSize1();
oclMat g_counter(1, 1, CV_32SC1);
g_counter.setTo(0);
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&(eig_mat.data)));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eig ));
int src_pitch = (int)eig_mat.step;
args.push_back(std::make_pair( sizeof(cl_int), (void*)&src_pitch ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&mask.data ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&corners.data ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&mask_strip));
args.push_back(std::make_pair( sizeof(cl_float), (void*)&threshold ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.rows ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.cols ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&max_count ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&g_counter.data ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eigMinMax.data ));
args.push_back(std::make_pair( sizeof(cl_float), (void*)&qualityLevel ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig_mat.rows ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig_mat.cols ));
args.push_back(std::make_pair( sizeof(cl_int), (void*)&corners.cols ));
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&counter.data ));
size_t globalThreads[3] = {eig.cols, eig.rows, 1};
size_t globalThreads[3] = {eig_mat.cols, eig_mat.rows, 1};
size_t localThreads[3] = {16, 16, 1};
if(!mask.empty())
opt += " -D WITH_MASK=1";
const char * opt = mask.empty() ? "" : "-D WITH_MASK";
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1, opt);
return std::min(Mat(g_counter).at<int>(0), max_count);
openCLExecuteKernel(cxt, &imgproc_gftt, "findCorners", globalThreads, localThreads, args, -1, -1, opt.c_str());
}
static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
{
size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
CV_Assert(groupnum != 0);
int dbsize = groupnum * 2 * src.elemSize();
ensureSizeIsEnough(1, dbsize, CV_8UC1, dst);
cl_mem dst_data = reinterpret_cast<cl_mem>(dst.data);
int all_cols = src.step / src.elemSize();
int pre_cols = (src.offset % src.step) / src.elemSize();
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
int invalid_cols = pre_cols + sec_cols;
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;
int offset = src.offset / src.elemSize();
{// first parallel pass
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
size_t globalThreads[3] = {groupnum * 256, 1, 1};
size_t localThreads[3] = {256, 1, 1};
openCLExecuteKernel(src.clCxt, &arithm_minMax, "arithm_op_minMax", globalThreads, localThreads,
args, -1, -1, "-D T=float -D DEPTH_5");
}
{// run final "serial" kernel to find accumulate results from threads and reset corner counter
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&tozero.data ));
size_t globalThreads[3] = {1, 1, 1};
size_t localThreads[3] = {1, 1, 1};
openCLExecuteKernel(src.clCxt, &imgproc_gftt, "arithm_op_minMax_final", globalThreads, localThreads,
args, -1, -1);
}
}
}//unnamed namespace
void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
{
@ -205,67 +193,99 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
ensureSizeIsEnough(image.size(), CV_32F, eig_);
if (useHarrisDetector)
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
cornerHarris_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
else
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
double maxVal = 0;
minMax(eig_, NULL, &maxVal);
ensureSizeIsEnough(1,1, CV_32SC1, counter_);
ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
// find max eigenvalue and reset detected counters
minMaxEig_caller(eig_,eig_minmax_,counter_);
Ptr<TextureCL> eig_tex = bindTexturePtr(eig_);
int total = findCorners_caller(
*eig_tex,
static_cast<float>(maxVal * qualityLevel),
// allocate buffer for kernels
int corner_array_size = std::max(1024, static_cast<int>(image.size().area() * 0.05));
if(!use_cpu_sorter)
{ // round to 2^n
unsigned int n=1;
for(n=1;n<(unsigned int)corner_array_size;n<<=1) ;
corner_array_size = (int)n;
ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
// set to 0 to be able use bitonic sort on whole 2^n array
tmpCorners_.setTo(0);
}
else
{
ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
}
int total = tmpCorners_.cols; // by default the number of corner is full array
std::vector<DefCorner> tmp(tmpCorners_.cols); // input buffer with corner for HOST part of algorithm
//find points with high eigenvalue and put it into the output array
findCorners_caller(
eig_,
eig_minmax_,
static_cast<float>(qualityLevel),
mask,
tmpCorners_,
tmpCorners_.cols);
counter_);
if(!use_cpu_sorter)
{// sort detected corners on deivce side
sortCorners_caller(tmpCorners_, corner_array_size);
}
else
{// send non-blocking request to read real non-zero number of corners to sort it on the HOST side
openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem)counter_.data, CL_FALSE, 0,sizeof(int), &total, 0, NULL, NULL));
}
//blocking read whole corners array (sorted or not sorted)
openCLReadBuffer(tmpCorners_.clCxt,(cl_mem)tmpCorners_.data,&tmp[0],tmpCorners_.cols*sizeof(DefCorner));
if (total == 0)
{
{// check for trivial case
corners.release();
return;
}
if(use_cpu_sorter)
{
Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total);
}
else
{
//if total is power of 2
if(((total - 1) & (total)) == 0)
{
Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total);
}
else
{
Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total);
}
{// sort detected corners on cpu side.
tmp.resize(total);
std::sort(tmp.begin(), tmp.end(), DefCornerCompare());
}
//estimate maximal size of final output array
int total_max = maxCorners > 0 ? std::min(maxCorners, total) : total;
int D2 = (int)ceil(minDistance * minDistance);
// allocate output buffer
std::vector<Point2f> tmp2;
tmp2.reserve(total_max);
if (minDistance < 1)
{
Rect roi_range(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1);
tmpCorners_(roi_range).copyTo(corners);
{// we have not distance restriction. then just copy with conversion maximal allowed points into output array
for(int i=0;i<total_max && tmp[i].eig>0.0f;++i)
{
tmp2.push_back(Point2f(tmp[i].x,tmp[i].y));
}
}
else
{
std::vector<Point2f> tmp(total);
downloadPoints(tmpCorners_, tmp);
std::vector<Point2f> tmp2;
tmp2.reserve(total);
{// we have distance restriction. then start coping to output array from the first element and check distance for each next one
const int cell_size = cvRound(minDistance);
const int grid_width = (image.cols + cell_size - 1) / cell_size;
const int grid_height = (image.rows + cell_size - 1) / cell_size;
std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
std::vector< std::vector<Point2i> > grid(grid_width * grid_height);
for (int i = 0; i < total; ++i)
for (int i = 0; i < total ; ++i)
{
Point2f p = tmp[i];
DefCorner p = tmp[i];
if(p.eig<=0.0f)
break; // condition to stop that is needed for GPU bitonic sort usage.
bool good = true;
@ -287,40 +307,42 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
{
for (int xx = x1; xx <= x2; xx++)
{
std::vector<Point2f>& m = grid[yy * grid_width + xx];
if (!m.empty())
std::vector<Point2i>& m = grid[yy * grid_width + xx];
if (m.empty())
continue;
for(size_t j = 0; j < m.size(); j++)
{
for(size_t j = 0; j < m.size(); j++)
{
float dx = p.x - m[j].x;
float dy = p.y - m[j].y;
int dx = p.x - m[j].x;
int dy = p.y - m[j].y;
if (dx * dx + dy * dy < minDistance * minDistance)
{
good = false;
goto break_out;
}
if (dx * dx + dy * dy < D2)
{
good = false;
goto break_out_;
}
}
}
}
break_out:
break_out_:
if(good)
{
grid[y_cell * grid_width + x_cell].push_back(p);
grid[y_cell * grid_width + x_cell].push_back(Point2i(p.x,p.y));
tmp2.push_back(p);
tmp2.push_back(Point2f(p.x,p.y));
if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
break;
}
}
corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
}
int final_size = static_cast<int>(tmp2.size());
if(final_size>0)
corners.upload(Mat(1, final_size, CV_32FC2, &tmp2[0]));
else
corners.release();
}
void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v)
{

View File

@ -866,16 +866,17 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
if(gcascade->is_stump_based && gsum.clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE))
{
//setup local group size
localThreads[0] = 8;
localThreads[1] = 16;
//setup local group size for "pixel step" = 1
localThreads[0] = 16;
localThreads[1] = 32;
localThreads[2] = 1;
//init maximal number of workgroups
//calc maximal number of workgroups
int WGNumX = 1+(sizev[0].width /(localThreads[0]));
int WGNumY = 1+(sizev[0].height/(localThreads[1]));
int WGNumZ = loopcount;
int WGNum = 0; //accurate number of non -empty workgroups
int WGNumTotal = 0; //accurate number of non-empty workgroups
int WGNumSampled = 0; //accurate number of workgroups processed only 1/4 part of all pixels. it is made for large images with scale <= 2
oclMat oclWGInfo(1,sizeof(cl_int4) * WGNumX*WGNumY*WGNumZ,CV_8U);
{
cl_int4* pWGInfo = (cl_int4*)clEnqueueMapBuffer(getClCommandQueue(oclWGInfo.clCxt),(cl_mem)oclWGInfo.datastart,true,CL_MAP_WRITE, 0, oclWGInfo.step, 0,0,0,&status);
@ -895,12 +896,16 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
if(gx>=(Width-cascade->orig_window_size.width))
continue; // no data to process
if(scaleinfo[z].factor<=2)
{
WGNumSampled++;
}
// save no-empty workgroup info into array
pWGInfo[WGNum].s[0] = scaleinfo[z].width_height;
pWGInfo[WGNum].s[1] = (gx << 16) | gy;
pWGInfo[WGNum].s[2] = scaleinfo[z].imgoff;
memcpy(&(pWGInfo[WGNum].s[3]),&(scaleinfo[z].factor),sizeof(float));
WGNum++;
pWGInfo[WGNumTotal].s[0] = scaleinfo[z].width_height;
pWGInfo[WGNumTotal].s[1] = (gx << 16) | gy;
pWGInfo[WGNumTotal].s[2] = scaleinfo[z].imgoff;
memcpy(&(pWGInfo[WGNumTotal].s[3]),&(scaleinfo[z].factor),sizeof(float));
WGNumTotal++;
}
}
}
@ -908,13 +913,8 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
pWGInfo = NULL;
}
// setup global sizes to have linear array of workgroups with WGNum size
globalThreads[0] = localThreads[0]*WGNum;
globalThreads[1] = localThreads[1];
globalThreads[2] = 1;
#define NODE_SIZE 12
// pack node info to have less memory loads
// pack node info to have less memory loads on the device side
oclMat oclNodesPK(1,sizeof(cl_int) * NODE_SIZE * nodenum,CV_8U);
{
cl_int status;
@ -963,8 +963,6 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
options += format(" -D WND_SIZE_X=%d",cascade->orig_window_size.width);
options += format(" -D WND_SIZE_Y=%d",cascade->orig_window_size.height);
options += format(" -D STUMP_BASED=%d",gcascade->is_stump_based);
options += format(" -D LSx=%d",localThreads[0]);
options += format(" -D LSy=%d",localThreads[1]);
options += format(" -D SPLITNODE=%d",splitnode);
options += format(" -D SPLITSTAGE=%d",splitstage);
options += format(" -D OUTPUTSZ=%d",outputsz);
@ -972,8 +970,39 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
// init candiate global count by 0
int pattern = 0;
openCLSafeCall(clEnqueueWriteBuffer(qu, candidatebuffer, 1, 0, 1 * sizeof(pattern),&pattern, 0, NULL, NULL));
// execute face detector
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, localThreads, args, -1, -1, options.c_str());
if(WGNumTotal>WGNumSampled)
{// small images and each pixel is processed
// setup global sizes to have linear array of workgroups with WGNum size
int pixelstep = 1;
size_t LS[3]={localThreads[0]/pixelstep,localThreads[1]/pixelstep,1};
globalThreads[0] = LS[0]*(WGNumTotal-WGNumSampled);
globalThreads[1] = LS[1];
globalThreads[2] = 1;
String options1 = options;
options1 += format(" -D PIXEL_STEP=%d",pixelstep);
options1 += format(" -D WGSTART=%d",WGNumSampled);
options1 += format(" -D LSx=%d",LS[0]);
options1 += format(" -D LSy=%d",LS[1]);
// execute face detector
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, LS, args, -1, -1, options1.c_str());
}
if(WGNumSampled>0)
{// large images each 4th pixel is processed
// setup global sizes to have linear array of workgroups with WGNum size
int pixelstep = 2;
size_t LS[3]={localThreads[0]/pixelstep,localThreads[1]/pixelstep,1};
globalThreads[0] = LS[0]*WGNumSampled;
globalThreads[1] = LS[1];
globalThreads[2] = 1;
String options2 = options;
options2 += format(" -D PIXEL_STEP=%d",pixelstep);
options2 += format(" -D WGSTART=%d",0);
options2 += format(" -D LSx=%d",LS[0]);
options2 += format(" -D LSy=%d",LS[1]);
// execute face detector
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, LS, args, -1, -1, options2.c_str());
}
//read candidate buffer back and put it into host list
openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
assert(candidate[0]<outputsz);

View File

@ -76,6 +76,11 @@ namespace cv
int cdescr_width;
int cdescr_height;
// A shift value and type that allows qangle to be different
// sizes on different hardware
int qangle_step_shift;
int qangle_type;
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
int nblocks_win_x, int nblocks_win_y);
@ -153,6 +158,7 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
hog_device_cpu = true;
else
hog_device_cpu = false;
}
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
@ -213,7 +219,7 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
effect_size = img.size();
grad.create(img.size(), CV_32FC2);
qangle.create(img.size(), CV_8UC2);
qangle.create(img.size(), hog::qangle_type);
const size_t block_hist_size = getBlockHistogramSize();
const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
@ -1606,6 +1612,16 @@ void cv::ocl::device::hog::set_up_constants(int nbins,
int descr_size = descr_width * nblocks_win_y;
cdescr_size = descr_size;
qangle_type = CV_8UC2;
qangle_step_shift = 0;
// Some Intel devices have low single-byte access performance,
// so we change the datatype here.
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
{
qangle_type = CV_32SC2;
qangle_step_shift = 2;
}
}
void cv::ocl::device::hog::compute_hists(int nbins,
@ -1627,7 +1643,7 @@ void cv::ocl::device::hog::compute_hists(int nbins,
int blocks_total = img_block_width * img_block_height;
int grad_quadstep = grad.step >> 2;
int qangle_step = qangle.step;
int qangle_step = qangle.step >> qangle_step_shift;
int blocks_in_group = 4;
size_t localThreads[3] = { blocks_in_group * 24, 2, 1 };
@ -1892,7 +1908,7 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width,
char correctGamma = (correct_gamma) ? 1 : 0;
int img_step = img.step;
int grad_quadstep = grad.step >> 3;
int qangle_step = qangle.step >> 1;
int qangle_step = qangle.step >> (1 + qangle_step_shift);
args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
@ -1927,7 +1943,7 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width,
char correctGamma = (correct_gamma) ? 1 : 0;
int img_step = img.step >> 2;
int grad_quadstep = grad.step >> 3;
int qangle_step = qangle.step >> 1;
int qangle_step = qangle.step >> (1 + qangle_step_shift);
args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));

View File

@ -1035,67 +1035,117 @@ namespace cv
else
scale = 1. / scale;
if (ksize > 0)
const int sobel_lsz = 16;
if((src.type() == CV_8UC1 || src.type() == CV_32FC1) &&
(ksize==3 || ksize==5 || ksize==7 || ksize==-1) &&
src.wholerows > sobel_lsz + (ksize>>1) &&
src.wholecols > sobel_lsz + (ksize>>1))
{
Context* clCxt = Context::getContext();
if(clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) && src.type() == CV_8UC1 &&
src.cols % 8 == 0 && src.rows % 8 == 0 &&
ksize==3 &&
(borderType ==cv::BORDER_REFLECT ||
borderType == cv::BORDER_REPLICATE ||
borderType ==cv::BORDER_REFLECT101 ||
borderType ==cv::BORDER_WRAP))
Dx.create(src.size(), CV_32FC1);
Dy.create(src.size(), CV_32FC1);
CV_Assert(Dx.rows == Dy.rows && Dx.cols == Dy.cols);
size_t lt2[3] = {sobel_lsz, sobel_lsz, 1};
size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};
unsigned int src_pitch = src.step;
unsigned int Dx_pitch = Dx.step;
unsigned int Dy_pitch = Dy.step;
int src_offset_x = (src.offset % src.step) / src.elemSize();
int src_offset_y = src.offset / src.step;
float _scale = scale;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_x ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_y ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.offset ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dx_pitch ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dy.offset ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dy_pitch ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholecols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholerows ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.rows ));
args.push_back( std::make_pair( sizeof(cl_float), (void *)&_scale ));
String option = cv::format("-D BLK_X=%d -D BLK_Y=%d",(int)lt2[0],(int)lt2[1]);
switch(src.type())
{
Dx.create(src.size(), CV_32FC1);
Dy.create(src.size(), CV_32FC1);
const unsigned int block_x = 8;
const unsigned int block_y = 8;
unsigned int src_pitch = src.step;
unsigned int dst_pitch = Dx.cols;
float _scale = scale;
std::vector<std::pair<size_t , const void *> > args;
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));
args.push_back( std::make_pair( sizeof(cl_float) , (void *)&_scale ));
size_t gt2[3] = {src.cols, src.rows, 1}, lt2[3] = {block_x, block_y, 1};
String option = "-D BLK_X=8 -D BLK_Y=8";
switch(borderType)
{
case cv::BORDER_REPLICATE:
option += " -D BORDER_REPLICATE";
break;
case cv::BORDER_REFLECT:
option += " -D BORDER_REFLECT";
break;
case cv::BORDER_REFLECT101:
option += " -D BORDER_REFLECT101";
break;
case cv::BORDER_WRAP:
option += " -D BORDER_WRAP";
break;
}
openCLExecuteKernel(src.clCxt, &imgproc_sobel3, "sobel3", gt2, lt2, args, -1, -1, option.c_str() );
case CV_8UC1:
option += " -D SRCTYPE=uchar";
break;
case CV_32FC1:
option += " -D SRCTYPE=float";
break;
}
else
switch(borderType)
{
case cv::BORDER_CONSTANT:
option += " -D BORDER_CONSTANT";
break;
case cv::BORDER_REPLICATE:
option += " -D BORDER_REPLICATE";
break;
case cv::BORDER_REFLECT:
option += " -D BORDER_REFLECT";
break;
case cv::BORDER_REFLECT101:
option += " -D BORDER_REFLECT_101";
break;
case cv::BORDER_WRAP:
option += " -D BORDER_WRAP";
break;
default:
CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
break;
}
String kernel_name;
switch(ksize)
{
case -1:
option += " -D SCHARR";
kernel_name = "sobel3";
break;
case 3:
kernel_name = "sobel3";
break;
case 5:
kernel_name = "sobel5";
break;
case 7:
kernel_name = "sobel7";
break;
default:
CV_Error(CV_StsBadFlag, "Kernel size is not supported!");
break;
}
openCLExecuteKernel(src.clCxt, &imgproc_sobel3, kernel_name, gt2, lt2, args, -1, -1, option.c_str() );
}
else
{
if (ksize > 0)
{
Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType);
Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, 0, borderType);
}
}
else
{
Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType);
Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType);
else
{
Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType);
Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType);
}
}
CV_Assert(Dx.offset == 0 && Dy.offset == 0);
}

View File

@ -63,7 +63,7 @@ inline float sum(float val)
return val;
}
static float clamp1(float var, float learningRate, float diff, float minVar)
inline float clamp1(float var, float learningRate, float diff, float minVar)
{
return fmax(var + learningRate * (diff * diff - var), minVar);
}
@ -96,7 +96,7 @@ inline float sum(const float4 val)
return (val.x + val.y + val.z);
}
static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
inline void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
{
float4 val = ptr[(k * rows + y) * ptr_step + x];
ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
@ -104,7 +104,7 @@ static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_s
}
static float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
inline float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
{
float4 result;
result.x = fmax(var.x + learningRate * (diff.x * diff.x - var.x), minVar);
@ -128,7 +128,7 @@ typedef struct
uchar c_shadowVal;
} con_srtuct_t;
static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
inline void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
{
float val = ptr[(k * rows + y) * ptr_step + x];
ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,185 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2013, Intel Corporation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
///////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////Macro for border type////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////////////
#ifdef BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
#define EXTRAPOLATE(x, maxV)
#elif defined BORDER_REPLICATE
//aaaaaa|abcdefgh|hhhhhhh
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = max(min((x), (maxV) - 1), 0); \
}
#elif defined BORDER_WRAP
//cdefgh|abcdefgh|abcdefg
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = ( (x) + (maxV) ) % (maxV); \
}
#elif defined BORDER_REFLECT
//fedcba|abcdefgh|hgfedcb
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = min(((maxV)-1)*2-(x)+1, max((x),-(x)-1) ); \
}
#elif defined BORDER_REFLECT_101
//gfedcb|abcdefgh|gfedcba
#define EXTRAPOLATE(x, maxV) \
{ \
(x) = min(((maxV)-1)*2-(x), max((x),-(x)) ); \
}
#else
#error No extrapolation method
#endif
#define SRC(_x,_y) CONVERT_SRCTYPE(((global SRCTYPE*)(Src+(_y)*SrcPitch))[_x])
#ifdef BORDER_CONSTANT
//CCCCCC|abcdefgh|CCCCCCC
#define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y))
#else
#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
#endif
#define DST(_x,_y) (((global DSTTYPE*)(Dst+DstOffset+(_y)*DstPitch))[_x])
//horizontal and vertical filter kernels
//should be defined on host during compile time to avoid overhead
__constant uint mat_kernelX[] = {KERNEL_MATRIX_X};
__constant uint mat_kernelY[] = {KERNEL_MATRIX_Y};
__kernel __attribute__((reqd_work_group_size(BLK_X,BLK_Y,1))) void sep_filter_singlepass
(
__global uchar* Src,
const uint SrcPitch,
const int srcOffsetX,
const int srcOffsetY,
__global uchar* Dst,
const int DstOffset,
const uint DstPitch,
int width,
int height,
int dstWidth,
int dstHeight
)
{
//RADIUSX, RADIUSY are filter dimensions
//BLK_X, BLK_Y are local wrogroup sizes
//all these should be defined on host during compile time
//first lsmem array for source pixels used in first pass,
//second lsmemDy for storing first pass results
__local WORKTYPE lsmem[BLK_Y+2*RADIUSY][BLK_X+2*RADIUSX];
__local WORKTYPE lsmemDy[BLK_Y][BLK_X+2*RADIUSX];
//get local and global ids - used as image and local memory array indexes
int lix = get_local_id(0);
int liy = get_local_id(1);
int x = (int)get_global_id(0);
int y = (int)get_global_id(1);
//calculate pixel position in source image taking image offset into account
int srcX = x + srcOffsetX - RADIUSX;
int srcY = y + srcOffsetY - RADIUSY;
int xb = srcX;
int yb = srcY;
//extrapolate coordinates, if needed
//and read my own source pixel into local memory
//with account for extra border pixels, which will be read by starting workitems
int clocY = liy;
int cSrcY = srcY;
do
{
int yb = cSrcY;
EXTRAPOLATE(yb, (height));
int clocX = lix;
int cSrcX = srcX;
do
{
int xb = cSrcX;
EXTRAPOLATE(xb,(width));
lsmem[clocY][clocX] = ELEM(xb, yb, (width), (height), 0 );
clocX += BLK_X;
cSrcX += BLK_X;
}
while(clocX < BLK_X+(RADIUSX*2));
clocY += BLK_Y;
cSrcY += BLK_Y;
}
while(clocY < BLK_Y+(RADIUSY*2));
barrier(CLK_LOCAL_MEM_FENCE);
//do vertical filter pass
//and store intermediate results to second local memory array
int i;
WORKTYPE sum = 0.0f;
int clocX = lix;
do
{
sum = 0.0f;
for(i=0; i<=2*RADIUSY; i++)
sum = mad(lsmem[liy+i][clocX], as_float(mat_kernelY[i]), sum);
lsmemDy[liy][clocX] = sum;
clocX += BLK_X;
}
while(clocX < BLK_X+(RADIUSX*2));
barrier(CLK_LOCAL_MEM_FENCE);
//if this pixel happened to be out of image borders because of global size rounding,
//then just return
if( x >= dstWidth || y >=dstHeight ) return;
//do second horizontal filter pass
//and calculate final result
sum = 0.0f;
for(i=0; i<=2*RADIUSX; i++)
sum = mad(lsmemDy[liy][lix+i], as_float(mat_kernelX[i]), sum);
//store result into destination image
DST(x,y) = CONVERT_DSTTYPE(sum);
}

View File

@ -126,13 +126,11 @@ __kernel void gpuRunHaarClassifierCascadePacked(
)
{
// this version used information provided for each workgroup
// no empty WG
int gid = (int)get_group_id(0);
int lid_x = (int)get_local_id(0);
int lid_y = (int)get_local_id(1);
int lid = lid_y*LSx+lid_x;
int4 WGInfo = pWGInfo[gid];
int4 WGInfo = pWGInfo[WGSTART+gid];
int GroupX = (WGInfo.y >> 16)&0xFFFF;
int GroupY = (WGInfo.y >> 0 )& 0xFFFF;
int Width = (WGInfo.x >> 16)&0xFFFF;
@ -140,8 +138,8 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int ImgOffset = WGInfo.z;
float ScaleFactor = as_float(WGInfo.w);
#define DATA_SIZE_X (LSx+WND_SIZE_X)
#define DATA_SIZE_Y (LSy+WND_SIZE_Y)
#define DATA_SIZE_X (PIXEL_STEP*LSx+WND_SIZE_X)
#define DATA_SIZE_Y (PIXEL_STEP*LSy+WND_SIZE_Y)
#define DATA_SIZE (DATA_SIZE_X*DATA_SIZE_Y)
local int SumL[DATA_SIZE];
@ -165,9 +163,11 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int4 info1 = p;
int4 info2 = pq;
{
int xl = lid_x;
int yl = lid_y;
// calc processed ROI coordinate in local mem
int xl = lid_x*PIXEL_STEP;
int yl = lid_y*PIXEL_STEP;
{// calc variance_norm_factor for all stages
int OffsetLocal = yl * DATA_SIZE_X + xl;
int OffsetGlobal = (GroupY+yl)* pixelstep + (GroupX+xl);
@ -194,13 +194,13 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int result = (1.0f>0.0f);
for(int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++ )
{// iterate until candidate is exist
{// iterate until candidate is valid
float stage_sum = 0.0f;
__global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier));
int lcl_off = (yl*DATA_SIZE_X)+(xl);
int stagecount = stageinfo->count;
float stagethreshold = stageinfo->threshold;
int lcl_off = (lid_y*DATA_SIZE_X)+(lid_x);
for(int nodeloop = 0; nodeloop < stagecount; nodecounter++,nodeloop++ )
{
// simple macro to extract shorts from int
@ -212,7 +212,7 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int4 n1 = pN[1];
int4 n2 = pN[2];
float nodethreshold = as_float(n2.y) * variance_norm_factor;
// calc sum of intensity pixels according to node information
// calc sum of intensity pixels according to classifier node information
float classsum =
(SumL[M0(n0.x)+lcl_off] - SumL[M1(n0.x)+lcl_off] - SumL[M0(n0.y)+lcl_off] + SumL[M1(n0.y)+lcl_off]) * as_float(n1.z) +
(SumL[M0(n0.z)+lcl_off] - SumL[M1(n0.z)+lcl_off] - SumL[M0(n0.w)+lcl_off] + SumL[M1(n0.w)+lcl_off]) * as_float(n1.w) +
@ -228,8 +228,8 @@ __kernel void gpuRunHaarClassifierCascadePacked(
int index = 1+atomic_inc((volatile global int*)candidate); //get index to write global data with face info
if(index<OUTPUTSZ)
{
int x = GroupX+lid_x;
int y = GroupY+lid_y;
int x = GroupX+xl;
int y = GroupY+yl;
int4 candidate_result;
candidate_result.x = convert_int_rtn(x*ScaleFactor);
candidate_result.y = convert_int_rtn(y*ScaleFactor);

View File

@ -381,8 +381,8 @@ struct PtrStepSz {
int step;
int rows, cols;
};
inline int get(struct PtrStepSz data, int y, int x) { return *((__global int *)((__global char*)data.ptr + data.step * y + sizeof(int) * x)); }
inline void set(struct PtrStepSz data, int y, int x, int value) { *((__global int *)((__global char*)data.ptr + data.step * y + sizeof(int) * x)) = value; }
inline int get(struct PtrStepSz data, int y, int x) { return *((__global int *)((__global char*)data.ptr + data.step * (y + 1) + sizeof(int) * (x + 1))); }
inline void set(struct PtrStepSz data, int y, int x, int value) { *((__global int *)((__global char*)data.ptr + data.step * (y + 1) + sizeof(int) * (x + 1))) = value; }
//////////////////////////////////////////////////////////////////////////////////////////
// do Hysteresis for pixel whose edge type is 1
@ -494,7 +494,7 @@ edgesHysteresisLocal
}
}
#else
struct PtrStepSz map = {((__global int *)((__global char*)map_ptr + map_offset)), map_step, rows, cols};
struct PtrStepSz map = {((__global int *)((__global char*)map_ptr + map_offset)), map_step, rows + 1, cols + 1};
__local int smem[18][18];
@ -507,13 +507,13 @@ edgesHysteresisLocal
smem[threadIdx.y + 1][threadIdx.x + 1] = x < map.cols && y < map.rows ? get(map, y, x) : 0;
if (threadIdx.y == 0)
smem[0][threadIdx.x + 1] = y > 0 ? get(map, y - 1, x) : 0;
smem[0][threadIdx.x + 1] = x < map.cols ? get(map, y - 1, x) : 0;
if (threadIdx.y == blockDim.y - 1)
smem[blockDim.y + 1][threadIdx.x + 1] = y + 1 < map.rows ? get(map, y + 1, x) : 0;
if (threadIdx.x == 0)
smem[threadIdx.y + 1][0] = x > 0 ? get(map, y, x - 1) : 0;
smem[threadIdx.y + 1][0] = y < map.rows ? get(map, y, x - 1) : 0;
if (threadIdx.x == blockDim.x - 1)
smem[threadIdx.y + 1][blockDim.x + 1] = x + 1 < map.cols ? get(map, y, x + 1) : 0;
smem[threadIdx.y + 1][blockDim.x + 1] = x + 1 < map.cols && y < map.rows ? get(map, y, x + 1) : 0;
if (threadIdx.x == 0 && threadIdx.y == 0)
smem[0][0] = y > 0 && x > 0 ? get(map, y - 1, x - 1) : 0;
if (threadIdx.x == blockDim.x - 1 && threadIdx.y == 0)
@ -525,7 +525,7 @@ edgesHysteresisLocal
barrier(CLK_LOCAL_MEM_FENCE);
if (x >= map.cols || y >= map.rows)
if (x >= cols || y >= rows)
return;
int n;
@ -576,7 +576,7 @@ edgesHysteresisLocal
if (n > 0)
{
const int ind = atomic_inc(counter);
st[ind] = (ushort2)(x, y);
st[ind] = (ushort2)(x + 1, y + 1);
}
#endif
}

Some files were not shown because too many files have changed in this diff Show More