merge with upstream
This commit is contained in:
commit
e17da9a843
502
3rdparty/include/MultiMon.h
vendored
502
3rdparty/include/MultiMon.h
vendored
@ -1,502 +0,0 @@
|
||||
//=============================================================================
|
||||
//
|
||||
// multimon.h -- Stub module that fakes multiple monitor apis on Win32 OSes
|
||||
// without them.
|
||||
//
|
||||
// By using this header your code will get back default values from
|
||||
// GetSystemMetrics() for new metrics, and the new multimonitor APIs
|
||||
// will act like only one display is present on a Win32 OS without
|
||||
// multimonitor APIs.
|
||||
//
|
||||
// Exactly one source must include this with COMPILE_MULTIMON_STUBS defined.
|
||||
//
|
||||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
//
|
||||
//=============================================================================
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" { // Assume C declarations for C++
|
||||
#endif // __cplusplus
|
||||
|
||||
//
|
||||
// If we are building with Win95/NT4 headers, we need to declare
|
||||
// the multimonitor-related metrics and APIs ourselves.
|
||||
//
|
||||
#ifndef SM_CMONITORS
|
||||
|
||||
#define SM_XVIRTUALSCREEN 76
|
||||
#define SM_YVIRTUALSCREEN 77
|
||||
#define SM_CXVIRTUALSCREEN 78
|
||||
#define SM_CYVIRTUALSCREEN 79
|
||||
#define SM_CMONITORS 80
|
||||
#define SM_SAMEDISPLAYFORMAT 81
|
||||
|
||||
// HMONITOR is already declared if WINVER >= 0x0500 in windef.h
|
||||
// This is for components built with an older version number.
|
||||
//
|
||||
#if !defined(HMONITOR_DECLARED) && (WINVER < 0x0500)
|
||||
DECLARE_HANDLE(HMONITOR);
|
||||
#define HMONITOR_DECLARED
|
||||
#endif
|
||||
|
||||
#define MONITOR_DEFAULTTONULL 0x00000000
|
||||
#define MONITOR_DEFAULTTOPRIMARY 0x00000001
|
||||
#define MONITOR_DEFAULTTONEAREST 0x00000002
|
||||
|
||||
#define MONITORINFOF_PRIMARY 0x00000001
|
||||
|
||||
typedef struct tagMONITORINFO
|
||||
{
|
||||
DWORD cbSize;
|
||||
RECT rcMonitor;
|
||||
RECT rcWork;
|
||||
DWORD dwFlags;
|
||||
} MONITORINFO, *LPMONITORINFO;
|
||||
|
||||
#ifndef CCHDEVICENAME
|
||||
#define CCHDEVICENAME 32
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
typedef struct tagMONITORINFOEXA : public tagMONITORINFO
|
||||
{
|
||||
CHAR szDevice[CCHDEVICENAME];
|
||||
} MONITORINFOEXA, *LPMONITORINFOEXA;
|
||||
typedef struct tagMONITORINFOEXW : public tagMONITORINFO
|
||||
{
|
||||
WCHAR szDevice[CCHDEVICENAME];
|
||||
} MONITORINFOEXW, *LPMONITORINFOEXW;
|
||||
#ifdef UNICODE
|
||||
typedef MONITORINFOEXW MONITORINFOEX;
|
||||
typedef LPMONITORINFOEXW LPMONITORINFOEX;
|
||||
#else
|
||||
typedef MONITORINFOEXA MONITORINFOEX;
|
||||
typedef LPMONITORINFOEXA LPMONITORINFOEX;
|
||||
#endif // UNICODE
|
||||
#else // ndef __cplusplus
|
||||
typedef struct tagMONITORINFOEXA
|
||||
{
|
||||
MONITORINFO;
|
||||
CHAR szDevice[CCHDEVICENAME];
|
||||
} MONITORINFOEXA, *LPMONITORINFOEXA;
|
||||
typedef struct tagMONITORINFOEXW
|
||||
{
|
||||
MONITORINFO;
|
||||
WCHAR szDevice[CCHDEVICENAME];
|
||||
} MONITORINFOEXW, *LPMONITORINFOEXW;
|
||||
#ifdef UNICODE
|
||||
typedef MONITORINFOEXW MONITORINFOEX;
|
||||
typedef LPMONITORINFOEXW LPMONITORINFOEX;
|
||||
#else
|
||||
typedef MONITORINFOEXA MONITORINFOEX;
|
||||
typedef LPMONITORINFOEXA LPMONITORINFOEX;
|
||||
#endif // UNICODE
|
||||
#endif
|
||||
|
||||
typedef BOOL (CALLBACK* MONITORENUMPROC)(HMONITOR, HDC, LPRECT, LPARAM);
|
||||
|
||||
#ifndef DISPLAY_DEVICE_ATTACHED_TO_DESKTOP
|
||||
typedef struct _DISPLAY_DEVICEA {
|
||||
DWORD cb;
|
||||
CHAR DeviceName[32];
|
||||
CHAR DeviceString[128];
|
||||
DWORD StateFlags;
|
||||
CHAR DeviceID[128];
|
||||
CHAR DeviceKey[128];
|
||||
} DISPLAY_DEVICEA, *PDISPLAY_DEVICEA, *LPDISPLAY_DEVICEA;
|
||||
typedef struct _DISPLAY_DEVICEW {
|
||||
DWORD cb;
|
||||
WCHAR DeviceName[32];
|
||||
WCHAR DeviceString[128];
|
||||
DWORD StateFlags;
|
||||
WCHAR DeviceID[128];
|
||||
WCHAR DeviceKey[128];
|
||||
} DISPLAY_DEVICEW, *PDISPLAY_DEVICEW, *LPDISPLAY_DEVICEW;
|
||||
#ifdef UNICODE
|
||||
typedef DISPLAY_DEVICEW DISPLAY_DEVICE;
|
||||
typedef PDISPLAY_DEVICEW PDISPLAY_DEVICE;
|
||||
typedef LPDISPLAY_DEVICEW LPDISPLAY_DEVICE;
|
||||
#else
|
||||
typedef DISPLAY_DEVICEA DISPLAY_DEVICE;
|
||||
typedef PDISPLAY_DEVICEA PDISPLAY_DEVICE;
|
||||
typedef LPDISPLAY_DEVICEA LPDISPLAY_DEVICE;
|
||||
#endif // UNICODE
|
||||
|
||||
#define DISPLAY_DEVICE_ATTACHED_TO_DESKTOP 0x00000001
|
||||
#define DISPLAY_DEVICE_MULTI_DRIVER 0x00000002
|
||||
#define DISPLAY_DEVICE_PRIMARY_DEVICE 0x00000004
|
||||
#define DISPLAY_DEVICE_MIRRORING_DRIVER 0x00000008
|
||||
#define DISPLAY_DEVICE_VGA_COMPATIBLE 0x00000010
|
||||
#endif
|
||||
|
||||
#endif // SM_CMONITORS
|
||||
|
||||
#undef GetMonitorInfo
|
||||
#undef GetSystemMetrics
|
||||
#undef MonitorFromWindow
|
||||
#undef MonitorFromRect
|
||||
#undef MonitorFromPoint
|
||||
#undef EnumDisplayMonitors
|
||||
#undef EnumDisplayDevices
|
||||
|
||||
//
|
||||
// Define COMPILE_MULTIMON_STUBS to compile the stubs;
|
||||
// otherwise, you get the declarations.
|
||||
//
|
||||
#ifdef COMPILE_MULTIMON_STUBS
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// Implement the API stubs.
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
#ifndef _MULTIMON_USE_SECURE_CRT
|
||||
#if defined(__GOT_SECURE_LIB__) && __GOT_SECURE_LIB__ >= 200402L
|
||||
#define _MULTIMON_USE_SECURE_CRT 1
|
||||
#else
|
||||
#define _MULTIMON_USE_SECURE_CRT 0
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef MULTIMON_FNS_DEFINED
|
||||
|
||||
int (WINAPI* g_pfnGetSystemMetrics)(int) = NULL;
|
||||
HMONITOR (WINAPI* g_pfnMonitorFromWindow)(HWND, DWORD) = NULL;
|
||||
HMONITOR (WINAPI* g_pfnMonitorFromRect)(LPCRECT, DWORD) = NULL;
|
||||
HMONITOR (WINAPI* g_pfnMonitorFromPoint)(POINT, DWORD) = NULL;
|
||||
BOOL (WINAPI* g_pfnGetMonitorInfo)(HMONITOR, LPMONITORINFO) = NULL;
|
||||
BOOL (WINAPI* g_pfnEnumDisplayMonitors)(HDC, LPCRECT, MONITORENUMPROC, LPARAM) = NULL;
|
||||
BOOL (WINAPI* g_pfnEnumDisplayDevices)(PVOID, DWORD, PDISPLAY_DEVICE,DWORD) = NULL;
|
||||
BOOL g_fMultiMonInitDone = FALSE;
|
||||
BOOL g_fMultimonPlatformNT = FALSE;
|
||||
|
||||
#endif
|
||||
|
||||
BOOL IsPlatformNT()
|
||||
{
|
||||
OSVERSIONINFOA osvi = {0};
|
||||
osvi.dwOSVersionInfoSize = sizeof(osvi);
|
||||
GetVersionExA((OSVERSIONINFOA*)&osvi);
|
||||
return (VER_PLATFORM_WIN32_NT == osvi.dwPlatformId);
|
||||
}
|
||||
|
||||
BOOL InitMultipleMonitorStubs(void)
|
||||
{
|
||||
HMODULE hUser32;
|
||||
if (g_fMultiMonInitDone)
|
||||
{
|
||||
return g_pfnGetMonitorInfo != NULL;
|
||||
}
|
||||
|
||||
g_fMultimonPlatformNT = IsPlatformNT();
|
||||
hUser32 = GetModuleHandle(TEXT("USER32"));
|
||||
if (hUser32 &&
|
||||
(*(FARPROC*)&g_pfnGetSystemMetrics = GetProcAddress(hUser32,"GetSystemMetrics")) != NULL &&
|
||||
(*(FARPROC*)&g_pfnMonitorFromWindow = GetProcAddress(hUser32,"MonitorFromWindow")) != NULL &&
|
||||
(*(FARPROC*)&g_pfnMonitorFromRect = GetProcAddress(hUser32,"MonitorFromRect")) != NULL &&
|
||||
(*(FARPROC*)&g_pfnMonitorFromPoint = GetProcAddress(hUser32,"MonitorFromPoint")) != NULL &&
|
||||
(*(FARPROC*)&g_pfnEnumDisplayMonitors = GetProcAddress(hUser32,"EnumDisplayMonitors")) != NULL &&
|
||||
#ifdef UNICODE
|
||||
(*(FARPROC*)&g_pfnEnumDisplayDevices = GetProcAddress(hUser32,"EnumDisplayDevicesW")) != NULL &&
|
||||
(*(FARPROC*)&g_pfnGetMonitorInfo = g_fMultimonPlatformNT ? GetProcAddress(hUser32,"GetMonitorInfoW") :
|
||||
GetProcAddress(hUser32,"GetMonitorInfoA")) != NULL
|
||||
#else
|
||||
(*(FARPROC*)&g_pfnGetMonitorInfo = GetProcAddress(hUser32,"GetMonitorInfoA")) != NULL &&
|
||||
(*(FARPROC*)&g_pfnEnumDisplayDevices = GetProcAddress(hUser32,"EnumDisplayDevicesA")) != NULL
|
||||
#endif
|
||||
) {
|
||||
g_fMultiMonInitDone = TRUE;
|
||||
return TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
g_pfnGetSystemMetrics = NULL;
|
||||
g_pfnMonitorFromWindow = NULL;
|
||||
g_pfnMonitorFromRect = NULL;
|
||||
g_pfnMonitorFromPoint = NULL;
|
||||
g_pfnGetMonitorInfo = NULL;
|
||||
g_pfnEnumDisplayMonitors = NULL;
|
||||
g_pfnEnumDisplayDevices = NULL;
|
||||
|
||||
g_fMultiMonInitDone = TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
//
|
||||
// fake implementations of Monitor APIs that work with the primary display
|
||||
// no special parameter validation is made since these run in client code
|
||||
//
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
int WINAPI
|
||||
xGetSystemMetrics(int nIndex)
|
||||
{
|
||||
if (InitMultipleMonitorStubs())
|
||||
return g_pfnGetSystemMetrics(nIndex);
|
||||
|
||||
switch (nIndex)
|
||||
{
|
||||
case SM_CMONITORS:
|
||||
case SM_SAMEDISPLAYFORMAT:
|
||||
return 1;
|
||||
|
||||
case SM_XVIRTUALSCREEN:
|
||||
case SM_YVIRTUALSCREEN:
|
||||
return 0;
|
||||
|
||||
case SM_CXVIRTUALSCREEN:
|
||||
nIndex = SM_CXSCREEN;
|
||||
break;
|
||||
|
||||
case SM_CYVIRTUALSCREEN:
|
||||
nIndex = SM_CYSCREEN;
|
||||
break;
|
||||
}
|
||||
|
||||
return GetSystemMetrics(nIndex);
|
||||
}
|
||||
|
||||
#define xPRIMARY_MONITOR ((HMONITOR)0x12340042)
|
||||
|
||||
HMONITOR WINAPI
|
||||
xMonitorFromPoint(POINT ptScreenCoords, DWORD dwFlags)
|
||||
{
|
||||
if (InitMultipleMonitorStubs())
|
||||
return g_pfnMonitorFromPoint(ptScreenCoords, dwFlags);
|
||||
|
||||
if ((dwFlags & (MONITOR_DEFAULTTOPRIMARY | MONITOR_DEFAULTTONEAREST)) ||
|
||||
((ptScreenCoords.x >= 0) &&
|
||||
(ptScreenCoords.x < GetSystemMetrics(SM_CXSCREEN)) &&
|
||||
(ptScreenCoords.y >= 0) &&
|
||||
(ptScreenCoords.y < GetSystemMetrics(SM_CYSCREEN))))
|
||||
{
|
||||
return xPRIMARY_MONITOR;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
HMONITOR WINAPI
|
||||
xMonitorFromRect(LPCRECT lprcScreenCoords, DWORD dwFlags)
|
||||
{
|
||||
if (InitMultipleMonitorStubs())
|
||||
return g_pfnMonitorFromRect(lprcScreenCoords, dwFlags);
|
||||
|
||||
if ((dwFlags & (MONITOR_DEFAULTTOPRIMARY | MONITOR_DEFAULTTONEAREST)) ||
|
||||
((lprcScreenCoords->right > 0) &&
|
||||
(lprcScreenCoords->bottom > 0) &&
|
||||
(lprcScreenCoords->left < GetSystemMetrics(SM_CXSCREEN)) &&
|
||||
(lprcScreenCoords->top < GetSystemMetrics(SM_CYSCREEN))))
|
||||
{
|
||||
return xPRIMARY_MONITOR;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
HMONITOR WINAPI
|
||||
xMonitorFromWindow(HWND hWnd, DWORD dwFlags)
|
||||
{
|
||||
WINDOWPLACEMENT wp;
|
||||
|
||||
if (InitMultipleMonitorStubs())
|
||||
return g_pfnMonitorFromWindow(hWnd, dwFlags);
|
||||
|
||||
if (dwFlags & (MONITOR_DEFAULTTOPRIMARY | MONITOR_DEFAULTTONEAREST))
|
||||
return xPRIMARY_MONITOR;
|
||||
|
||||
if (IsIconic(hWnd) ?
|
||||
GetWindowPlacement(hWnd, &wp) :
|
||||
GetWindowRect(hWnd, &wp.rcNormalPosition)) {
|
||||
|
||||
return xMonitorFromRect(&wp.rcNormalPosition, dwFlags);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
BOOL WINAPI
|
||||
xGetMonitorInfo(HMONITOR hMonitor, __inout LPMONITORINFO lpMonitorInfo)
|
||||
{
|
||||
RECT rcWork;
|
||||
|
||||
if (InitMultipleMonitorStubs())
|
||||
{
|
||||
BOOL f = g_pfnGetMonitorInfo(hMonitor, lpMonitorInfo);
|
||||
#ifdef UNICODE
|
||||
if (f && !g_fMultimonPlatformNT && (lpMonitorInfo->cbSize >= sizeof(MONITORINFOEX)))
|
||||
{
|
||||
MultiByteToWideChar(CP_ACP, 0,
|
||||
(LPSTR)((MONITORINFOEX*)lpMonitorInfo)->szDevice, -1,
|
||||
((MONITORINFOEX*)lpMonitorInfo)->szDevice, (sizeof(((MONITORINFOEX*)lpMonitorInfo)->szDevice)/sizeof(TCHAR)));
|
||||
}
|
||||
#endif
|
||||
return f;
|
||||
}
|
||||
|
||||
if ((hMonitor == xPRIMARY_MONITOR) &&
|
||||
lpMonitorInfo &&
|
||||
(lpMonitorInfo->cbSize >= sizeof(MONITORINFO)) &&
|
||||
SystemParametersInfoA(SPI_GETWORKAREA, 0, &rcWork, 0))
|
||||
{
|
||||
lpMonitorInfo->rcMonitor.left = 0;
|
||||
lpMonitorInfo->rcMonitor.top = 0;
|
||||
lpMonitorInfo->rcMonitor.right = GetSystemMetrics(SM_CXSCREEN);
|
||||
lpMonitorInfo->rcMonitor.bottom = GetSystemMetrics(SM_CYSCREEN);
|
||||
lpMonitorInfo->rcWork = rcWork;
|
||||
lpMonitorInfo->dwFlags = MONITORINFOF_PRIMARY;
|
||||
|
||||
if (lpMonitorInfo->cbSize >= sizeof(MONITORINFOEX))
|
||||
{
|
||||
#ifdef UNICODE
|
||||
MultiByteToWideChar(CP_ACP, 0, "DISPLAY", -1, ((MONITORINFOEX*)lpMonitorInfo)->szDevice, (sizeof(((MONITORINFOEX*)lpMonitorInfo)->szDevice)/sizeof(TCHAR)));
|
||||
#else // UNICODE
|
||||
#if _MULTIMON_USE_SECURE_CRT
|
||||
strncpy_s(((MONITORINFOEX*)lpMonitorInfo)->szDevice, (sizeof(((MONITORINFOEX*)lpMonitorInfo)->szDevice)/sizeof(TCHAR)), TEXT("DISPLAY"), (sizeof(((MONITORINFOEX*)lpMonitorInfo)->szDevice)/sizeof(TCHAR)) - 1);
|
||||
#else
|
||||
lstrcpyn(((MONITORINFOEX*)lpMonitorInfo)->szDevice, TEXT("DISPLAY"), (sizeof(((MONITORINFOEX*)lpMonitorInfo)->szDevice)/sizeof(TCHAR)));
|
||||
#endif // _MULTIMON_USE_SECURE_CRT
|
||||
#endif // UNICODE
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
BOOL WINAPI
|
||||
xEnumDisplayMonitors(
|
||||
HDC hdcOptionalForPainting,
|
||||
LPCRECT lprcEnumMonitorsThatIntersect,
|
||||
MONITORENUMPROC lpfnEnumProc,
|
||||
LPARAM dwData)
|
||||
{
|
||||
RECT rcLimit;
|
||||
|
||||
if (InitMultipleMonitorStubs()) {
|
||||
return g_pfnEnumDisplayMonitors(
|
||||
hdcOptionalForPainting,
|
||||
lprcEnumMonitorsThatIntersect,
|
||||
lpfnEnumProc,
|
||||
dwData);
|
||||
}
|
||||
|
||||
if (!lpfnEnumProc)
|
||||
return FALSE;
|
||||
|
||||
rcLimit.left = 0;
|
||||
rcLimit.top = 0;
|
||||
rcLimit.right = GetSystemMetrics(SM_CXSCREEN);
|
||||
rcLimit.bottom = GetSystemMetrics(SM_CYSCREEN);
|
||||
|
||||
if (hdcOptionalForPainting)
|
||||
{
|
||||
RECT rcClip;
|
||||
POINT ptOrg;
|
||||
|
||||
switch (GetClipBox(hdcOptionalForPainting, &rcClip))
|
||||
{
|
||||
default:
|
||||
if (!GetDCOrgEx(hdcOptionalForPainting, &ptOrg))
|
||||
return FALSE;
|
||||
|
||||
OffsetRect(&rcLimit, -ptOrg.x, -ptOrg.y);
|
||||
if (IntersectRect(&rcLimit, &rcLimit, &rcClip) &&
|
||||
(!lprcEnumMonitorsThatIntersect ||
|
||||
IntersectRect(&rcLimit, &rcLimit, lprcEnumMonitorsThatIntersect))) {
|
||||
|
||||
break;
|
||||
}
|
||||
//fall thru
|
||||
case NULLREGION:
|
||||
return TRUE;
|
||||
case ERROR:
|
||||
return FALSE;
|
||||
}
|
||||
} else {
|
||||
if ( lprcEnumMonitorsThatIntersect &&
|
||||
!IntersectRect(&rcLimit, &rcLimit, lprcEnumMonitorsThatIntersect)) {
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
return lpfnEnumProc(
|
||||
xPRIMARY_MONITOR,
|
||||
hdcOptionalForPainting,
|
||||
&rcLimit,
|
||||
dwData);
|
||||
}
|
||||
|
||||
BOOL WINAPI
|
||||
xEnumDisplayDevices(
|
||||
PVOID Unused,
|
||||
DWORD iDevNum,
|
||||
__inout PDISPLAY_DEVICE lpDisplayDevice,
|
||||
DWORD dwFlags)
|
||||
{
|
||||
if (InitMultipleMonitorStubs())
|
||||
return g_pfnEnumDisplayDevices(Unused, iDevNum, lpDisplayDevice, dwFlags);
|
||||
|
||||
if (Unused != NULL)
|
||||
return FALSE;
|
||||
|
||||
if (iDevNum != 0)
|
||||
return FALSE;
|
||||
|
||||
if (lpDisplayDevice == NULL || lpDisplayDevice->cb < sizeof(DISPLAY_DEVICE))
|
||||
return FALSE;
|
||||
|
||||
#ifdef UNICODE
|
||||
MultiByteToWideChar(CP_ACP, 0, "DISPLAY", -1, lpDisplayDevice->DeviceName, (sizeof(lpDisplayDevice->DeviceName)/sizeof(TCHAR)));
|
||||
MultiByteToWideChar(CP_ACP, 0, "DISPLAY", -1, lpDisplayDevice->DeviceString, (sizeof(lpDisplayDevice->DeviceString)/sizeof(TCHAR)));
|
||||
#else // UNICODE
|
||||
#if _MULTIMON_USE_SECURE_CRT
|
||||
strncpy_s((LPTSTR)lpDisplayDevice->DeviceName, (sizeof(lpDisplayDevice->DeviceName)/sizeof(TCHAR)), TEXT("DISPLAY"), (sizeof(lpDisplayDevice->DeviceName)/sizeof(TCHAR)) - 1);
|
||||
strncpy_s((LPTSTR)lpDisplayDevice->DeviceString, (sizeof(lpDisplayDevice->DeviceString)/sizeof(TCHAR)), TEXT("DISPLAY"), (sizeof(lpDisplayDevice->DeviceName)/sizeof(TCHAR)) - 1);
|
||||
#else
|
||||
lstrcpyn((LPTSTR)lpDisplayDevice->DeviceName, TEXT("DISPLAY"), (sizeof(lpDisplayDevice->DeviceName)/sizeof(TCHAR)));
|
||||
lstrcpyn((LPTSTR)lpDisplayDevice->DeviceString, TEXT("DISPLAY"), (sizeof(lpDisplayDevice->DeviceString)/sizeof(TCHAR)));
|
||||
#endif // _MULTIMON_USE_SECURE_CRT
|
||||
#endif // UNICODE
|
||||
|
||||
lpDisplayDevice->StateFlags = DISPLAY_DEVICE_ATTACHED_TO_DESKTOP | DISPLAY_DEVICE_PRIMARY_DEVICE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#undef xPRIMARY_MONITOR
|
||||
#undef COMPILE_MULTIMON_STUBS
|
||||
|
||||
#else // COMPILE_MULTIMON_STUBS
|
||||
|
||||
extern int WINAPI xGetSystemMetrics(int);
|
||||
extern HMONITOR WINAPI xMonitorFromWindow(HWND, DWORD);
|
||||
extern HMONITOR WINAPI xMonitorFromRect(LPCRECT, DWORD);
|
||||
extern HMONITOR WINAPI xMonitorFromPoint(POINT, DWORD);
|
||||
extern BOOL WINAPI xGetMonitorInfo(HMONITOR, LPMONITORINFO);
|
||||
extern BOOL WINAPI xEnumDisplayMonitors(HDC, LPCRECT, MONITORENUMPROC, LPARAM);
|
||||
extern BOOL WINAPI xEnumDisplayDevices(PVOID, DWORD, PDISPLAY_DEVICE, DWORD);
|
||||
|
||||
#endif // COMPILE_MULTIMON_STUBS
|
||||
|
||||
//
|
||||
// build defines that replace the regular APIs with our versions
|
||||
//
|
||||
#define GetSystemMetrics xGetSystemMetrics
|
||||
#define MonitorFromWindow xMonitorFromWindow
|
||||
#define MonitorFromRect xMonitorFromRect
|
||||
#define MonitorFromPoint xMonitorFromPoint
|
||||
#define GetMonitorInfo xGetMonitorInfo
|
||||
#define EnumDisplayMonitors xEnumDisplayMonitors
|
||||
#define EnumDisplayDevices xEnumDisplayDevices
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif // __cplusplus
|
||||
|
||||
|
@ -156,6 +156,7 @@ OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" ON
|
||||
OCV_OPTION(WITH_OPENCLAMDFFT "Include AMD OpenCL FFT library support" ON IF (NOT ANDROID AND NOT IOS) )
|
||||
OCV_OPTION(WITH_OPENCLAMDBLAS "Include AMD OpenCL BLAS library support" ON IF (NOT ANDROID AND NOT IOS) )
|
||||
OCV_OPTION(WITH_DIRECTX "Include DirectX support" ON IF WIN32 )
|
||||
OCV_OPTION(WITH_INTELPERC "Include Intel Perceptual Computing support" OFF IF WIN32 )
|
||||
|
||||
|
||||
# OpenCV build components
|
||||
@ -207,10 +208,12 @@ OCV_OPTION(ENABLE_SSSE3 "Enable SSSE3 instructions"
|
||||
OCV_OPTION(ENABLE_SSE41 "Enable SSE4.1 instructions" OFF IF ((CV_ICC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
|
||||
OCV_OPTION(ENABLE_SSE42 "Enable SSE4.2 instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND (X86 OR X86_64)) )
|
||||
OCV_OPTION(ENABLE_AVX "Enable AVX instructions" OFF IF ((MSVC OR CMAKE_COMPILER_IS_GNUCXX) AND (X86 OR X86_64)) )
|
||||
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" OFF IF (CMAKE_COMPILER_IS_GNUCXX AND ARM) )
|
||||
OCV_OPTION(ENABLE_NEON "Enable NEON instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND ARM )
|
||||
OCV_OPTION(ENABLE_VFPV3 "Enable VFPv3-D32 instructions" OFF IF CMAKE_COMPILER_IS_GNUCXX AND ARM )
|
||||
OCV_OPTION(ENABLE_NOISY_WARNINGS "Show all warnings even if they are too noisy" OFF )
|
||||
OCV_OPTION(OPENCV_WARNINGS_ARE_ERRORS "Treat warnings as errors" OFF )
|
||||
OCV_OPTION(ENABLE_WINRT_MODE "Build with Windows Runtime support" OFF IF WIN32 )
|
||||
OCV_OPTION(ENABLE_WINRT_MODE_NATIVE "Build with Windows Runtime native C++ support" OFF IF WIN32 )
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
@ -226,6 +229,15 @@ include(cmake/OpenCVVersion.cmake)
|
||||
# Save libs and executables in the same place
|
||||
set(EXECUTABLE_OUTPUT_PATH "${CMAKE_BINARY_DIR}/bin" CACHE PATH "Output directory for applications" )
|
||||
|
||||
if (ANDROID)
|
||||
if (ANDROID_ABI MATCHES "NEON")
|
||||
set(ENABLE_NEON ON)
|
||||
endif()
|
||||
if (ANDROID_ABI MATCHES "VFPV3")
|
||||
set(ENABLE_VFPV3 ON)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(ANDROID OR WIN32)
|
||||
set(OPENCV_DOC_INSTALL_PATH doc)
|
||||
elseif(INSTALL_TO_MANGLED_PATHS)
|
||||
@ -373,6 +385,8 @@ if(UNIX)
|
||||
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} dl m log)
|
||||
elseif(${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD|NetBSD|DragonFly")
|
||||
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} m pthread)
|
||||
elseif(EMSCRIPTEN)
|
||||
# no need to link to system libs with emscripten
|
||||
else()
|
||||
set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} dl m pthread rt)
|
||||
endif()
|
||||
@ -630,7 +644,7 @@ endif()
|
||||
if(WIN32)
|
||||
status("")
|
||||
status(" Windows RT support:" HAVE_WINRT THEN YES ELSE NO)
|
||||
if (ENABLE_WINRT_MODE)
|
||||
if (ENABLE_WINRT_MODE OR ENABLE_WINRT_MODE_NATIVE)
|
||||
status(" Windows SDK v8.0:" ${WINDOWS_SDK_PATH})
|
||||
status(" Visual Studio 2012:" ${VISUAL_STUDIO_PATH})
|
||||
endif()
|
||||
@ -820,6 +834,11 @@ if(DEFINED WITH_XINE)
|
||||
status(" Xine:" HAVE_XINE THEN "YES (ver ${ALIASOF_libxine_VERSION})" ELSE NO)
|
||||
endif(DEFINED WITH_XINE)
|
||||
|
||||
if(DEFINED WITH_INTELPERC)
|
||||
status(" Intel PerC:" HAVE_INTELPERC THEN "YES" ELSE NO)
|
||||
endif(DEFINED WITH_INTELPERC)
|
||||
|
||||
|
||||
# ========================== Other third-party libraries ==========================
|
||||
status("")
|
||||
status(" Other third-party libraries:")
|
||||
|
@ -9,7 +9,7 @@ set(HAVE_WINRT FALSE)
|
||||
# search Windows Platform SDK
|
||||
message(STATUS "Checking for Windows Platform SDK")
|
||||
GET_FILENAME_COMPONENT(WINDOWS_SDK_PATH "[HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\Microsoft SDKs\\Windows\\v8.0;InstallationFolder]" ABSOLUTE CACHE)
|
||||
if (WINDOWS_SDK_PATH STREQUAL "")
|
||||
if(WINDOWS_SDK_PATH STREQUAL "")
|
||||
set(HAVE_MSPDK FALSE)
|
||||
message(STATUS "Windows Platform SDK 8.0 was not found")
|
||||
else()
|
||||
@ -19,7 +19,7 @@ endif()
|
||||
#search for Visual Studio 11.0 install directory
|
||||
message(STATUS "Checking for Visual Studio 2012")
|
||||
GET_FILENAME_COMPONENT(VISUAL_STUDIO_PATH [HKEY_LOCAL_MACHINE\\SOFTWARE\\Microsoft\\VisualStudio\\11.0\\Setup\\VS;ProductDir] REALPATH CACHE)
|
||||
if (VISUAL_STUDIO_PATH STREQUAL "")
|
||||
if(VISUAL_STUDIO_PATH STREQUAL "")
|
||||
set(HAVE_MSVC2012 FALSE)
|
||||
message(STATUS "Visual Studio 2012 was not found")
|
||||
else()
|
||||
@ -30,11 +30,15 @@ try_compile(HAVE_WINRT_SDK
|
||||
"${OpenCV_BINARY_DIR}"
|
||||
"${OpenCV_SOURCE_DIR}/cmake/checks/winrttest.cpp")
|
||||
|
||||
if (ENABLE_WINRT_MODE AND HAVE_WINRT_SDK AND HAVE_MSVC2012 AND HAVE_MSPDK)
|
||||
if(ENABLE_WINRT_MODE AND HAVE_WINRT_SDK AND HAVE_MSVC2012 AND HAVE_MSPDK)
|
||||
set(HAVE_WINRT TRUE)
|
||||
set(HAVE_WINRT_CX TRUE)
|
||||
elseif(ENABLE_WINRT_MODE_NATIVE AND HAVE_WINRT_SDK AND HAVE_MSVC2012 AND HAVE_MSPDK)
|
||||
set(HAVE_WINRT TRUE)
|
||||
set(HAVE_WINRT_CX FALSE)
|
||||
endif()
|
||||
|
||||
if (HAVE_WINRT)
|
||||
if(HAVE_WINRT)
|
||||
add_definitions(/DWINVER=0x0602 /DNTDDI_VERSION=NTDDI_WIN8 /D_WIN32_WINNT=0x0602)
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /appcontainer")
|
||||
set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} /appcontainer")
|
||||
|
@ -124,6 +124,12 @@ if(CMAKE_COMPILER_IS_GNUCXX)
|
||||
if(ENABLE_SSE2)
|
||||
add_extra_compiler_option(-msse2)
|
||||
endif()
|
||||
if (ENABLE_NEON)
|
||||
add_extra_compiler_option("-mfpu=neon")
|
||||
endif()
|
||||
if (ENABLE_VFPV3 AND NOT ENABLE_NEON)
|
||||
add_extra_compiler_option("-mfpu=vfpv3")
|
||||
endif()
|
||||
|
||||
# SSE3 and further should be disabled under MingW because it generates compiler errors
|
||||
if(NOT MINGW)
|
||||
|
20
cmake/OpenCVFindIntelPerCSDK.cmake
Normal file
20
cmake/OpenCVFindIntelPerCSDK.cmake
Normal file
@ -0,0 +1,20 @@
|
||||
# Locates the Intel Perceptual Computing SDK through the PCSDK_DIR environment variable.
#
# Main variables:
#   INTELPERC_LIBRARIES and INTELPERC_INCLUDE_DIR to link Intel Perceptual Computing SDK modules
#   HAVE_INTELPERC for conditional compilation of OpenCV with/without the Intel Perceptual Computing SDK
#
# NOTE(review): the search paths append "include" / "lib/..." directly to
# $ENV{PCSDK_DIR} with no separator, so this presumably relies on PCSDK_DIR
# ending with a path separator -- confirm against the SDK installer.

# The headers live in the same place for both architectures; only the
# directory of the import library differs.
find_path(INTELPERC_INCLUDE_DIR "pxcsession.h" PATHS "$ENV{PCSDK_DIR}include" DOC "Path to Intel Perceptual Computing SDK interface headers")
if(X86_64)
  find_file(INTELPERC_LIBRARIES "libpxc.lib" PATHS "$ENV{PCSDK_DIR}lib/x64" DOC "Path to Intel Perceptual Computing SDK interface libraries")
else()
  find_file(INTELPERC_LIBRARIES "libpxc.lib" PATHS "$ENV{PCSDK_DIR}lib/Win32" DOC "Path to Intel Perceptual Computing SDK interface libraries")
endif()

if(INTELPERC_INCLUDE_DIR AND INTELPERC_LIBRARIES)
  set(HAVE_INTELPERC TRUE)
else()
  set(HAVE_INTELPERC FALSE)
  message(WARNING "Intel Perceptual Computing SDK was not found: set INTELPERC_INCLUDE_DIR to the directory containing pxcsession.h and INTELPERC_LIBRARIES to the full path of libpxc.lib.")
endif() #if(INTELPERC_INCLUDE_DIR AND INTELPERC_LIBRARIES)

mark_as_advanced(FORCE INTELPERC_LIBRARIES INTELPERC_INCLUDE_DIR)
|
@ -277,3 +277,8 @@ if (NOT IOS)
|
||||
set(HAVE_QTKIT YES)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# --- Intel Perceptual Computing SDK ---
|
||||
if(WITH_INTELPERC)
|
||||
include("${OpenCV_SOURCE_DIR}/cmake/OpenCVFindIntelPerCSDK.cmake")
|
||||
endif(WITH_INTELPERC)
|
||||
|
@ -88,6 +88,9 @@
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#cmakedefine HAVE_INTTYPES_H 1
|
||||
|
||||
/* Intel Perceptual Computing SDK library */
|
||||
#cmakedefine HAVE_INTELPERC
|
||||
|
||||
/* Intel Integrated Performance Primitives */
|
||||
#cmakedefine HAVE_IPP
|
||||
|
||||
|
@ -48,10 +48,10 @@ The structure of package contents looks as follows:
|
||||
|
||||
::
|
||||
|
||||
OpenCV-2.4.7-android-sdk
|
||||
OpenCV-2.4.8-android-sdk
|
||||
|_ apk
|
||||
| |_ OpenCV_2.4.7_binary_pack_armv7a.apk
|
||||
| |_ OpenCV_2.4.7_Manager_2.14_XXX.apk
|
||||
| |_ OpenCV_2.4.8_binary_pack_armv7a.apk
|
||||
| |_ OpenCV_2.4.8_Manager_2.16_XXX.apk
|
||||
|
|
||||
|_ doc
|
||||
|_ samples
|
||||
@ -157,10 +157,10 @@ Get the OpenCV4Android SDK
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
unzip ~/Downloads/OpenCV-2.4.7-android-sdk.zip
|
||||
unzip ~/Downloads/OpenCV-2.4.8-android-sdk.zip
|
||||
|
||||
.. |opencv_android_bin_pack| replace:: :file:`OpenCV-2.4.7-android-sdk.zip`
|
||||
.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.7/OpenCV-2.4.7-android-sdk.zip/download
|
||||
.. |opencv_android_bin_pack| replace:: :file:`OpenCV-2.4.8-android-sdk.zip`
|
||||
.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.8/OpenCV-2.4.8-android-sdk.zip/download
|
||||
.. |opencv_android_bin_pack_url| replace:: |opencv_android_bin_pack|
|
||||
.. |seven_zip| replace:: 7-Zip
|
||||
.. _seven_zip: http://www.7-zip.org/
|
||||
@ -295,7 +295,7 @@ Well, running samples from Eclipse is very simple:
|
||||
.. code-block:: sh
|
||||
:linenos:
|
||||
|
||||
<Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.7_Manager_2.14_armv7a-neon.apk
|
||||
<Android SDK path>/platform-tools/adb install <OpenCV4Android SDK path>/apk/OpenCV_2.4.8_Manager_2.16_armv7a-neon.apk
|
||||
|
||||
.. note:: ``armeabi``, ``armv7a-neon``, ``armv7a-neon-android8``, ``mips`` and ``x86`` stand for
|
||||
platform targets:
|
||||
|
@ -55,14 +55,14 @@ Manager to access OpenCV libraries externally installed in the target system.
|
||||
:guilabel:`File -> Import -> Existing project in your workspace`.
|
||||
|
||||
Press :guilabel:`Browse` button and locate OpenCV4Android SDK
|
||||
(:file:`OpenCV-2.4.7-android-sdk/sdk`).
|
||||
(:file:`OpenCV-2.4.8-android-sdk/sdk`).
|
||||
|
||||
.. image:: images/eclipse_opencv_dependency0.png
|
||||
:alt: Add dependency from OpenCV library
|
||||
:align: center
|
||||
|
||||
#. In application project add a reference to the OpenCV Java SDK in
|
||||
:guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.7``.
|
||||
:guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.8``.
|
||||
|
||||
.. image:: images/eclipse_opencv_dependency1.png
|
||||
:alt: Add dependency from OpenCV library
|
||||
@ -128,27 +128,27 @@ described above.
|
||||
#. Add the OpenCV library project to your workspace the same way as for the async initialization
|
||||
above. Use menu :guilabel:`File -> Import -> Existing project in your workspace`,
|
||||
press :guilabel:`Browse` button and select OpenCV SDK path
|
||||
(:file:`OpenCV-2.4.7-android-sdk/sdk`).
|
||||
(:file:`OpenCV-2.4.8-android-sdk/sdk`).
|
||||
|
||||
.. image:: images/eclipse_opencv_dependency0.png
|
||||
:alt: Add dependency from OpenCV library
|
||||
:align: center
|
||||
|
||||
#. In the application project add a reference to the OpenCV4Android SDK in
|
||||
:guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.7``;
|
||||
:guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.8``;
|
||||
|
||||
.. image:: images/eclipse_opencv_dependency1.png
|
||||
:alt: Add dependency from OpenCV library
|
||||
:align: center
|
||||
|
||||
#. If your application project **doesn't have a JNI part**, just copy the corresponding OpenCV
|
||||
native libs from :file:`<OpenCV-2.4.7-android-sdk>/sdk/native/libs/<target_arch>` to your
|
||||
native libs from :file:`<OpenCV-2.4.8-android-sdk>/sdk/native/libs/<target_arch>` to your
|
||||
project directory to folder :file:`libs/<target_arch>`.
|
||||
|
||||
In case of the application project **with a JNI part**, instead of manual libraries copying you
|
||||
need to modify your ``Android.mk`` file:
|
||||
add the following two code lines after the ``"include $(CLEAR_VARS)"`` and before
|
||||
``"include path_to_OpenCV-2.4.7-android-sdk/sdk/native/jni/OpenCV.mk"``
|
||||
``"include path_to_OpenCV-2.4.8-android-sdk/sdk/native/jni/OpenCV.mk"``
|
||||
|
||||
.. code-block:: make
|
||||
:linenos:
|
||||
@ -221,7 +221,7 @@ taken:
|
||||
|
||||
.. code-block:: make
|
||||
|
||||
include C:\Work\OpenCV4Android\OpenCV-2.4.7-android-sdk\sdk\native\jni\OpenCV.mk
|
||||
include C:\Work\OpenCV4Android\OpenCV-2.4.8-android-sdk\sdk\native\jni\OpenCV.mk
|
||||
|
||||
Should be inserted into the :file:`jni/Android.mk` file **after** this line:
|
||||
|
||||
|
@ -106,8 +106,8 @@ Enable hardware optimizations
|
||||
-----------------------------
|
||||
|
||||
Depending on target platform architecture different instruction sets can be used. By default
|
||||
compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DUSE_VFPV3=ON``
|
||||
to cmake command line to enable code generation for VFPv3 and ``-DUSE_NEON=ON`` for using
|
||||
compiler generates code for armv5l without VFPv3 and NEON extensions. Add ``-DENABLE_VFPV3=ON``
|
||||
to cmake command line to enable code generation for VFPv3 and ``-DENABLE_NEON=ON`` for using
|
||||
NEON SIMD extensions.
|
||||
|
||||
TBB is supported on multi core ARM SoCs also.
|
||||
|
79
doc/user_guide/ug_intelperc.rst
Normal file
79
doc/user_guide/ug_intelperc.rst
Normal file
@ -0,0 +1,79 @@
|
||||
*******
|
||||
HighGUI
|
||||
*******
|
||||
|
||||
.. highlight:: cpp
|
||||
|
||||
Using Creative Senz3D and other Intel Perceptual Computing SDK compatible depth sensors
|
||||
=======================================================================================
|
||||
|
||||
Depth sensors compatible with the Intel Perceptual Computing SDK are supported through the ``VideoCapture`` class. Depth maps, RGB images and some other output formats can be retrieved using the familiar ``VideoCapture`` interface.
|
||||
|
||||
In order to use depth sensor with OpenCV you should do the following preliminary steps:
|
||||
|
||||
#.
|
||||
Install Intel Perceptual Computing SDK (from here http://www.intel.com/software/perceptual).
|
||||
|
||||
#.
|
||||
Configure OpenCV with Intel Perceptual Computing SDK support by setting ``WITH_INTELPERC`` flag in CMake. If Intel Perceptual Computing SDK is found in install folders OpenCV will be built with Intel Perceptual Computing SDK library (see a status ``INTELPERC`` in CMake log). If CMake process doesn't find Intel Perceptual Computing SDK installation folder automatically, the user should change corresponding CMake variables ``INTELPERC_LIB_DIR`` and ``INTELPERC_INCLUDE_DIR`` to the proper value.
|
||||
|
||||
#.
|
||||
Build OpenCV.
|
||||
|
||||
VideoCapture can retrieve the following data:
|
||||
|
||||
#.
|
||||
data given from depth generator:
|
||||
* ``CV_CAP_INTELPERC_DEPTH_MAP`` - each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth. (CV_16UC1)
|
||||
* ``CV_CAP_INTELPERC_UVDEPTH_MAP`` - each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates. (CV_32FC2)
|
||||
* ``CV_CAP_INTELPERC_IR_MAP`` - each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam. (CV_16UC1)
|
||||
#.
|
||||
data given from RGB image generator:
|
||||
* ``CV_CAP_INTELPERC_IMAGE`` - color image. (CV_8UC3)
|
||||
|
||||
In order to get a depth map from the depth sensor use ``VideoCapture::operator >>``, e.g. ::
|
||||
|
||||
VideoCapture capture( CV_CAP_INTELPERC );
|
||||
for(;;)
|
||||
{
|
||||
Mat depthMap;
|
||||
capture >> depthMap;
|
||||
|
||||
if( waitKey( 30 ) >= 0 )
|
||||
break;
|
||||
}
|
||||
|
||||
For getting several data maps use ``VideoCapture::grab`` and ``VideoCapture::retrieve``, e.g. ::
|
||||
|
||||
VideoCapture capture(CV_CAP_INTELPERC);
|
||||
for(;;)
|
||||
{
|
||||
Mat depthMap;
|
||||
Mat image;
|
||||
Mat irImage;
|
||||
|
||||
capture.grab();
|
||||
|
||||
capture.retrieve( depthMap, CV_CAP_INTELPERC_DEPTH_MAP );
|
||||
capture.retrieve( image, CV_CAP_INTELPERC_IMAGE );
|
||||
capture.retrieve( irImage, CV_CAP_INTELPERC_IR_MAP);
|
||||
|
||||
if( waitKey( 30 ) >= 0 )
|
||||
break;
|
||||
}
|
||||
|
||||
To set and get properties of the sensor's data generators, use the ``VideoCapture::set`` and ``VideoCapture::get`` methods respectively, e.g. ::
|
||||
|
||||
VideoCapture capture( CV_CAP_INTELPERC );
|
||||
capture.set( CV_CAP_INTELPERC_DEPTH_GENERATOR | CV_CAP_PROP_INTELPERC_PROFILE_IDX, 0 );
|
||||
cout << "FPS " << capture.get( CV_CAP_INTELPERC_DEPTH_GENERATOR+CV_CAP_PROP_FPS ) << endl;
|
||||
|
||||
Since two types of sensor's data generators are supported (image generator and depth generator), there are two flags that should be used to set/get property of the needed generator:
|
||||
|
||||
* CV_CAP_INTELPERC_IMAGE_GENERATOR -- a flag for access to the image generator properties.
|
||||
|
||||
* CV_CAP_INTELPERC_DEPTH_GENERATOR -- a flag for access to the depth generator properties. This flag value is assumed by default if neither of the two possible values of the property is set.
|
||||
|
||||
For more information please refer to the usage example intelperc_capture.cpp_ in the ``opencv/samples/cpp`` folder.
|
||||
|
||||
.. _intelperc_capture.cpp: https://github.com/Itseez/opencv/tree/master/samples/cpp/intelperc_capture.cpp
|
@ -9,3 +9,4 @@ OpenCV User Guide
|
||||
ug_features2d.rst
|
||||
ug_highgui.rst
|
||||
ug_traincascade.rst
|
||||
ug_intelperc.rst
|
||||
|
@ -2,8 +2,11 @@ set(the_description "The Core Functionality")
|
||||
ocv_add_module(core PRIVATE_REQUIRED ${ZLIB_LIBRARIES} "${OPENCL_LIBRARIES}" OPTIONAL opencv_cudev)
|
||||
ocv_module_include_directories(${ZLIB_INCLUDE_DIRS})
|
||||
|
||||
if(HAVE_WINRT_CX)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /ZW")
|
||||
endif()
|
||||
if(HAVE_WINRT)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /ZW /GS /Gm- /AI\"${WINDOWS_SDK_PATH}/References/CommonConfiguration/Neutral\" /AI\"${VISUAL_STUDIO_PATH}/vcpackages\"")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /GS /Gm- /AI\"${WINDOWS_SDK_PATH}/References/CommonConfiguration/Neutral\" /AI\"${VISUAL_STUDIO_PATH}/vcpackages\"")
|
||||
endif()
|
||||
|
||||
if(HAVE_CUDA)
|
||||
|
@ -903,7 +903,7 @@ So, the function chooses an operation mode depending on the flags and size of th
|
||||
|
||||
* When ``DFT_COMPLEX_OUTPUT`` is set, the output is a complex matrix of the same size as input.
|
||||
|
||||
* When ``DFT_COMPLEX_OUTPUT`` is not set, the output is a real matrix of the same size as input. In case of 2D transform, it uses the packed format as shown above. In case of a single 1D transform, it looks like the first row of the matrix above. In case of multiple 1D transforms (when using the ``DCT_ROWS`` flag), each row of the output matrix looks like the first row of the matrix above.
|
||||
* When ``DFT_COMPLEX_OUTPUT`` is not set, the output is a real matrix of the same size as input. In case of 2D transform, it uses the packed format as shown above. In case of a single 1D transform, it looks like the first row of the matrix above. In case of multiple 1D transforms (when using the ``DFT_ROWS`` flag), each row of the output matrix looks like the first row of the matrix above.
|
||||
|
||||
* If the input array is complex and either ``DFT_INVERSE`` or ``DFT_REAL_OUTPUT`` are not set, the output is a complex array of the same size as input. The function performs a forward or inverse 1D or 2D transform of the whole input array or each row of the input array independently, depending on the flags ``DFT_INVERSE`` and ``DFT_ROWS``.
|
||||
|
||||
|
@ -444,7 +444,7 @@ CV_INLINE int cvIsInf( double value )
|
||||
// atomic increment on the linux version of the Intel(tm) compiler
|
||||
# define CV_XADD(addr, delta) (int)_InterlockedExchangeAdd(const_cast<void*>(reinterpret_cast<volatile void*>(addr)), delta)
|
||||
#elif defined __GNUC__
|
||||
# if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__
|
||||
# if defined __clang__ && __clang_major__ >= 3 && !defined __ANDROID__ && !defined __EMSCRIPTEN__ && !defined(__CUDACC__)
|
||||
# ifdef __ATOMIC_ACQ_REL
|
||||
# define CV_XADD(addr, delta) __c11_atomic_fetch_add((_Atomic(int)*)(addr), delta, __ATOMIC_ACQ_REL)
|
||||
# else
|
||||
|
@ -267,6 +267,12 @@ inline _InputOutputArray::_InputOutputArray(const Mat& m)
|
||||
inline _InputOutputArray::_InputOutputArray(const std::vector<Mat>& vec)
|
||||
{ init(FIXED_SIZE + STD_VECTOR_MAT + ACCESS_RW, &vec); }
|
||||
|
||||
inline _InputOutputArray::_InputOutputArray(const UMat& m)
|
||||
{ init(FIXED_TYPE + FIXED_SIZE + UMAT + ACCESS_RW, &m); }
|
||||
|
||||
inline _InputOutputArray::_InputOutputArray(const std::vector<UMat>& vec)
|
||||
{ init(FIXED_SIZE + STD_VECTOR_UMAT + ACCESS_RW, &vec); }
|
||||
|
||||
inline _InputOutputArray::_InputOutputArray(const cuda::GpuMat& d_mat)
|
||||
{ init(FIXED_TYPE + FIXED_SIZE + GPU_MAT + ACCESS_RW, &d_mat); }
|
||||
|
||||
|
@ -210,6 +210,7 @@ public:
|
||||
Context2(const Context2& c);
|
||||
Context2& operator = (const Context2& c);
|
||||
|
||||
bool create();
|
||||
bool create(int dtype);
|
||||
size_t ndevices() const;
|
||||
const Device& device(size_t idx) const;
|
||||
@ -488,6 +489,7 @@ public:
|
||||
bool runTask(bool sync, const Queue& q=Queue());
|
||||
|
||||
size_t workGroupSize() const;
|
||||
size_t preferedWorkGroupSizeMultiple() const;
|
||||
bool compileWorkGroupSize(size_t wsz[]) const;
|
||||
size_t localMemSize() const;
|
||||
|
||||
|
@ -394,7 +394,9 @@ template<typename _Tp> static inline _Tp randu()
|
||||
return (_Tp)theRNG();
|
||||
}
|
||||
|
||||
///////////////////////////////// Formatted string generation /////////////////////////////////
|
||||
|
||||
CV_EXPORTS String format( const char* fmt, ... );
|
||||
|
||||
///////////////////////////////// Formatted output of cv::Mat /////////////////////////////////
|
||||
|
||||
|
@ -85,7 +85,7 @@ template<typename _Tp, size_t fixed_size = 1024/sizeof(_Tp)+8> class AutoBuffer
|
||||
public:
|
||||
typedef _Tp value_type;
|
||||
|
||||
//! the default contructor
|
||||
//! the default constructor
|
||||
AutoBuffer();
|
||||
//! constructor taking the real buffer size
|
||||
AutoBuffer(size_t _size);
|
||||
|
@ -47,13 +47,81 @@
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
|
||||
///////////// Lut ////////////////////////
|
||||
|
||||
typedef Size_MatType LUTFixture;
|
||||
|
||||
OCL_PERF_TEST_P(LUTFixture, LUT,
|
||||
::testing::Combine(OCL_TEST_SIZES,
|
||||
OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params), cn = CV_MAT_CN(type);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, CV_8UC(cn)), lut(1, 256, type);
|
||||
int dstType = CV_MAKETYPE(lut.depth(), src.channels());
|
||||
UMat dst(srcSize, dstType);
|
||||
|
||||
declare.in(src, lut, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::LUT(src, lut, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// Exp ////////////////////////
|
||||
|
||||
typedef Size_MatType ExpFixture;
|
||||
|
||||
OCL_PERF_TEST_P(ExpFixture, Exp, ::testing::Combine(
|
||||
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src).out(dst);
|
||||
randu(src, 5, 16);
|
||||
|
||||
OCL_TEST_CYCLE() cv::exp(src, dst);
|
||||
|
||||
SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
|
||||
}
|
||||
|
||||
///////////// Log ////////////////////////
|
||||
|
||||
typedef Size_MatType LogFixture;
|
||||
|
||||
OCL_PERF_TEST_P(LogFixture, Log, ::testing::Combine(
|
||||
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type), dst(srcSize, type);
|
||||
randu(src, 1, 10000);
|
||||
declare.in(src).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::log(src, dst);
|
||||
|
||||
SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
|
||||
}
|
||||
|
||||
///////////// Add ////////////////////////
|
||||
|
||||
typedef Size_MatType AddFixture;
|
||||
|
||||
OCL_PERF_TEST_P(AddFixture, Add,
|
||||
::testing::Combine(OCL_TEST_SIZES,
|
||||
OCL_TEST_TYPES))
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size srcSize = GET_PARAM(0);
|
||||
const int type = GET_PARAM(1);
|
||||
@ -61,15 +129,691 @@ OCL_PERF_TEST_P(AddFixture, Add,
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
|
||||
randu(src1);
|
||||
randu(src2);
|
||||
declare.in(src1, src2).out(dst);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::add(src1, src2, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// Subtract ////////////////////////
|
||||
|
||||
typedef Size_MatType SubtractFixture;
|
||||
|
||||
OCL_PERF_TEST_P(SubtractFixture, Subtract,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::subtract(src1, src2, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// Mul ////////////////////////
|
||||
|
||||
typedef Size_MatType MulFixture;
|
||||
|
||||
OCL_PERF_TEST_P(MulFixture, Multiply, ::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::multiply(src1, src2, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// Div ////////////////////////
|
||||
|
||||
typedef Size_MatType DivFixture;
|
||||
|
||||
OCL_PERF_TEST_P(DivFixture, Divide,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::divide(src1, src2, dst);
|
||||
|
||||
SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
|
||||
}
|
||||
|
||||
///////////// Absdiff ////////////////////////
|
||||
|
||||
typedef Size_MatType AbsDiffFixture;
|
||||
|
||||
OCL_PERF_TEST_P(AbsDiffFixture, Absdiff,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src1, src2, WARMUP_RNG).in(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::absdiff(src1, src2, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// CartToPolar ////////////////////////
|
||||
|
||||
typedef Size_MatType CartToPolarFixture;
|
||||
|
||||
OCL_PERF_TEST_P(CartToPolarFixture, CartToPolar, ::testing::Combine(
|
||||
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type),
|
||||
dst1(srcSize, type), dst2(srcSize, type);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst1, dst2);
|
||||
|
||||
OCL_TEST_CYCLE() cv::cartToPolar(src1, src2, dst1, dst2);
|
||||
|
||||
SANITY_CHECK(dst1, 8e-3);
|
||||
SANITY_CHECK(dst2, 8e-3);
|
||||
}
|
||||
|
||||
///////////// PolarToCart ////////////////////////
|
||||
|
||||
typedef Size_MatType PolarToCartFixture;
|
||||
|
||||
OCL_PERF_TEST_P(PolarToCartFixture, PolarToCart, ::testing::Combine(
|
||||
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type),
|
||||
dst1(srcSize, type), dst2(srcSize, type);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst1, dst2);
|
||||
|
||||
OCL_TEST_CYCLE() cv::polarToCart(src1, src2, dst1, dst2);
|
||||
|
||||
SANITY_CHECK(dst1, 5e-5);
|
||||
SANITY_CHECK(dst2, 5e-5);
|
||||
}
|
||||
|
||||
///////////// Magnitude ////////////////////////
|
||||
|
||||
typedef Size_MatType MagnitudeFixture;
|
||||
|
||||
OCL_PERF_TEST_P(MagnitudeFixture, Magnitude, ::testing::Combine(
|
||||
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type),
|
||||
dst(srcSize, type);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::magnitude(src1, src2, dst);
|
||||
|
||||
SANITY_CHECK(dst, 1e-6);
|
||||
}
|
||||
|
||||
///////////// Transpose ////////////////////////
|
||||
|
||||
typedef Size_MatType TransposeFixture;
|
||||
|
||||
OCL_PERF_TEST_P(TransposeFixture, Transpose, ::testing::Combine(
|
||||
OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::transpose(src, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// Flip ////////////////////////
|
||||
|
||||
enum
|
||||
{
|
||||
FLIP_BOTH = 0, FLIP_ROWS, FLIP_COLS
|
||||
};
|
||||
|
||||
CV_ENUM(FlipType, FLIP_BOTH, FLIP_ROWS, FLIP_COLS)
|
||||
|
||||
typedef std::tr1::tuple<Size, MatType, FlipType> FlipParams;
|
||||
typedef TestBaseWithParam<FlipParams> FlipFixture;
|
||||
|
||||
OCL_PERF_TEST_P(FlipFixture, Flip,
|
||||
::testing::Combine(OCL_TEST_SIZES,
|
||||
OCL_TEST_TYPES, FlipType::all()))
|
||||
{
|
||||
const FlipParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
const int flipType = get<2>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::flip(src, dst, flipType - 1);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// minMaxLoc ////////////////////////
|
||||
|
||||
typedef Size_MatType MinMaxLocFixture;
|
||||
|
||||
OCL_PERF_TEST_P(MinMaxLocFixture, MinMaxLoc,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
bool onecn = CV_MAT_CN(type) == 1;
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type);;
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
double min_val = 0.0, max_val = 0.0;
|
||||
Point min_loc, max_loc;
|
||||
|
||||
OCL_TEST_CYCLE() cv::minMaxLoc(src, &min_val, &max_val, onecn ? &min_loc : NULL,
|
||||
onecn ? &max_loc : NULL);
|
||||
|
||||
ASSERT_GE(max_val, min_val);
|
||||
SANITY_CHECK(min_val);
|
||||
SANITY_CHECK(max_val);
|
||||
|
||||
int min_loc_x = min_loc.x, min_loc_y = min_loc.y, max_loc_x = max_loc.x,
|
||||
max_loc_y = max_loc.y;
|
||||
SANITY_CHECK(min_loc_x);
|
||||
SANITY_CHECK(min_loc_y);
|
||||
SANITY_CHECK(max_loc_x);
|
||||
SANITY_CHECK(max_loc_y);
|
||||
}
|
||||
|
||||
///////////// Sum ////////////////////////
|
||||
|
||||
typedef Size_MatType SumFixture;
|
||||
|
||||
OCL_PERF_TEST_P(SumFixture, Sum,
|
||||
::testing::Combine(OCL_TEST_SIZES,
|
||||
OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params), depth = CV_MAT_DEPTH(type);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type);
|
||||
Scalar result;
|
||||
randu(src, 0, 60);
|
||||
declare.in(src);
|
||||
|
||||
OCL_TEST_CYCLE() result = cv::sum(src);
|
||||
|
||||
if (depth >= CV_32F)
|
||||
SANITY_CHECK(result, 1e-6, ERROR_RELATIVE);
|
||||
else
|
||||
SANITY_CHECK(result);
|
||||
}
|
||||
|
||||
///////////// countNonZero ////////////////////////
|
||||
|
||||
typedef Size_MatType CountNonZeroFixture;
|
||||
|
||||
OCL_PERF_TEST_P(CountNonZeroFixture, CountNonZero,
|
||||
::testing::Combine(OCL_TEST_SIZES,
|
||||
OCL_PERF_ENUM(CV_8UC1, CV_32FC1)))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type);
|
||||
int result = 0;
|
||||
randu(src, 0, 10);
|
||||
declare.in(src);
|
||||
|
||||
OCL_TEST_CYCLE() result = cv::countNonZero(src);
|
||||
|
||||
SANITY_CHECK(result);
|
||||
}
|
||||
|
||||
///////////// Phase ////////////////////////
|
||||
|
||||
typedef Size_MatType PhaseFixture;
|
||||
|
||||
OCL_PERF_TEST_P(PhaseFixture, Phase, ::testing::Combine(
|
||||
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type),
|
||||
dst(srcSize, type);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::phase(src1, src2, dst, 1);
|
||||
|
||||
SANITY_CHECK(dst, 1e-2);
|
||||
}
|
||||
|
||||
///////////// bitwise_and////////////////////////
|
||||
|
||||
typedef Size_MatType BitwiseAndFixture;
|
||||
|
||||
OCL_PERF_TEST_P(BitwiseAndFixture, Bitwise_and,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::bitwise_and(src1, src2, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// bitwise_xor ////////////////////////
|
||||
|
||||
typedef Size_MatType BitwiseXorFixture;
|
||||
|
||||
OCL_PERF_TEST_P(BitwiseXorFixture, Bitwise_xor,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::bitwise_xor(src1, src2, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// bitwise_or ////////////////////////
|
||||
|
||||
typedef Size_MatType BitwiseOrFixture;
|
||||
|
||||
OCL_PERF_TEST_P(BitwiseOrFixture, Bitwise_or,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::bitwise_or(src1, src2, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// bitwise_not ////////////////////////
|
||||
|
||||
typedef Size_MatType BitwiseNotFixture;
|
||||
|
||||
OCL_PERF_TEST_P(BitwiseNotFixture, Bitwise_not,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::bitwise_not(src, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// compare////////////////////////
|
||||
|
||||
CV_ENUM(CmpCode, CMP_LT, CMP_LE, CMP_EQ, CMP_NE, CMP_GE, CMP_GT)
|
||||
|
||||
typedef std::tr1::tuple<Size, MatType, CmpCode> CompareParams;
|
||||
typedef TestBaseWithParam<CompareParams> CompareFixture;
|
||||
|
||||
OCL_PERF_TEST_P(CompareFixture, Compare,
|
||||
::testing::Combine(OCL_TEST_SIZES,
|
||||
OCL_TEST_TYPES, CmpCode::all()))
|
||||
{
|
||||
const CompareParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
const int cmpCode = get<2>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, CV_8UC1);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::compare(src1, src2, dst, cmpCode);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// pow ////////////////////////
|
||||
|
||||
typedef Size_MatType PowFixture;
|
||||
|
||||
OCL_PERF_TEST_P(PowFixture, Pow, ::testing::Combine(
|
||||
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type), dst(srcSize, type);
|
||||
randu(src, -100, 100);
|
||||
declare.in(src).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::pow(src, -2.0, dst);
|
||||
|
||||
SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
|
||||
}
|
||||
|
||||
///////////// AddWeighted////////////////////////
|
||||
|
||||
typedef Size_MatType AddWeightedFixture;
|
||||
|
||||
OCL_PERF_TEST_P(AddWeightedFixture, AddWeighted,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params), depth = CV_MAT_DEPTH(type);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst);
|
||||
double alpha = 2.0, beta = 1.0, gama = 3.0;
|
||||
|
||||
OCL_TEST_CYCLE() cv::addWeighted(src1, alpha, src2, beta, gama, dst);
|
||||
|
||||
if (depth >= CV_32F)
|
||||
SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
|
||||
else
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// Sqrt ///////////////////////
|
||||
|
||||
typedef Size_MatType SqrtFixture;
|
||||
|
||||
OCL_PERF_TEST_P(SqrtFixture, Sqrt, ::testing::Combine(
|
||||
OCL_TEST_SIZES, OCL_PERF_ENUM(CV_32FC1, CV_32FC4)))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type), dst(srcSize, type);
|
||||
randu(src, 0, 1000);
|
||||
declare.in(src).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::sqrt(src, dst);
|
||||
|
||||
SANITY_CHECK(dst, 1e-6, ERROR_RELATIVE);
|
||||
}
|
||||
|
||||
///////////// SetIdentity ////////////////////////
|
||||
|
||||
typedef Size_MatType SetIdentityFixture;
|
||||
|
||||
OCL_PERF_TEST_P(SetIdentityFixture, SetIdentity,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat dst(srcSize, type);
|
||||
declare.out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::setIdentity(dst, cv::Scalar::all(181));
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// MeanStdDev ////////////////////////
|
||||
|
||||
typedef Size_MatType MeanStdDevFixture;
|
||||
|
||||
OCL_PERF_TEST_P(MeanStdDevFixture, DISABLED_MeanStdDev,
|
||||
::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
const double eps = 1e-5;
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type);
|
||||
Scalar mean, stddev;
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
OCL_TEST_CYCLE() cv::meanStdDev(src, mean, stddev);
|
||||
|
||||
double mean0 = mean[0], mean1 = mean[1], mean2 = mean[2], mean3 = mean[3];
|
||||
double stddev0 = stddev[0], stddev1 = stddev[1], stddev2 = stddev[2], stddev3 = stddev[3];
|
||||
|
||||
SANITY_CHECK(mean0, eps, ERROR_RELATIVE);
|
||||
SANITY_CHECK(mean1, eps, ERROR_RELATIVE);
|
||||
SANITY_CHECK(mean2, eps, ERROR_RELATIVE);
|
||||
SANITY_CHECK(mean3, eps, ERROR_RELATIVE);
|
||||
SANITY_CHECK(stddev0, eps, ERROR_RELATIVE);
|
||||
SANITY_CHECK(stddev1, eps, ERROR_RELATIVE);
|
||||
SANITY_CHECK(stddev2, eps, ERROR_RELATIVE);
|
||||
SANITY_CHECK(stddev3, eps, ERROR_RELATIVE);
|
||||
}
|
||||
|
||||
///////////// Norm ////////////////////////
|
||||
|
||||
CV_ENUM(NormType, NORM_INF, NORM_L1, NORM_L2)
|
||||
|
||||
typedef std::tr1::tuple<Size, MatType, NormType> NormParams;
|
||||
typedef TestBaseWithParam<NormParams> NormFixture;
|
||||
|
||||
OCL_PERF_TEST_P(NormFixture, DISABLED_Norm,
|
||||
::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES, NormType::all()))
|
||||
{
|
||||
const NormParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
const int normType = get<2>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type);
|
||||
double res;
|
||||
declare.in(src1, src2, WARMUP_RNG);
|
||||
|
||||
OCL_TEST_CYCLE() res = cv::norm(src1, src2, normType);
|
||||
|
||||
SANITY_CHECK(res, 1e-6, ERROR_RELATIVE);
|
||||
}
|
||||
|
||||
///////////// Repeat ////////////////////////
|
||||
|
||||
typedef Size_MatType RepeatFixture;
|
||||
|
||||
OCL_PERF_TEST_P(RepeatFixture, Repeat,
|
||||
::testing::Combine(OCL_PERF_ENUM(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3), OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params), nx = 2, ny = 2;
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type), dst(Size(srcSize.width * nx, srcSize.height * ny), type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::repeat(src, nx, ny, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// Min ////////////////////////
|
||||
|
||||
typedef Size_MatType MinFixture;
|
||||
|
||||
OCL_PERF_TEST_P(MinFixture, Min,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::min(src1, src2, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// Max ////////////////////////
|
||||
|
||||
typedef Size_MatType MaxFixture;
|
||||
|
||||
OCL_PERF_TEST_P(MaxFixture, Max,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src1(srcSize, type), src2(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::max(src1, src2, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// InRange ////////////////////////
|
||||
|
||||
typedef Size_MatType InRangeFixture;
|
||||
|
||||
OCL_PERF_TEST_P(InRangeFixture, InRange,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES))
|
||||
{
|
||||
const Size_MatType_t params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type), lb(srcSize, type), ub(srcSize, type), dst(srcSize, CV_8UC1);
|
||||
declare.in(src, lb, ub, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::inRange(src, lb, ub, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// Normalize ////////////////////////
|
||||
|
||||
CV_ENUM(NormalizeModes, CV_MINMAX, CV_L2, CV_L1, CV_C)
|
||||
|
||||
typedef tuple<Size, MatType, NormalizeModes> NormalizeParams;
|
||||
typedef TestBaseWithParam<NormalizeParams> NormalizeFixture;
|
||||
|
||||
OCL_PERF_TEST_P(NormalizeFixture, Normalize,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_TEST_TYPES, NormalizeModes::all()))
|
||||
{
|
||||
const NormalizeParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int type = get<1>(params), mode = get<2>(params);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type), dst(srcSize, type);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::normalize(src, dst, 10, 110, mode);
|
||||
|
||||
SANITY_CHECK(dst, 5e-2);
|
||||
}
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
||||
#endif // HAVE_OPENCL
|
||||
|
156
modules/core/perf/opencl/perf_channels.cpp
Normal file
156
modules/core/perf/opencl/perf_channels.cpp
Normal file
@ -0,0 +1,156 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Fangfang Bai, fangfang@multicorewareinc.com
|
||||
// Jin Ma, jin@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_perf.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
|
||||
///////////// Merge////////////////////////
|
||||
|
||||
typedef tuple<Size, MatDepth, int> MergeParams;
|
||||
typedef TestBaseWithParam<MergeParams> MergeFixture;
|
||||
|
||||
OCL_PERF_TEST_P(MergeFixture, Merge,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_PERF_ENUM(CV_8U, CV_32F), Values(2, 3)))
|
||||
{
|
||||
const MergeParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int depth = get<1>(params), cn = get<2>(params), dtype = CV_MAKE_TYPE(depth, cn);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, dtype);
|
||||
|
||||
UMat dst(srcSize, dtype);
|
||||
vector<UMat> src(cn);
|
||||
for (vector<UMat>::iterator i = src.begin(), end = src.end(); i != end; ++i)
|
||||
{
|
||||
i->create(srcSize, CV_MAKE_TYPE(depth, 1));
|
||||
declare.in(*i, WARMUP_RNG);
|
||||
}
|
||||
declare.out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::merge(src, dst);
|
||||
|
||||
SANITY_CHECK(dst);
|
||||
}
|
||||
|
||||
///////////// Split ////////////////////////
|
||||
|
||||
typedef MergeParams SplitParams;
|
||||
typedef TestBaseWithParam<SplitParams> SplitFixture;
|
||||
|
||||
OCL_PERF_TEST_P(SplitFixture, DISABLED_Split,
|
||||
::testing::Combine(OCL_TEST_SIZES, OCL_PERF_ENUM(CV_8U, CV_32F), Values(2, 3)))
|
||||
{
|
||||
const SplitParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int depth = get<1>(params), cn = get<2>(params), type = CV_MAKE_TYPE(depth, cn);
|
||||
|
||||
ASSERT_TRUE(cn == 3 || cn == 2);
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat src(srcSize, type);
|
||||
std::vector<UMat> dst(cn, UMat(srcSize, CV_MAKE_TYPE(depth, 1)));
|
||||
|
||||
declare.in(src, WARMUP_RNG);
|
||||
for (int i = 0; i < cn; ++i)
|
||||
declare.in(dst[i]);
|
||||
|
||||
OCL_TEST_CYCLE() cv::split(src, dst);
|
||||
|
||||
ASSERT_EQ(cn, (int)dst.size());
|
||||
|
||||
if (cn == 2)
|
||||
{
|
||||
UMat & dst0 = dst[0], & dst1 = dst[1];
|
||||
SANITY_CHECK(dst0);
|
||||
SANITY_CHECK(dst1);
|
||||
}
|
||||
else
|
||||
{
|
||||
UMat & dst0 = dst[0], & dst1 = dst[1], & dst2 = dst[2];
|
||||
SANITY_CHECK(dst0);
|
||||
SANITY_CHECK(dst1);
|
||||
SANITY_CHECK(dst2);
|
||||
}
|
||||
}
|
||||
|
||||
///////////// MixChannels ////////////////////////
|
||||
|
||||
typedef tuple<Size, MatDepth> MixChannelsParams;
|
||||
typedef TestBaseWithParam<MixChannelsParams> MixChannelsFixture;
|
||||
|
||||
OCL_PERF_TEST_P(MixChannelsFixture, MixChannels,
|
||||
::testing::Combine(Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3),
|
||||
OCL_PERF_ENUM(CV_8U, CV_32F)))
|
||||
{
|
||||
const MixChannelsParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int depth = get<1>(params), type = CV_MAKE_TYPE(depth, 2), n = 2;
|
||||
|
||||
checkDeviceMaxMemoryAllocSize(srcSize, type);
|
||||
|
||||
UMat templ(srcSize, type);
|
||||
std::vector<UMat> src(n, templ), dst(n, templ);
|
||||
for (int i = 0; i < n; ++i)
|
||||
declare.in(src[i], WARMUP_RNG).out(dst[i]);
|
||||
|
||||
int fromTo[] = { 1,2, 2,0, 0,3, 3,1 };
|
||||
|
||||
OCL_TEST_CYCLE() cv::mixChannels(src, dst, fromTo, 4);
|
||||
|
||||
UMat & dst0 = dst[0], & dst1 = dst[1];
|
||||
SANITY_CHECK(dst0);
|
||||
SANITY_CHECK(dst1);
|
||||
}
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
||||
#endif // HAVE_OPENCL
|
99
modules/core/perf/opencl/perf_dxt.cpp
Normal file
99
modules/core/perf/opencl/perf_dxt.cpp
Normal file
@ -0,0 +1,99 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Fangfang Bai, fangfang@multicorewareinc.com
|
||||
// Jin Ma, jin@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_perf.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
|
||||
///////////// dft ////////////////////////
|
||||
|
||||
typedef tuple<Size, int> DftParams;
|
||||
typedef TestBaseWithParam<DftParams> DftFixture;
|
||||
|
||||
OCL_PERF_TEST_P(DftFixture, Dft, ::testing::Combine(Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3),
|
||||
Values((int)DFT_ROWS, (int)DFT_SCALE, (int)DFT_INVERSE,
|
||||
(int)DFT_INVERSE | DFT_SCALE, (int)DFT_ROWS | DFT_INVERSE)))
|
||||
{
|
||||
const DftParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int flags = get<1>(params);
|
||||
|
||||
UMat src(srcSize, CV_32FC2), dst(srcSize, CV_32FC2);
|
||||
declare.in(src, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::dft(src, dst, flags | DFT_COMPLEX_OUTPUT);
|
||||
|
||||
SANITY_CHECK(dst, 1e-3);
|
||||
}
|
||||
|
||||
///////////// MulSpectrums ////////////////////////
|
||||
|
||||
typedef tuple<Size, bool> MulSpectrumsParams;
|
||||
typedef TestBaseWithParam<MulSpectrumsParams> MulSpectrumsFixture;
|
||||
|
||||
OCL_PERF_TEST_P(MulSpectrumsFixture, MulSpectrums,
|
||||
::testing::Combine(Values(OCL_SIZE_1, OCL_SIZE_2, OCL_SIZE_3),
|
||||
Bool()))
|
||||
{
|
||||
const MulSpectrumsParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const bool conj = get<1>(params);
|
||||
|
||||
UMat src1(srcSize, CV_32FC2), src2(srcSize, CV_32FC2), dst(srcSize, CV_32FC2);
|
||||
declare.in(src1, src2, WARMUP_RNG).out(dst);
|
||||
|
||||
OCL_TEST_CYCLE() cv::mulSpectrums(src1, src2, dst, 0, conj);
|
||||
|
||||
SANITY_CHECK(dst, 1e-3);
|
||||
}
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
||||
#endif // HAVE_OPENCL
|
82
modules/core/perf/opencl/perf_gemm.cpp
Normal file
82
modules/core/perf/opencl/perf_gemm.cpp
Normal file
@ -0,0 +1,82 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Fangfang Bai, fangfang@multicorewareinc.com
|
||||
// Jin Ma, jin@multicorewareinc.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_perf.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
|
||||
///////////// gemm ////////////////////////
|
||||
|
||||
typedef tuple<Size, int> GemmParams;
|
||||
typedef TestBaseWithParam<GemmParams> GemmFixture;
|
||||
|
||||
OCL_PERF_TEST_P(GemmFixture, Gemm, ::testing::Combine(
|
||||
::testing::Values(Size(1000, 1000), Size(1500, 1500)),
|
||||
Values((int)cv::GEMM_3_T, (int)cv::GEMM_3_T | (int)cv::GEMM_2_T)))
|
||||
{
|
||||
GemmParams params = GetParam();
|
||||
const Size srcSize = get<0>(params);
|
||||
const int flags = get<1>(params);
|
||||
|
||||
UMat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1),
|
||||
src3(srcSize, CV_32FC1), dst(srcSize, CV_32FC1);
|
||||
declare.in(src1, src2, src3).out(dst);
|
||||
randu(src1, -10.0f, 10.0f);
|
||||
randu(src2, -10.0f, 10.0f);
|
||||
randu(src3, -10.0f, 10.0f);
|
||||
|
||||
OCL_TEST_CYCLE() cv::gemm(src1, src2, 0.6, src3, 1.5, dst, flags);
|
||||
|
||||
SANITY_CHECK(dst, 0.01);
|
||||
}
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
||||
#endif // HAVE_OPENCL
|
@ -1409,7 +1409,7 @@ static void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst,
|
||||
int wtype, dims1 = psrc1->dims(), dims2 = psrc2->dims();
|
||||
Size sz1 = dims1 <= 2 ? psrc1->size() : Size();
|
||||
Size sz2 = dims2 <= 2 ? psrc2->size() : Size();
|
||||
bool use_opencl = _dst.kind() == _OutputArray::UMAT && ocl::useOpenCL() && dims1 <= 2 && dims2 <= 2;
|
||||
bool use_opencl = _dst.isUMat() && ocl::useOpenCL() && dims1 <= 2 && dims2 <= 2;
|
||||
bool src1Scalar = checkScalar(*psrc1, type2, kind1, kind2);
|
||||
bool src2Scalar = checkScalar(*psrc2, type1, kind2, kind1);
|
||||
|
||||
@ -2877,11 +2877,121 @@ static InRangeFunc getInRangeFunc(int depth)
|
||||
return inRangeTab[depth];
|
||||
}
|
||||
|
||||
// OpenCL implementation of inRange.
//
// Writes a CV_8UC1 mask of the source size into _dst. Each bound may be an
// array of the same size and type as _src, or a scalar. Both bounds must be of
// the same kind: mixing an array bound with a scalar bound is not handled here.
//
// Returns true when the kernel was launched successfully. Returns false when
// this path cannot handle the request (mixed bound kinds, 64-bit float data on
// a device without double support, kernel build failure); the caller then runs
// the generic implementation. Raises CV_StsUnmatchedSizes when a bound is
// neither a matching array nor a scalar.
static bool ocl_inRange( InputArray _src, InputArray _lowerb,
                         InputArray _upperb, OutputArray _dst )
{
    int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind();
    Size ssize = _src.size(), lsize = _lowerb.size(), usize = _upperb.size();
    int stype = _src.type(), ltype = _lowerb.type(), utype = _upperb.type();
    int sdepth = CV_MAT_DEPTH(stype), ldepth = CV_MAT_DEPTH(ltype), udepth = CV_MAT_DEPTH(utype);
    int cn = CV_MAT_CN(stype);
    bool lbScalar = false, ubScalar = false;

    // A bound that does not match the source in size and type must be a scalar.
    if( (lkind == _InputArray::MATX && skind != _InputArray::MATX) ||
        ssize != lsize || stype != ltype )
    {
        if( !checkScalar(_lowerb, stype, lkind, skind) )
            CV_Error( CV_StsUnmatchedSizes,
                     "The lower bounary is neither an array of the same size and same type as src, nor a scalar");
        lbScalar = true;
    }

    if( (ukind == _InputArray::MATX && skind != _InputArray::MATX) ||
        ssize != usize || stype != utype )
    {
        if( !checkScalar(_upperb, stype, ukind, skind) )
            CV_Error( CV_StsUnmatchedSizes,
                     "The upper bounary is neither an array of the same size and same type as src, nor a scalar");
        ubScalar = true;
    }

    // One array bound plus one scalar bound is left to the generic code path.
    if (lbScalar != ubScalar)
        return false;

    bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
            haveScalar = lbScalar && ubScalar;

    if ( (!doubleSupport && sdepth == CV_64F) ||
         (!haveScalar && (sdepth != ldepth || sdepth != udepth)) )
        return false;

    ocl::Kernel ker("inrange", ocl::core::inrange_oclsrc,
                    format("%s-D cn=%d -D T=%s%s", haveScalar ? "-D HAVE_SCALAR " : "",
                           cn, ocl::typeToStr(sdepth), doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (ker.empty())
        return false;

    _dst.create(ssize, CV_8UC1);
    UMat src = _src.getUMat(), dst = _dst.getUMat(), lscalaru, uscalaru;
    Mat lscalar, uscalar;

    if (haveScalar)
    {
        lscalar = _lowerb.getMat();
        uscalar = _upperb.getMat();

        size_t esz = src.elemSize();
        size_t blocksize = 36;

        AutoBuffer<uchar> _buf(blocksize*(((int)lbScalar + (int)ubScalar)*esz + cn) + 2*cn*sizeof(int) + 128);
        uchar *buf = alignPtr(_buf + blocksize*cn, 16);

        // For sources narrower than 32-bit int, scalar bounds of another depth
        // are first widened to int so out-of-range bounds can be detected
        // before the final conversion to the source type. Each bound is
        // widened with a converter matching ITS OWN depth, and the branch is
        // taken when EITHER bound differs from the source depth; previously
        // the lower bound's depth was assumed for both.
        if( (ldepth != sdepth || udepth != sdepth) && sdepth < CV_32S )
        {
            int* ilbuf = (int*)alignPtr(buf + blocksize*esz, 16);
            int* iubuf = ilbuf + cn;

            BinaryFunc lcvtfunc = getConvertFunc(ldepth, CV_32S);
            BinaryFunc ucvtfunc = getConvertFunc(udepth, CV_32S);
            lcvtfunc(lscalar.data, 0, 0, 0, (uchar*)ilbuf, 0, Size(cn, 1), 0);
            ucvtfunc(uscalar.data, 0, 0, 0, (uchar*)iubuf, 0, Size(cn, 1), 0);
            int minval = cvRound(getMinVal(sdepth)), maxval = cvRound(getMaxVal(sdepth));

            for( int k = 0; k < cn; k++ )
            {
                // A range that cannot contain any representable source value
                // is replaced by an explicitly empty one (lower > upper).
                if( ilbuf[k] > iubuf[k] || ilbuf[k] > maxval || iubuf[k] < minval )
                    ilbuf[k] = minval+1, iubuf[k] = minval;
            }
            lscalar = Mat(cn, 1, CV_32S, ilbuf);
            uscalar = Mat(cn, 1, CV_32S, iubuf);
        }

        lscalar.convertTo(lscalar, stype);
        uscalar.convertTo(uscalar, stype);
    }
    else
    {
        lscalaru = _lowerb.getUMat();
        uscalaru = _upperb.getUMat();
    }

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
            dstarg = ocl::KernelArg::WriteOnly(dst);

    if (haveScalar)
    {
        // Scalar bounds are uploaded and passed as plain read-only buffers.
        lscalar.copyTo(lscalaru);
        uscalar.copyTo(uscalaru);

        ker.args(srcarg, dstarg, ocl::KernelArg::PtrReadOnly(lscalaru),
               ocl::KernelArg::PtrReadOnly(uscalaru));
    }
    else
        ker.args(srcarg, dstarg, ocl::KernelArg::ReadOnlyNoSize(lscalaru),
               ocl::KernelArg::ReadOnlyNoSize(uscalaru));

    // One work-item per source pixel.
    size_t globalsize[2] = { (size_t)ssize.width, (size_t)ssize.height };
    return ker.run(2, globalsize, NULL, false);
}
|
||||
|
||||
}
|
||||
|
||||
void cv::inRange(InputArray _src, InputArray _lowerb,
|
||||
InputArray _upperb, OutputArray _dst)
|
||||
{
|
||||
if (ocl::useOpenCL() && _src.dims() <= 2 && _lowerb.dims() <= 2 &&
|
||||
_upperb.dims() <= 2 && _dst.isUMat() && ocl_inRange(_src, _lowerb, _upperb, _dst))
|
||||
return;
|
||||
|
||||
int skind = _src.kind(), lkind = _lowerb.kind(), ukind = _upperb.kind();
|
||||
Mat src = _src.getMat(), lb = _lowerb.getMat(), ub = _upperb.getMat();
|
||||
|
||||
@ -2905,14 +3015,14 @@ void cv::inRange(InputArray _src, InputArray _lowerb,
|
||||
ubScalar = true;
|
||||
}
|
||||
|
||||
CV_Assert( ((int)lbScalar ^ (int)ubScalar) == 0 );
|
||||
CV_Assert(lbScalar == ubScalar);
|
||||
|
||||
int cn = src.channels(), depth = src.depth();
|
||||
|
||||
size_t esz = src.elemSize();
|
||||
size_t blocksize0 = (size_t)(BLOCK_SIZE + esz-1)/esz;
|
||||
|
||||
_dst.create(src.dims, src.size, CV_8U);
|
||||
_dst.create(src.dims, src.size, CV_8UC1);
|
||||
Mat dst = _dst.getMat();
|
||||
InRangeFunc func = getInRangeFunc(depth);
|
||||
|
||||
|
@ -612,16 +612,111 @@ void cv::mixChannels( const Mat* src, size_t nsrcs, Mat* dst, size_t ndsts, cons
|
||||
}
|
||||
}
|
||||
|
||||
namespace cv {
|
||||
|
||||
static void getUMatIndex(const std::vector<UMat> & um, int cn, int & idx, int & cnidx)
|
||||
{
|
||||
int totalChannels = 0;
|
||||
for (size_t i = 0, size = um.size(); i < size; ++i)
|
||||
{
|
||||
int ccn = um[i].channels();
|
||||
totalChannels += ccn;
|
||||
|
||||
if (totalChannels == cn)
|
||||
{
|
||||
idx = (int)(i + 1);
|
||||
cnidx = 0;
|
||||
return;
|
||||
}
|
||||
else if (totalChannels > cn)
|
||||
{
|
||||
idx = (int)i;
|
||||
cnidx = i == 0 ? cn : (cn - totalChannels + ccn);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
idx = cnidx = -1;
|
||||
}
|
||||
|
||||
// OpenCL implementation of mixChannels for vectors of UMat.
//
//   _src, _dst - must wrap std::vector<UMat>; the caller checks this before
//                calling, and the objects are reinterpreted without a check
//   fromTo     - npairs pairs of (source channel, destination channel), each
//                a zero-based index into the concatenated channels of all
//                source / destination matrices
//   npairs     - number of pairs in fromTo
//
// All matrices must share the size and depth of src[0]. Returns true when the
// kernel ran; returns false when this path cannot handle the request (kernel
// build failure, or a negative source channel), in which case the caller runs
// the generic implementation.
static bool ocl_mixChannels(InputArrayOfArrays _src, InputOutputArrayOfArrays _dst,
                            const int* fromTo, size_t npairs)
{
    const std::vector<UMat> & src = *(const std::vector<UMat> *)_src.getObj();
    std::vector<UMat> & dst = *(std::vector<UMat> *)_dst.getObj();

    size_t nsrc = src.size(), ndst = dst.size();
    CV_Assert(nsrc > 0 && ndst > 0);

    Size size = src[0].size();
    int depth = src[0].depth(), esz = CV_ELEM_SIZE(depth);

    for (size_t i = 1; i < nsrc; ++i)
        CV_Assert(src[i].size() == size && src[i].depth() == depth);
    for (size_t i = 0; i < ndst; ++i)
        CV_Assert(dst[i].size() == size && dst[i].depth() == depth);

    String declsrc, decldst, declproc, declcn;
    std::vector<UMat> srcargs(npairs), dstargs(npairs);

    for (size_t i = 0; i < npairs; ++i)
    {
        int scn = fromTo[i<<1], dcn = fromTo[(i<<1) + 1];
        int src_idx, src_cnidx, dst_idx, dst_cnidx;

        // A negative source channel is a special request in the documented
        // mixChannels contract (zero-fill the destination channel), which this
        // kernel does not implement. Hand it to the generic implementation
        // instead of computing a negative buffer offset.
        if (scn < 0)
            return false;
        CV_Assert(dcn >= 0);

        getUMatIndex(src, scn, src_idx, src_cnidx);
        getUMatIndex(dst, dcn, dst_idx, dst_cnidx);

        // Reject channel indices that are out of range at either end before
        // they are used to index the vectors.
        CV_Assert(dst_idx >= 0 && src_idx >= 0);
        CV_Assert((size_t)dst_idx < ndst && (size_t)src_idx < nsrc);

        // Select the channel by shifting the buffer offset by whole
        // single-channel elements.
        srcargs[i] = src[src_idx];
        srcargs[i].offset += src_cnidx * esz;

        dstargs[i] = dst[dst_idx];
        dstargs[i].offset += dst_cnidx * esz;

        // "%d" expects an int; passing the size_t loop counter directly is
        // undefined behaviour where size_t is wider than int.
        const int pair = (int)i;
        declsrc += format("DECLARE_INPUT_MAT(%d)", pair);
        decldst += format("DECLARE_OUTPUT_MAT(%d)", pair);
        declproc += format("PROCESS_ELEM(%d)", pair);
        declcn += format(" -D scn%d=%d -D dcn%d=%d", pair, src[src_idx].channels(), pair, dst[dst_idx].channels());
    }

    ocl::Kernel k("mixChannels", ocl::core::mixchannels_oclsrc,
                  format("-D T=%s -D DECLARE_INPUT_MATS=%s -D DECLARE_OUTPUT_MATS=%s"
                         " -D PROCESS_ELEMS=%s%s", ocl::memopTypeToStr(depth),
                         declsrc.c_str(), decldst.c_str(), declproc.c_str(), declcn.c_str()));
    if (k.empty())
        return false;

    // Kernel arguments: all inputs, then all outputs, then rows and columns.
    int argindex = 0;
    for (size_t i = 0; i < npairs; ++i)
        argindex = k.set(argindex, ocl::KernelArg::ReadOnlyNoSize(srcargs[i]));
    for (size_t i = 0; i < npairs; ++i)
        argindex = k.set(argindex, ocl::KernelArg::WriteOnlyNoSize(dstargs[i]));
    k.set(k.set(argindex, size.height), size.width);

    // One work-item per pixel.
    size_t globalsize[2] = { (size_t)size.width, (size_t)size.height };
    return k.run(2, globalsize, NULL, false);
}
|
||||
|
||||
}
|
||||
|
||||
void cv::mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,
|
||||
const int* fromTo, size_t npairs)
|
||||
{
|
||||
if(npairs == 0)
|
||||
if (npairs == 0 || fromTo == NULL)
|
||||
return;
|
||||
|
||||
if (ocl::useOpenCL() && src.isUMatVector() && dst.isUMatVector() &&
|
||||
ocl_mixChannels(src, dst, fromTo, npairs))
|
||||
return;
|
||||
|
||||
bool src_is_mat = src.kind() != _InputArray::STD_VECTOR_MAT &&
|
||||
src.kind() != _InputArray::STD_VECTOR_VECTOR;
|
||||
src.kind() != _InputArray::STD_VECTOR_VECTOR &&
|
||||
src.kind() != _InputArray::STD_VECTOR_UMAT;
|
||||
bool dst_is_mat = dst.kind() != _InputArray::STD_VECTOR_MAT &&
|
||||
dst.kind() != _InputArray::STD_VECTOR_VECTOR;
|
||||
dst.kind() != _InputArray::STD_VECTOR_VECTOR &&
|
||||
dst.kind() != _InputArray::STD_VECTOR_UMAT;
|
||||
int i;
|
||||
int nsrc = src_is_mat ? 1 : (int)src.total();
|
||||
int ndst = dst_is_mat ? 1 : (int)dst.total();
|
||||
@ -639,12 +734,19 @@ void cv::mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,
|
||||
void cv::mixChannels(InputArrayOfArrays src, InputOutputArrayOfArrays dst,
|
||||
const std::vector<int>& fromTo)
|
||||
{
|
||||
if(fromTo.empty())
|
||||
if (fromTo.empty())
|
||||
return;
|
||||
|
||||
if (ocl::useOpenCL() && src.isUMatVector() && dst.isUMatVector() &&
|
||||
ocl_mixChannels(src, dst, &fromTo[0], fromTo.size()>>1))
|
||||
return;
|
||||
|
||||
bool src_is_mat = src.kind() != _InputArray::STD_VECTOR_MAT &&
|
||||
src.kind() != _InputArray::STD_VECTOR_VECTOR;
|
||||
src.kind() != _InputArray::STD_VECTOR_VECTOR &&
|
||||
src.kind() != _InputArray::STD_VECTOR_UMAT;
|
||||
bool dst_is_mat = dst.kind() != _InputArray::STD_VECTOR_MAT &&
|
||||
dst.kind() != _InputArray::STD_VECTOR_VECTOR;
|
||||
dst.kind() != _InputArray::STD_VECTOR_VECTOR &&
|
||||
dst.kind() != _InputArray::STD_VECTOR_UMAT;
|
||||
int i;
|
||||
int nsrc = src_is_mat ? 1 : (int)src.total();
|
||||
int ndst = dst_is_mat ? 1 : (int)dst.total();
|
||||
@ -1161,10 +1263,49 @@ static BinaryFunc getConvertScaleFunc(int sdepth, int ddepth)
|
||||
return cvtScaleTab[CV_MAT_DEPTH(ddepth)][CV_MAT_DEPTH(sdepth)];
|
||||
}
|
||||
|
||||
// OpenCL implementation of convertScaleAbs.
//
// Builds the "KF" kernel from the arithm program with OP_CONVERT_SCALE_ABS and
// an 8-bit unsigned destination that has the source's size and channel count.
// Returns false, so that the caller can use the generic implementation, when
// the source is 64-bit float on a device without double support or when the
// kernel cannot be built; otherwise returns the result of the kernel launch.
static bool ocl_convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta )
{
    const int type = _src.type();
    const int depth = CV_MAT_DEPTH(type);
    const int cn = CV_MAT_CN(type);
    const bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;

    if (depth == CV_64F && !doubleSupport)
        return false;

    // The working depth is at least 32-bit float.
    const int wdepth = depth > CV_32F ? depth : CV_32F;

    char cvt[2][50];
    const String buildOptions =
        format("-D OP_CONVERT_SCALE_ABS -D UNARY_OP -D dstT=uchar -D srcT1=%s"
               " -D workT=%s -D convertToWT1=%s -D convertToDT=%s%s",
               ocl::typeToStr(depth), ocl::typeToStr(wdepth),
               ocl::convertTypeStr(depth, wdepth, 1, cvt[0]),
               ocl::convertTypeStr(wdepth, CV_8U, 1, cvt[1]),
               doubleSupport ? " -D DOUBLE_SUPPORT" : "");

    ocl::Kernel k("KF", ocl::core::arithm_oclsrc, buildOptions);
    if (k.empty())
        return false;

    _dst.createSameSize(_src, CV_8UC(cn));
    UMat src = _src.getUMat();
    UMat dst = _dst.getUMat();

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src);
    ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(dst, cn);

    // The scale and shift are passed in the precision of the working type.
    if (wdepth == CV_64F)
        k.args(srcarg, dstarg, alpha, beta);
    else if (wdepth == CV_32F)
        k.args(srcarg, dstarg, (float)alpha, (float)beta);

    // The global width is cols * cn, matching the channel multiplier given to
    // the destination argument.
    size_t globalsize[2] = { (size_t)(src.cols * cn), (size_t)src.rows };
    return k.run(2, globalsize, NULL, false);
}
|
||||
|
||||
}
|
||||
|
||||
void cv::convertScaleAbs( InputArray _src, OutputArray _dst, double alpha, double beta )
|
||||
{
|
||||
if (ocl::useOpenCL() && _src.dims() <= 2 && _dst.isUMat() &&
|
||||
ocl_convertScaleAbs(_src, _dst, alpha, beta))
|
||||
return;
|
||||
|
||||
Mat src = _src.getMat();
|
||||
int cn = src.channels();
|
||||
double scale[] = {alpha, beta};
|
||||
|
@ -1726,8 +1726,8 @@ static bool ocl_dft(InputArray _src, OutputArray _dst, int flags)
|
||||
void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
|
||||
{
|
||||
#ifdef HAVE_CLAMDFFT
|
||||
if (ocl::useOpenCL() && ocl::haveAmdFft() && _dst.isUMat() && _src0.dims() <= 2
|
||||
&& nonzero_rows == 0 && ocl_dft(_src0, _dst, flags))
|
||||
if (ocl::useOpenCL() && ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
|
||||
_dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0 && ocl_dft(_src0, _dst, flags))
|
||||
return;
|
||||
#endif
|
||||
|
||||
@ -2577,7 +2577,7 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
|
||||
|
||||
DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2];
|
||||
|
||||
if( (flags & DFT_ROWS) || src.rows == 1 ||
|
||||
if( (flags & DCT_ROWS) || src.rows == 1 ||
|
||||
(src.cols == 1 && (src.isContinuous() && dst.isContinuous())))
|
||||
{
|
||||
stage = end_stage = 0;
|
||||
@ -2597,7 +2597,7 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
|
||||
{
|
||||
len = src.cols;
|
||||
count = src.rows;
|
||||
if( len == 1 && !(flags & DFT_ROWS) )
|
||||
if( len == 1 && !(flags & DCT_ROWS) )
|
||||
{
|
||||
len = src.rows;
|
||||
count = 1;
|
||||
|
@ -2364,12 +2364,31 @@ bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double ma
|
||||
return badPt.x < 0;
|
||||
}
|
||||
|
||||
// OpenCL implementation of patchNaNs: runs the "KF" kernel of the arithm
// program, built with OP_PATCH_NANS, in place on _a, passing `value` as the
// replacement.
//
// Returns false when the kernel cannot be built, so the caller can use the
// generic implementation; otherwise returns the result of the kernel launch.
static bool ocl_patchNaNs( InputOutputArray _a, float value )
{
    ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
                  format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=int"));
    if (k.empty())
        return false;

    UMat a = _a.getUMat();
    int cn = a.channels();

    // The matrix is processed as rows of cols * cn scalar elements, so the
    // write argument carries the channel multiplier to match the global size
    // below. Without it the column bound given to the kernel was only `cols`,
    // which would leave the trailing elements of every multi-channel row
    // unprocessed (presumably the kernel guards on that bound, TODO confirm
    // against the arithm kernel source).
    k.args(ocl::KernelArg::ReadOnlyNoSize(a),
           ocl::KernelArg::WriteOnly(a, cn), value);

    size_t globalsize[2] = { (size_t)(a.cols * cn), (size_t)a.rows };
    return k.run(2, globalsize, NULL, false);
}
|
||||
|
||||
void patchNaNs( InputOutputArray _a, double _val )
|
||||
{
|
||||
Mat a = _a.getMat();
|
||||
CV_Assert( a.depth() == CV_32F );
|
||||
CV_Assert( _a.depth() == CV_32F );
|
||||
|
||||
if (ocl::useOpenCL() && _a.isUMat() && _a.dims() <= 2 && ocl_patchNaNs(_a, (float)_val))
|
||||
return;
|
||||
|
||||
Mat a = _a.getMat();
|
||||
const Mat* arrays[] = {&a, 0};
|
||||
int* ptrs[1];
|
||||
NAryMatIterator it(arrays, (uchar**)ptrs);
|
||||
|
@ -41,6 +41,7 @@
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
|
||||
|
||||
#ifdef HAVE_IPP
|
||||
@ -724,7 +725,7 @@ static bool ocl_gemm( InputArray matA, InputArray matB, double alpha,
|
||||
|
||||
UMat A = matA.getUMat(), B = matB.getUMat(), D = matD.getUMat();
|
||||
if (haveC)
|
||||
ctrans ? transpose(matC, D) : matC.getMat().copyTo(D); // TODO fix it as soon as .copyTo works as expected
|
||||
ctrans ? transpose(matC, D) : matC.copyTo(D);
|
||||
else
|
||||
D.setTo(Scalar::all(0));
|
||||
|
||||
@ -2154,20 +2155,61 @@ static void scaleAdd_64f(const double* src1, const double* src2, double* dst,
|
||||
|
||||
typedef void (*ScaleAddFunc)(const uchar* src1, const uchar* src2, uchar* dst, int len, const void* alpha);
|
||||
|
||||
// OpenCL path of scaleAdd(): dst = src1 * alpha + src2, computed by the "KF"
// kernel built with -D OP_SCALE_ADD.
//
// type is the common element type of both sources; the working depth is at
// least CV_32F. Returns false (without touching _dst) when the device lacks
// double support for CV_64F data, when the source sizes differ, or when the
// kernel cannot be built; otherwise returns the result of the kernel launch.
static bool ocl_scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst, int type )
{
    const int depth = CV_MAT_DEPTH(type);
    const int cn = CV_MAT_CN(type);
    const int wdepth = std::max(depth, CV_32F);
    const bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
    const Size size = _src1.size();

    if (!doubleSupport && depth == CV_64F)
        return false;
    if (size != _src2.size())
        return false;

    // Scratch buffers for the two conversion-function names.
    char cvt[2][50];
    ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
                  format("-D OP_SCALE_ADD -D BINARY_OP -D dstT=%s -D workT=%s -D convertToWT1=%s"
                         " -D srcT1=dstT -D srcT2=dstT -D convertToDT=%s%s", ocl::typeToStr(depth),
                         ocl::typeToStr(wdepth), ocl::convertTypeStr(depth, wdepth, 1, cvt[0]),
                         ocl::convertTypeStr(wdepth, depth, 1, cvt[1]),
                         doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
    if (k.empty())
        return false;

    _dst.create(size, type);
    UMat src1 = _src1.getUMat();
    UMat src2 = _src2.getUMat();
    UMat dst = _dst.getUMat();

    ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1);
    ocl::KernelArg src2arg = ocl::KernelArg::ReadOnlyNoSize(src2);
    ocl::KernelArg dstarg = ocl::KernelArg::WriteOnly(dst, cn);

    // alpha must be passed with the width of the kernel's working type.
    if (wdepth != CV_32F)
        k.args(src1arg, src2arg, dstarg, alpha);
    else
        k.args(src1arg, src2arg, dstarg, (float)alpha);

    size_t globalsize[2] = { dst.cols * cn, dst.rows };
    return k.run(2, globalsize, NULL, false);
}
|
||||
|
||||
}
|
||||
|
||||
void cv::scaleAdd( InputArray _src1, double alpha, InputArray _src2, OutputArray _dst )
|
||||
{
|
||||
Mat src1 = _src1.getMat(), src2 = _src2.getMat();
|
||||
int depth = src1.depth(), cn = src1.channels();
|
||||
int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
|
||||
CV_Assert( type == _src2.type() );
|
||||
|
||||
if (ocl::useOpenCL() && _src1.dims() <= 2 && _src2.dims() <= 2 && _dst.isUMat() &&
|
||||
ocl_scaleAdd(_src1, alpha, _src2, _dst, type))
|
||||
return;
|
||||
|
||||
CV_Assert( src1.type() == src2.type() );
|
||||
if( depth < CV_32F )
|
||||
{
|
||||
addWeighted(_src1, alpha, _src2, 1, 0, _dst, depth);
|
||||
return;
|
||||
}
|
||||
|
||||
Mat src1 = _src1.getMat(), src2 = _src2.getMat();
|
||||
CV_Assert(src1.size == src2.size);
|
||||
|
||||
_dst.create(src1.dims, src1.size, src1.type());
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
|
@ -1430,6 +1430,16 @@ Size _InputArray::size(int i) const
|
||||
return vv[i].size();
|
||||
}
|
||||
|
||||
if( k == STD_VECTOR_UMAT )
|
||||
{
|
||||
const std::vector<UMat>& vv = *(const std::vector<UMat>*)obj;
|
||||
if( i < 0 )
|
||||
return vv.empty() ? Size() : Size((int)vv.size(), 1);
|
||||
CV_Assert( i < (int)vv.size() );
|
||||
|
||||
return vv[i].size();
|
||||
}
|
||||
|
||||
if( k == OPENGL_BUFFER )
|
||||
{
|
||||
CV_Assert( i < 0 );
|
||||
@ -2262,6 +2272,12 @@ void _OutputArray::release() const
|
||||
return;
|
||||
}
|
||||
|
||||
if( k == UMAT )
|
||||
{
|
||||
((UMat*)obj)->release();
|
||||
return;
|
||||
}
|
||||
|
||||
if( k == GPU_MAT )
|
||||
{
|
||||
((cuda::GpuMat*)obj)->release();
|
||||
@ -2301,6 +2317,12 @@ void _OutputArray::release() const
|
||||
return;
|
||||
}
|
||||
|
||||
if( k == STD_VECTOR_UMAT )
|
||||
{
|
||||
((std::vector<UMat>*)obj)->clear();
|
||||
return;
|
||||
}
|
||||
|
||||
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
|
||||
}
|
||||
|
||||
@ -2760,39 +2782,24 @@ void cv::transpose( InputArray _src, OutputArray _dst )
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////// completeSymm /////////////////////////////////////////
|
||||
|
||||
void cv::completeSymm( InputOutputArray _m, bool LtoR )
|
||||
{
|
||||
Mat m = _m.getMat();
|
||||
CV_Assert( m.dims <= 2 );
|
||||
size_t step = m.step, esz = m.elemSize();
|
||||
CV_Assert( m.dims <= 2 && m.rows == m.cols );
|
||||
|
||||
int i, j, nrows = m.rows, type = m.type();
|
||||
int j0 = 0, j1 = nrows;
|
||||
CV_Assert( m.rows == m.cols );
|
||||
int rows = m.rows;
|
||||
int j0 = 0, j1 = rows;
|
||||
|
||||
if( type == CV_32FC1 || type == CV_32SC1 )
|
||||
uchar* data = m.data;
|
||||
for( int i = 0; i < rows; i++ )
|
||||
{
|
||||
int* data = (int*)m.data;
|
||||
size_t step = m.step/sizeof(data[0]);
|
||||
for( i = 0; i < nrows; i++ )
|
||||
{
|
||||
if( !LtoR ) j1 = i; else j0 = i+1;
|
||||
for( j = j0; j < j1; j++ )
|
||||
data[i*step + j] = data[j*step + i];
|
||||
}
|
||||
if( !LtoR ) j1 = i; else j0 = i+1;
|
||||
for( int j = j0; j < j1; j++ )
|
||||
memcpy(data + (i*step + j*esz), data + (j*step + i*esz), esz);
|
||||
}
|
||||
else if( type == CV_64FC1 )
|
||||
{
|
||||
double* data = (double*)m.data;
|
||||
size_t step = m.step/sizeof(data[0]);
|
||||
for( i = 0; i < nrows; i++ )
|
||||
{
|
||||
if( !LtoR ) j1 = i; else j0 = i+1;
|
||||
for( j = j0; j < j1; j++ )
|
||||
data[i*step + j] = data[j*step + i];
|
||||
}
|
||||
}
|
||||
else
|
||||
CV_Error( CV_StsUnsupportedFormat, "" );
|
||||
}
|
||||
|
||||
|
||||
|
@ -41,6 +41,9 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <iostream> // std::cerr
|
||||
|
||||
#include "opencv2/core/opencl/runtime/opencl_clamdblas.hpp"
|
||||
#include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
|
||||
@ -1905,6 +1908,219 @@ const Device& Device::getDefault()
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template <typename Functor, typename ObjectType>
|
||||
inline cl_int getStringInfo(Functor f, ObjectType obj, cl_uint name, std::string& param)
|
||||
{
|
||||
::size_t required;
|
||||
cl_int err = f(obj, name, 0, NULL, &required);
|
||||
if (err != CL_SUCCESS)
|
||||
return err;
|
||||
|
||||
param.clear();
|
||||
if (required > 0)
|
||||
{
|
||||
AutoBuffer<char> buf(required + 1);
|
||||
char* ptr = (char*)buf; // cleanup is not needed
|
||||
err = f(obj, name, required, ptr, NULL);
|
||||
if (err != CL_SUCCESS)
|
||||
return err;
|
||||
param = ptr;
|
||||
}
|
||||
|
||||
return CL_SUCCESS;
|
||||
};
|
||||
|
||||
// Splits 's' at every occurrence of 'delim' and stores the pieces in 'elems'
// (previous contents are discarded). Empty fields are kept, including one
// after a trailing delimiter; an empty input yields an empty vector.
static void split(const std::string &s, char delim, std::vector<std::string> &elems)
{
    elems.clear();
    if (s.empty())
        return;

    std::string::size_type fieldStart = 0;
    for (;;)
    {
        const std::string::size_type delimPos = s.find(delim, fieldStart);
        if (delimPos == std::string::npos)
        {
            // Last field: everything after the final delimiter (may be empty).
            elems.push_back(s.substr(fieldStart));
            break;
        }
        elems.push_back(s.substr(fieldStart, delimPos - fieldStart));
        fieldStart = delimPos + 1;
    }
}
|
||||
|
||||
// Layout: <Platform>:<CPU|GPU|ACCELERATOR|nothing=GPU/CPU>:<deviceName>
|
||||
// Sample: AMD:GPU:
|
||||
// Sample: AMD:GPU:Tahiti
|
||||
// Sample: :GPU|CPU: = '' = ':' = '::'
|
||||
static bool parseOpenCLDeviceConfiguration(const std::string& configurationStr,
|
||||
std::string& platform, std::vector<std::string>& deviceTypes, std::string& deviceNameOrID)
|
||||
{
|
||||
std::vector<std::string> parts;
|
||||
split(configurationStr, ':', parts);
|
||||
if (parts.size() > 3)
|
||||
{
|
||||
std::cerr << "ERROR: Invalid configuration string for OpenCL device" << std::endl;
|
||||
return false;
|
||||
}
|
||||
if (parts.size() > 2)
|
||||
deviceNameOrID = parts[2];
|
||||
if (parts.size() > 1)
|
||||
{
|
||||
split(parts[1], '|', deviceTypes);
|
||||
}
|
||||
if (parts.size() > 0)
|
||||
{
|
||||
platform = parts[0];
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Picks the OpenCL device described by the OPENCV_OPENCL_DEVICE environment
// variable (see parseOpenCLDeviceConfiguration for the layout).
//
// - Platform: first platform whose name contains the requested substring;
//   all platforms when none is requested.
// - Device types: tried in the listed order; defaults to GPU then CPU, or to
//   ALL when the device field is a numeric ID.
// - Device: a one-character digit is an index into the collected device list;
//   anything else is matched as a substring of the device name (an empty
//   name matches the first device).
//
// Returns the selected device, or NULL after printing a diagnostic to
// std::cerr when the configuration is invalid or nothing matches. Returns
// NULL silently when no platforms are reported.
static cl_device_id selectOpenCLDevice()
{
    std::string platform;
    std::vector<std::string> deviceTypes;
    std::string deviceName;
    const char* configuration = getenv("OPENCV_OPENCL_DEVICE");
    if (configuration)
    {
        if (!parseOpenCLDeviceConfiguration(std::string(configuration), platform, deviceTypes, deviceName))
            return NULL;
    }

    bool isID = false;
    int deviceID = -1;
    if (deviceName.length() == 1)
    // We limit ID range to 0..9, because we want to write:
    // - '2500' to mean i5-2500
    // - '8350' to mean AMD FX-8350
    // - '650' to mean GeForce 650
    // To extend ID range change condition to '> 0'
    {
        isID = true;
        for (size_t i = 0; i < deviceName.length(); i++)
        {
            // isdigit() is undefined for negative arguments, so go through
            // unsigned char for bytes above 0x7f.
            if (!isdigit((unsigned char)deviceName[i]))
            {
                isID = false;
                break;
            }
        }
        if (isID)
        {
            deviceID = atoi(deviceName.c_str());
            CV_Assert(deviceID >= 0);
        }
    }

    cl_int status = CL_SUCCESS;
    std::vector<cl_platform_id> platforms;
    {
        cl_uint numPlatforms = 0;
        status = clGetPlatformIDs(0, NULL, &numPlatforms);
        CV_Assert(status == CL_SUCCESS);
        if (numPlatforms == 0)
            return NULL;
        platforms.resize((size_t)numPlatforms);
        status = clGetPlatformIDs(numPlatforms, &platforms[0], &numPlatforms);
        CV_Assert(status == CL_SUCCESS);
        platforms.resize(numPlatforms);
    }

    int selectedPlatform = -1;
    if (platform.length() > 0)
    {
        for (size_t i = 0; i < platforms.size(); i++)
        {
            std::string name;
            status = getStringInfo(clGetPlatformInfo, platforms[i], CL_PLATFORM_NAME, name);
            CV_Assert(status == CL_SUCCESS);
            if (name.find(platform) != std::string::npos)
            {
                selectedPlatform = (int)i;
                break;
            }
        }
        if (selectedPlatform == -1)
        {
            std::cerr << "ERROR: Can't find OpenCL platform by name: " << platform << std::endl;
            goto not_found;
        }
    }

    if (deviceTypes.size() == 0)
    {
        if (!isID)
        {
            deviceTypes.push_back("GPU");
            deviceTypes.push_back("CPU");
        }
        else
        {
            deviceTypes.push_back("ALL");
        }
    }
    for (size_t t = 0; t < deviceTypes.size(); t++)
    {
        int deviceType = 0;
        if (deviceTypes[t] == "GPU")
        {
            deviceType = Device::TYPE_GPU;
        }
        else if (deviceTypes[t] == "CPU")
        {
            deviceType = Device::TYPE_CPU;
        }
        else if (deviceTypes[t] == "ACCELERATOR")
        {
            deviceType = Device::TYPE_ACCELERATOR;
        }
        else if (deviceTypes[t] == "ALL")
        {
            deviceType = Device::TYPE_ALL;
        }
        else
        {
            std::cerr << "ERROR: Unsupported device type for OpenCL device (GPU, CPU, ACCELERATOR): " << deviceTypes[t] << std::endl;
            goto not_found;
        }

        // Collect the devices of this type from the selected platform, or
        // from every platform when none was requested.
        std::vector<cl_device_id> devices; // TODO Use clReleaseDevice to cleanup
        for (int i = selectedPlatform >= 0 ? selectedPlatform : 0;
                (selectedPlatform >= 0 ? i == selectedPlatform : true) && (i < (int)platforms.size());
                i++)
        {
            cl_uint count = 0;
            status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &count);
            CV_Assert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND);
            if (count == 0)
                continue;
            size_t base = devices.size();
            devices.resize(base + count);
            status = clGetDeviceIDs(platforms[i], deviceType, count, &devices[base], &count);
            CV_Assert(status == CL_SUCCESS || status == CL_DEVICE_NOT_FOUND);
        }

        // With a numeric ID only that single index is examined; otherwise
        // every collected device is matched by name.
        for (size_t i = (isID ? deviceID : 0);
             (isID ? (i == (size_t)deviceID) : true) && (i < devices.size());
             i++)
        {
            std::string name;
            status = getStringInfo(clGetDeviceInfo, devices[i], CL_DEVICE_NAME, name);
            CV_Assert(status == CL_SUCCESS);
            if (isID || name.find(deviceName) != std::string::npos)
            {
                // TODO check for OpenCL 1.1
                return devices[i];
            }
        }
    }
not_found:
    std::cerr << "ERROR: Required OpenCL device not found, check configuration: " << (configuration == NULL ? "" : configuration) << std::endl
            << "    Platform: " << (platform.length() == 0 ? "any" : platform) << std::endl
            << "    Device types: ";
    for (size_t t = 0; t < deviceTypes.size(); t++)
    {
        std::cerr << deviceTypes[t] << " ";
    }
    std::cerr << std::endl << "    Device name: " << (deviceName.length() == 0 ? "any" : deviceName) << std::endl;
    return NULL;
}
|
||||
|
||||
struct Context2::Impl
|
||||
{
|
||||
Impl()
|
||||
@ -1913,6 +2129,42 @@ struct Context2::Impl
|
||||
handle = 0;
|
||||
}
|
||||
|
||||
void setDefault()
|
||||
{
|
||||
CV_Assert(handle == NULL);
|
||||
|
||||
cl_device_id d = selectOpenCLDevice();
|
||||
|
||||
if (d == NULL)
|
||||
return;
|
||||
|
||||
cl_platform_id pl = NULL;
|
||||
cl_int status = clGetDeviceInfo(d, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &pl, NULL);
|
||||
CV_Assert(status == CL_SUCCESS);
|
||||
|
||||
cl_context_properties prop[] =
|
||||
{
|
||||
CL_CONTEXT_PLATFORM, (cl_context_properties)pl,
|
||||
0
|
||||
};
|
||||
|
||||
// !!! in the current implementation force the number of devices to 1 !!!
|
||||
int nd = 1;
|
||||
|
||||
handle = clCreateContext(prop, nd, &d, 0, 0, &status);
|
||||
CV_Assert(status == CL_SUCCESS);
|
||||
bool ok = handle != 0 && status >= 0;
|
||||
if( ok )
|
||||
{
|
||||
devices.resize(nd);
|
||||
devices[0].set(d);
|
||||
}
|
||||
else
|
||||
{
|
||||
handle = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
Impl(int dtype0)
|
||||
{
|
||||
refcount = 1;
|
||||
@ -2022,6 +2274,21 @@ Context2::Context2(int dtype)
|
||||
create(dtype);
|
||||
}
|
||||
|
||||
bool Context2::create()
|
||||
{
|
||||
if( !haveOpenCL() )
|
||||
return false;
|
||||
if(p)
|
||||
p->release();
|
||||
p = new Impl();
|
||||
if(!p->handle)
|
||||
{
|
||||
delete p;
|
||||
p = 0;
|
||||
}
|
||||
return p != 0;
|
||||
}
|
||||
|
||||
bool Context2::create(int dtype0)
|
||||
{
|
||||
if( !haveOpenCL() )
|
||||
@ -2039,7 +2306,11 @@ bool Context2::create(int dtype0)
|
||||
|
||||
Context2::~Context2()
|
||||
{
|
||||
p->release();
|
||||
if (p)
|
||||
{
|
||||
p->release();
|
||||
p = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
Context2::Context2(const Context2& c)
|
||||
@ -2062,7 +2333,7 @@ Context2& Context2::operator = (const Context2& c)
|
||||
|
||||
void* Context2::ptr() const
|
||||
{
|
||||
return p->handle;
|
||||
return p == NULL ? NULL : p->handle;
|
||||
}
|
||||
|
||||
size_t Context2::ndevices() const
|
||||
@ -2081,23 +2352,16 @@ Context2& Context2::getDefault(bool initialize)
|
||||
static Context2 ctx;
|
||||
if(!ctx.p && haveOpenCL())
|
||||
{
|
||||
if (!ctx.p)
|
||||
ctx.p = new Impl();
|
||||
if (initialize)
|
||||
{
|
||||
// do not create new Context2 right away.
|
||||
// First, try to retrieve existing context of the same type.
|
||||
// In its turn, Platform::getContext() may call Context2::create()
|
||||
// if there is no such context.
|
||||
ctx.create(Device::TYPE_ACCELERATOR);
|
||||
if(!ctx.p)
|
||||
ctx.create(Device::TYPE_DGPU);
|
||||
if(!ctx.p)
|
||||
ctx.create(Device::TYPE_IGPU);
|
||||
if(!ctx.p)
|
||||
ctx.create(Device::TYPE_CPU);
|
||||
}
|
||||
else
|
||||
{
|
||||
ctx.p = new Impl();
|
||||
if (ctx.p->handle == NULL)
|
||||
ctx.p->setDefault();
|
||||
}
|
||||
}
|
||||
|
||||
@ -2553,6 +2817,16 @@ size_t Kernel::workGroupSize() const
|
||||
sizeof(val), &val, &retsz) >= 0 ? val : 0;
|
||||
}
|
||||
|
||||
size_t Kernel::preferedWorkGroupSizeMultiple() const
|
||||
{
|
||||
if(!p)
|
||||
return 0;
|
||||
size_t val = 0, retsz = 0;
|
||||
cl_device_id dev = (cl_device_id)Device::getDefault().ptr();
|
||||
return clGetKernelWorkGroupInfo(p->handle, dev, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
|
||||
sizeof(val), &val, &retsz) >= 0 ? val : 0;
|
||||
}
|
||||
|
||||
bool Kernel::compileWorkGroupSize(size_t wsz[]) const
|
||||
{
|
||||
if(!p || !wsz)
|
||||
@ -2616,11 +2890,16 @@ struct Program::Impl
|
||||
if( retval >= 0 )
|
||||
{
|
||||
errmsg = String(buf);
|
||||
CV_Error_(Error::StsAssert, ("OpenCL program can not be built: %s", errmsg.c_str()));
|
||||
printf("OpenCL program can not be built: %s", errmsg.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if( handle )
|
||||
{
|
||||
clReleaseProgram(handle);
|
||||
handle = NULL;
|
||||
}
|
||||
}
|
||||
CV_Assert(retval >= 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -91,6 +91,9 @@
|
||||
|
||||
#else
|
||||
|
||||
#ifndef convertToWT2
|
||||
#define convertToWT2 convertToWT1
|
||||
#endif
|
||||
#define srcelem1 convertToWT1(*(__global srcT1*)(srcptr1 + src1_index))
|
||||
#define srcelem2 convertToWT2(*(__global srcT2*)(srcptr2 + src2_index))
|
||||
|
||||
@ -223,13 +226,17 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
|
||||
#define convertToWT2
|
||||
#define PROCESS_ELEM dstelem = convert_uchar(srcelem1 CMP_OPERATOR srcelem2 ? 255 : 0)
|
||||
|
||||
#elif defined OP_CONVERT
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1)
|
||||
|
||||
#elif defined OP_CONVERT_SCALE
|
||||
#elif defined OP_CONVERT_SCALE_ABS
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , workT alpha, workT beta
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1*alpha + beta)
|
||||
#define PROCESS_ELEM \
|
||||
workT value = srcelem1 * alpha + beta; \
|
||||
dstelem = convertToDT(value >= 0 ? value : -value)
|
||||
|
||||
#elif defined OP_SCALE_ADD
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , workT alpha
|
||||
#define PROCESS_ELEM dstelem = convertToDT(srcelem1 * alpha + srcelem2)
|
||||
|
||||
#elif defined OP_CTP_AD || defined OP_CTP_AR
|
||||
#ifdef OP_CTP_AD
|
||||
@ -264,6 +271,13 @@ dstelem = v > (dstT)(0) ? log(v) : log(-v)
|
||||
dstelem = cos(alpha) * x; \
|
||||
dstelem2 = sin(alpha) * x
|
||||
|
||||
#elif defined OP_PATCH_NANS
|
||||
#undef EXTRA_PARAMS
|
||||
#define EXTRA_PARAMS , int val
|
||||
#define PROCESS_ELEM \
|
||||
if (( srcelem1 & 0x7fffffff) > 0x7f800000 ) \
|
||||
dstelem = val
|
||||
|
||||
#else
|
||||
#error "unknown op type"
|
||||
#endif
|
||||
|
89
modules/core/src/opencl/inrange.cl
Normal file
89
modules/core/src/opencl/inrange.cl
Normal file
@ -0,0 +1,89 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the copyright holders or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Per-pixel range check: dst is set to 255 when every one of the cn channels
// of src1 satisfies src2[c] <= src1[c] <= src3[c], and to 0 otherwise.
// With HAVE_SCALAR the bounds are two cn-element arrays shared by all pixels;
// without it they are matrices addressed like src1. T and cn are expected to
// come from the build options.
__kernel void inrange(__global const uchar * src1ptr, int src1_step, int src1_offset,
                      __global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
#ifdef HAVE_SCALAR
                      __global const T * src2, __global const T * src3
#else
                      __global const uchar * src2ptr, int src2_step, int src2_offset,
                      __global const uchar * src3ptr, int src3_step, int src3_offset
#endif
                      )
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    // Work-items outside the destination have nothing to do.
    if (x >= dst_cols || y >= dst_rows)
        return;

    // Byte offset of pixel x inside a source row.
    int pixel_bytes = x*(int)sizeof(T)*cn;

    __global const T * src1 =
        (__global const T *)(src1ptr + mad24(y, src1_step, pixel_bytes + src1_offset));
    __global uchar * dst = dstptr + mad24(y, dst_step, x + dst_offset);

#ifndef HAVE_SCALAR
    __global const T * src2 =
        (__global const T *)(src2ptr + mad24(y, src2_step, pixel_bytes + src2_offset));
    __global const T * src3 =
        (__global const T *)(src3ptr + mad24(y, src3_step, pixel_bytes + src3_offset));
#endif

    uchar in_range = 255;

    #pragma unroll
    for (int c = 0; c < cn; ++c)
        if ( src2[c] > src1[c] || src3[c] < src1[c] )
        {
            in_range = 0;
            break;
        }

    dst[0] = in_range;
}
|
64
modules/core/src/opencl/mixchannels.cl
Normal file
64
modules/core/src/opencl/mixchannels.cl
Normal file
@ -0,0 +1,64 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the copyright holders or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
// Building blocks for the mixChannels kernel.
// NOTE(review): DECLARE_INPUT_MATS, DECLARE_OUTPUT_MATS, PROCESS_ELEMS, T,
// scn<i> and dcn<i> are not defined here - presumably they are supplied as
// build options that expand to one DECLARE_*/PROCESS_ELEM(i) per channel pair.

// Kernel parameters describing the i-th (read-only) source matrix.
#define DECLARE_INPUT_MAT(i) \
    __global const uchar * src##i##ptr, int src##i##_step, int src##i##_offset,

// Kernel parameters describing the i-th destination matrix. The pointer is
// not const: PROCESS_ELEM writes through it.
#define DECLARE_OUTPUT_MAT(i) \
    __global uchar * dst##i##ptr, int dst##i##_step, int dst##i##_offset,

// Copies the first T element at pixel (x, y) of source i to pixel (x, y) of
// destination i; scn##i / dcn##i scale the per-pixel byte offsets.
#define PROCESS_ELEM(i) \
    int src##i##_index = mad24(src##i##_step, y, x * (int)sizeof(T) * scn##i + src##i##_offset); \
    __global const T * src##i = (__global const T *)(src##i##ptr + src##i##_index); \
    int dst##i##_index = mad24(dst##i##_step, y, x * (int)sizeof(T) * dcn##i + dst##i##_offset); \
    __global T * dst##i = (__global T *)(dst##i##ptr + dst##i##_index); \
    dst##i[0] = src##i[0];

// One work-item per pixel; work-items outside rows x cols do nothing.
__kernel void mixChannels(DECLARE_INPUT_MATS DECLARE_OUTPUT_MATS int rows, int cols)
{
    int x = get_global_id(0);
    int y = get_global_id(1);

    if (x < cols && y < rows)
    {
        PROCESS_ELEMS
    }
}
|
@ -107,7 +107,7 @@ std::wstring GetTempPathWinRT()
|
||||
if (FAILED(WindowsCreateStringReference(RuntimeClass_Windows_Storage_ApplicationData,
|
||||
(UINT32)wcslen(RuntimeClass_Windows_Storage_ApplicationData), &hstrHead, &str)))
|
||||
return wstr;
|
||||
if (FAILED(Windows::Foundation::GetActivationFactory(str, appdataFactory.ReleaseAndGetAddressOf())))
|
||||
if (FAILED(RoGetActivationFactory(str, IID_PPV_ARGS(appdataFactory.ReleaseAndGetAddressOf()))))
|
||||
return wstr;
|
||||
if (FAILED(appdataFactory->get_Current(appdataRef.ReleaseAndGetAddressOf())))
|
||||
return wstr;
|
||||
|
@ -42,6 +42,8 @@
|
||||
#include "test_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
namespace cvtest {
|
||||
@ -1234,13 +1236,186 @@ OCL_TEST_P(Normalize, Mat)
|
||||
for (int i = 0, size = sizeof(modes) / sizeof(modes[0]); i < size; ++i)
|
||||
{
|
||||
OCL_OFF(cv::normalize(src1_roi, dst1_roi, 10, 110, modes[i], src1_roi.type(), mask_roi));
|
||||
OCL_ON(cv::normalize(usrc1_roi, udst1_roi, 10, 110, modes[i], src1_roi.type(), umask_roi));
|
||||
OCL_ON(cv::normalize(usrc1_roi, udst1_roi, 10, 110, modes[i], src1_roi.type(), umask_roi));
|
||||
|
||||
Near(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////// InRange ///////////////////////////////////////////////
|
||||
|
||||
// Fixture for the cv::inRange OpenCL tests. Parameters: matrix depth, channel
// count, scalar flag and ROI flag. Holds three source matrices, one 8-bit
// single-channel destination and two random scalar bounds.
PARAM_TEST_CASE(InRange, MatDepth, Channels, bool /*Scalar or not*/, bool /*Roi*/)
{
    int depth;
    int cn;
    bool scalars, use_roi;
    cv::Scalar val1, val2;

    TEST_DECLARE_INPUT_PARAMETER(src1)
    TEST_DECLARE_INPUT_PARAMETER(src2)
    TEST_DECLARE_INPUT_PARAMETER(src3)
    TEST_DECLARE_OUTPUT_PARAMETER(dst)

    // Unpacks the test parameters.
    virtual void SetUp()
    {
        depth = GET_PARAM(0);
        cn = GET_PARAM(1);
        scalars = GET_PARAM(2);
        use_roi = GET_PARAM(3);
    }

    // Fills the matrices with random data in [-40, 40] (destination: [5, 16]),
    // draws both scalar bounds from [-100, 100] and uploads everything.
    virtual void generateTestData()
    {
        const int srcType = CV_MAKE_TYPE(depth, cn);
        // Borders are only non-zero when the ROI variant is requested.
        const int maxBorder = use_roi ? MAX_VALUE : 0;

        Size roiSize = randomSize(1, MAX_VALUE);

        Border src1Border = randomBorder(0, maxBorder);
        randomSubMat(src1, src1_roi, roiSize, src1Border, srcType, -40, 40);

        Border src2Border = randomBorder(0, maxBorder);
        randomSubMat(src2, src2_roi, roiSize, src2Border, srcType, -40, 40);

        Border src3Border = randomBorder(0, maxBorder);
        randomSubMat(src3, src3_roi, roiSize, src3Border, srcType, -40, 40);

        Border dstBorder = randomBorder(0, maxBorder);
        randomSubMat(dst, dst_roi, roiSize, dstBorder, CV_8UC1, 5, 16);

        val1 = cv::Scalar(rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0),
                          rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0));
        val2 = cv::Scalar(rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0),
                          rng.uniform(-100.0, 100.0), rng.uniform(-100.0, 100.0));

        UMAT_UPLOAD_INPUT_PARAMETER(src1)
        UMAT_UPLOAD_INPUT_PARAMETER(src2)
        UMAT_UPLOAD_INPUT_PARAMETER(src3)
        UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
    }

    // Compares the destination matrices with zero tolerance.
    void Near()
    {
        OCL_EXPECT_MATS_NEAR(dst, 0)
    }
};
|
||||
|
||||
// inRange with matrix bounds: the OpenCL result must equal the CPU result.
OCL_TEST_P(InRange, Mat)
{
    int iteration = 0;
    while (iteration < test_loop_times)
    {
        generateTestData();

        OCL_OFF(cv::inRange(src1_roi, src2_roi, src3_roi, dst_roi));
        OCL_ON(cv::inRange(usrc1_roi, usrc2_roi, usrc3_roi, udst_roi));

        Near();
        ++iteration;
    }
}
|
||||
|
||||
// inRange with scalar bounds: the OpenCL result must equal the CPU result.
OCL_TEST_P(InRange, Scalar)
{
    int iteration = 0;
    while (iteration < test_loop_times)
    {
        generateTestData();

        OCL_OFF(cv::inRange(src1_roi, val1, val2, dst_roi));
        OCL_ON(cv::inRange(usrc1_roi, val1, val2, udst_roi));

        Near();
        ++iteration;
    }
}
|
||||
|
||||
//////////////////////////////// ConvertScaleAbs ////////////////////////////////////////////////
|
||||
|
||||
typedef ArithmTestBase ConvertScaleAbs;
|
||||
|
||||
OCL_TEST_P(ConvertScaleAbs, Mat)
|
||||
{
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
generateTestData();
|
||||
|
||||
OCL_OFF(cv::convertScaleAbs(src1_roi, dst1_roi, val[0], val[1]));
|
||||
OCL_ON(cv::convertScaleAbs(usrc1_roi, udst1_roi, val[0], val[1]));
|
||||
|
||||
Near(depth <= CV_32S ? 1 : 1e-6);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////// ScaleAdd ////////////////////////////////////////////////
|
||||
|
||||
typedef ArithmTestBase ScaleAdd;
|
||||
|
||||
OCL_TEST_P(ScaleAdd, Mat)
|
||||
{
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
generateTestData();
|
||||
|
||||
OCL_OFF(cv::scaleAdd(src1_roi, val[0], src2_roi, dst1_roi));
|
||||
OCL_ON(cv::scaleAdd(usrc1_roi, val[0], usrc2_roi, udst1_roi));
|
||||
|
||||
Near(depth <= CV_32S ? 1 : 1e-6);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////// PatchNans ////////////////////////////////////////////////
|
||||
|
||||
// Fixture for the cv::patchNaNs OpenCL test. Parameters: channel count and
// ROI flag. The single CV_32F matrix is patched in place, so it serves as
// both input and result.
PARAM_TEST_CASE(PatchNaNs, Channels, bool)
{
    int cn;
    bool use_roi;
    double value;

    TEST_DECLARE_INPUT_PARAMETER(src)

    // Unpacks the test parameters.
    virtual void SetUp()
    {
        cn = GET_PARAM(0);
        use_roi = GET_PARAM(1);
    }

    // Creates a random float matrix in [-40, 40], overwrites randomly chosen
    // elements with quiet NaNs, picks the replacement value and uploads.
    virtual void generateTestData()
    {
        const int type = CV_MAKE_TYPE(CV_32F, cn);

        Size roiSize = randomSize(1, 10);
        Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
        randomSubMat(src, src_roi, roiSize, srcBorder, type, -40, 40);

        // generating NaNs
        const int rowCount = roiSize.height;
        const int scalarsPerRow = roiSize.width * cn; // all channels of a row
        for (int row = 0; row < rowCount; ++row)
        {
            float * const rowPtr = src_roi.ptr<float>(row);
            for (int i = 0; i < scalarsPerRow; ++i)
            {
                if (randomInt(-1, 1) == 0)
                    rowPtr[i] = std::numeric_limits<float>::quiet_NaN();
            }
        }

        value = randomDouble(-100, 100);

        UMAT_UPLOAD_INPUT_PARAMETER(src)
    }

    // Compares the patched matrices with zero tolerance.
    void Near()
    {
        OCL_EXPECT_MATS_NEAR(src, 0)
    }
};
|
||||
|
||||
// Patches NaNs in place on the plain-Mat ROI (OCL_OFF) and on the UMat ROI
// (OCL_ON) with the same replacement value, then requires identical results.
OCL_TEST_P(PatchNaNs, Mat)
{
    for (int iter = 0; iter < test_loop_times; ++iter)
    {
        generateTestData();

        OCL_OFF(cv::patchNaNs(src_roi, value));
        OCL_ON(cv::patchNaNs(usrc_roi, value));

        Near();
    }
}
|
||||
|
||||
//////////////////////////////////////// Instantiation /////////////////////////////////////////
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine(::testing::Values(CV_8U, CV_8S), OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
|
||||
@ -1276,7 +1451,10 @@ OCL_INSTANTIATE_TEST_CASE_P(Arithm, MinMaxIdx_Mask, Combine(OCL_ALL_DEPTHS, ::te
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Norm, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Sqrt, Combine(::testing::Values(CV_32F, CV_64F), OCL_ALL_CHANNELS, Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, Normalize, Combine(OCL_ALL_DEPTHS, Values(Channels(1)), Bool()));
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, InRange, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool(), Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, ConvertScaleAbs, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, ScaleAdd, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Arithm, PatchNaNs, Combine(OCL_ALL_CHANNELS, Bool()));
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
||||
|
@ -52,7 +52,9 @@
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
|
||||
PARAM_TEST_CASE(MergeTestBase, MatDepth, Channels, bool)
|
||||
//////////////////////////////////////// Merge ///////////////////////////////////////////////
|
||||
|
||||
PARAM_TEST_CASE(Merge, MatDepth, Channels, bool)
|
||||
{
|
||||
int depth, cn;
|
||||
bool use_roi;
|
||||
@ -75,7 +77,7 @@ PARAM_TEST_CASE(MergeTestBase, MatDepth, Channels, bool)
|
||||
CV_Assert(cn >= 1 && cn <= 4);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
void generateTestData()
|
||||
{
|
||||
Size roiSize = randomSize(1, MAX_VALUE);
|
||||
|
||||
@ -117,13 +119,11 @@ PARAM_TEST_CASE(MergeTestBase, MatDepth, Channels, bool)
|
||||
}
|
||||
};
|
||||
|
||||
typedef MergeTestBase Merge;
|
||||
|
||||
OCL_TEST_P(Merge, Accuracy)
|
||||
{
|
||||
for(int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
random_roi();
|
||||
generateTestData();
|
||||
|
||||
OCL_OFF(cv::merge(src_roi, dst_roi));
|
||||
OCL_ON(cv::merge(usrc_roi, udst_roi));
|
||||
@ -132,7 +132,9 @@ OCL_TEST_P(Merge, Accuracy)
|
||||
}
|
||||
}
|
||||
|
||||
PARAM_TEST_CASE(SplitTestBase, MatType, Channels, bool)
|
||||
//////////////////////////////////////// Split ///////////////////////////////////////////////
|
||||
|
||||
PARAM_TEST_CASE(Split, MatType, Channels, bool)
|
||||
{
|
||||
int depth, cn;
|
||||
bool use_roi;
|
||||
@ -155,7 +157,7 @@ PARAM_TEST_CASE(SplitTestBase, MatType, Channels, bool)
|
||||
CV_Assert(cn >= 1 && cn <= 4);
|
||||
}
|
||||
|
||||
void random_roi()
|
||||
void generateTestData()
|
||||
{
|
||||
Size roiSize = randomSize(1, MAX_VALUE);
|
||||
Border srcBorder = randomBorder(0, use_roi ? MAX_VALUE : 0);
|
||||
@ -195,13 +197,11 @@ PARAM_TEST_CASE(SplitTestBase, MatType, Channels, bool)
|
||||
}
|
||||
};
|
||||
|
||||
typedef SplitTestBase Split;
|
||||
|
||||
OCL_TEST_P(Split, DISABLED_Accuracy)
|
||||
{
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
random_roi();
|
||||
generateTestData();
|
||||
|
||||
OCL_OFF(cv::split(src_roi, dst_roi));
|
||||
OCL_ON(cv::split(usrc_roi, udst_roi));
|
||||
@ -214,8 +214,150 @@ OCL_TEST_P(Split, DISABLED_Accuracy)
|
||||
}
|
||||
}
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(SplitMerge, Split, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
|
||||
//////////////////////////////////////// MixChannels ///////////////////////////////////////////////
|
||||
|
||||
// Fixture for cv::mixChannels: parameterised by matrix depth and by whether the
// matrices are sub-matrices (ROIs) of larger buffers. Up to four sources and
// four destinations are generated, each with its own random channel count, and
// a random list of (source channel, destination channel) pairs is produced.
PARAM_TEST_CASE(MixChannels, MatType, bool)
{
    int depth;
    bool use_roi;

    TEST_DECLARE_INPUT_PARAMETER(src1)
    TEST_DECLARE_INPUT_PARAMETER(src2)
    TEST_DECLARE_INPUT_PARAMETER(src3)
    TEST_DECLARE_INPUT_PARAMETER(src4)
    TEST_DECLARE_OUTPUT_PARAMETER(dst1)
    TEST_DECLARE_OUTPUT_PARAMETER(dst2)
    TEST_DECLARE_OUTPUT_PARAMETER(dst3)
    TEST_DECLARE_OUTPUT_PARAMETER(dst4)

    // Argument lists actually passed to mixChannels, plus the whole (non-ROI)
    // destination buffers so that the borders can be compared as well.
    std::vector<Mat> src_roi, dst_roi, dst;
    std::vector<UMat> usrc_roi, udst_roi, udst;
    std::vector<int> fromTo;

    virtual void SetUp()
    {
        depth = GET_PARAM(0);
        use_roi = GET_PARAM(1);
    }

    // generate number of channels and create type
    int type()
    {
        const int channels = randomInt(1, 5);
        return CV_MAKE_TYPE(depth, channels);
    }

    // Regenerates all matrices, chooses how many of them take part in the call
    // and fills fromTo. The order of the random draws is significant for
    // reproducibility and is therefore kept fixed.
    void generateTestData()
    {
        src_roi.clear();
        dst_roi.clear();
        dst.clear();
        usrc_roi.clear();
        udst_roi.clear();
        udst.clear();
        fromTo.clear();

        Size roiSize = randomSize(1, MAX_VALUE);

        // sources
        Border src1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
        randomSubMat(src1, src1_roi, roiSize, src1Border, type(), 2, 11);

        Border src2Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
        randomSubMat(src2, src2_roi, roiSize, src2Border, type(), -1540, 1740);

        Border src3Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
        randomSubMat(src3, src3_roi, roiSize, src3Border, type(), -1540, 1740);

        Border src4Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
        randomSubMat(src4, src4_roi, roiSize, src4Border, type(), -1540, 1740);

        // destinations
        Border dst1Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
        randomSubMat(dst1, dst1_roi, roiSize, dst1Border, type(), 2, 11);

        Border dst2Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
        randomSubMat(dst2, dst2_roi, roiSize, dst2Border, type(), -1540, 1740);

        Border dst3Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
        randomSubMat(dst3, dst3_roi, roiSize, dst3Border, type(), -1540, 1740);

        Border dst4Border = randomBorder(0, use_roi ? MAX_VALUE : 0);
        randomSubMat(dst4, dst4_roi, roiSize, dst4Border, type(), -1540, 1740);

        UMAT_UPLOAD_INPUT_PARAMETER(src1)
        UMAT_UPLOAD_INPUT_PARAMETER(src2)
        UMAT_UPLOAD_INPUT_PARAMETER(src3)
        UMAT_UPLOAD_INPUT_PARAMETER(src4)

        UMAT_UPLOAD_OUTPUT_PARAMETER(dst1)
        UMAT_UPLOAD_OUTPUT_PARAMETER(dst2)
        UMAT_UPLOAD_OUTPUT_PARAMETER(dst3)
        UMAT_UPLOAD_OUTPUT_PARAMETER(dst4)

        int nsrc = randomInt(1, 5), ndst = randomInt(1, 5);

        // The first source/destination always participates; the others are
        // appended only when the drawn count asks for them.
        src_roi.push_back(src1_roi);
        usrc_roi.push_back(usrc1_roi);
        if (nsrc >= 2)
        {
            src_roi.push_back(src2_roi);
            usrc_roi.push_back(usrc2_roi);
        }
        if (nsrc >= 3)
        {
            src_roi.push_back(src3_roi);
            usrc_roi.push_back(usrc3_roi);
        }
        if (nsrc >= 4)
        {
            src_roi.push_back(src4_roi);
            usrc_roi.push_back(usrc4_roi);
        }

        dst_roi.push_back(dst1_roi);
        udst_roi.push_back(udst1_roi);
        dst.push_back(dst1);
        udst.push_back(udst1);
        if (ndst >= 2)
        {
            dst_roi.push_back(dst2_roi);
            udst_roi.push_back(udst2_roi);
            dst.push_back(dst2);
            udst.push_back(udst2);
        }
        if (ndst >= 3)
        {
            dst_roi.push_back(dst3_roi);
            udst_roi.push_back(udst3_roi);
            dst.push_back(dst3);
            udst.push_back(udst3);
        }
        if (ndst >= 4)
        {
            dst_roi.push_back(dst4_roi);
            udst_roi.push_back(udst4_roi);
            dst.push_back(dst4);
            udst.push_back(udst4);
        }

        // Total number of channels over all participating matrices; fromTo
        // indices address channels in this flattened numbering.
        int scntotal = 0, dcntotal = 0;
        for (int s = 0; s < nsrc; ++s)
            scntotal += src_roi[s].channels();
        for (int d = 0; d < ndst; ++d)
            dcntotal += dst_roi[d].channels();

        const int npairs = randomInt(1, std::min(scntotal, dcntotal) + 1);
        fromTo.resize(npairs * 2);

        for (int p = 0; p < npairs; ++p)
        {
            fromTo[2 * p] = randomInt(0, scntotal);
            fromTo[2 * p + 1] = randomInt(0, dcntotal);
        }
    }
};
|
||||
|
||||
// Runs cv::mixChannels on the Mat vectors (OCL_OFF) and on the UMat vectors
// (OCL_ON) with the same fromTo list. Both the ROIs and the full destination
// buffers must match exactly. The loop runs 10 iterations more than the
// default test_loop_times.
OCL_TEST_P(MixChannels, Accuracy)
{
    const int iterations = test_loop_times + 10;
    for (int iter = 0; iter < iterations; ++iter)
    {
        generateTestData();

        OCL_OFF(cv::mixChannels(src_roi, dst_roi, fromTo));
        OCL_ON(cv::mixChannels(usrc_roi, udst_roi, fromTo));

        const size_t count = dst_roi.size();
        for (size_t k = 0; k < count; ++k)
        {
            EXPECT_MAT_NEAR(dst[k], udst[k], 0.0);
            EXPECT_MAT_NEAR(dst_roi[k], udst_roi[k], 0.0);
        }
    }
}
|
||||
|
||||
//////////////////////////////////////// Instantiation ///////////////////////////////////////////////
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Channels, Merge, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Channels, Split, Combine(OCL_ALL_DEPTHS, OCL_ALL_CHANNELS, Bool()));
|
||||
OCL_INSTANTIATE_TEST_CASE_P(Channels, MixChannels, Combine(OCL_ALL_DEPTHS, Bool()));
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
||||
|
@ -25,7 +25,7 @@ TEST(Core_Drawing, _914)
|
||||
}
|
||||
|
||||
|
||||
TEST(Core_OutputArraySreate, _1997)
|
||||
TEST(Core_OutputArrayCreate, _1997)
|
||||
{
|
||||
struct local {
|
||||
static void create(OutputArray arr, Size submatSize, int type)
|
||||
|
@ -281,7 +281,7 @@ CUDA_TEST_P(ConvertTo, WithOutScaling)
|
||||
cv::Mat dst_gold;
|
||||
src.convertTo(dst_gold, depth2);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -177,7 +177,7 @@ struct HOG : testing::TestWithParam<cv::cuda::DeviceInfo>, cv::cuda::HOGDescript
|
||||
};
|
||||
|
||||
// disabled until resize is fixed
|
||||
CUDA_TEST_P(HOG, Detect)
|
||||
CUDA_TEST_P(HOG, DISABLED_Detect)
|
||||
{
|
||||
cv::Mat img_rgb = readImage("hog/road.png");
|
||||
ASSERT_FALSE(img_rgb.empty());
|
||||
|
@ -49,6 +49,8 @@ using namespace perf;
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// GEMM
|
||||
|
||||
#ifdef HAVE_CUBLAS
|
||||
|
||||
CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T)
|
||||
#define ALL_GEMM_FLAGS Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), \
|
||||
GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
|
||||
@ -98,6 +100,8 @@ PERF_TEST_P(Sz_Type_Flags, GEMM,
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// MulSpectrums
|
||||
|
||||
|
@ -2514,7 +2514,7 @@ CUDA_TEST_P(AddWeighted, Accuracy)
|
||||
cv::Mat dst_gold;
|
||||
cv::addWeighted(src1, alpha, src2, beta, gamma, dst_gold, dst_depth);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 1.0 : 1e-3);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 2.0 : 1e-3);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -734,7 +734,7 @@ CUDA_TEST_P(Normalize, WithOutMask)
|
||||
cv::Mat dst_gold;
|
||||
cv::normalize(src, dst_gold, alpha, beta, norm_type, type);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 1e-6);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(Normalize, WithMask)
|
||||
|
@ -715,7 +715,7 @@ CUDA_TEST_P(CvtColor, BGR2YCrCb)
|
||||
cv::Mat dst_gold;
|
||||
cv::cvtColor(src, dst_gold, cv::COLOR_BGR2YCrCb);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(CvtColor, RGB2YCrCb)
|
||||
@ -728,7 +728,7 @@ CUDA_TEST_P(CvtColor, RGB2YCrCb)
|
||||
cv::Mat dst_gold;
|
||||
cv::cvtColor(src, dst_gold, cv::COLOR_RGB2YCrCb);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
|
||||
EXPECT_MAT_NEAR(dst_gold, dst, 1.0);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(CvtColor, BGR2YCrCb4)
|
||||
@ -749,7 +749,7 @@ CUDA_TEST_P(CvtColor, BGR2YCrCb4)
|
||||
cv::split(h_dst, channels);
|
||||
cv::merge(channels, 3, h_dst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, h_dst, 1e-5);
|
||||
EXPECT_MAT_NEAR(dst_gold, h_dst, 1.0);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(CvtColor, RGBA2YCrCb4)
|
||||
@ -771,7 +771,7 @@ CUDA_TEST_P(CvtColor, RGBA2YCrCb4)
|
||||
cv::split(h_dst, channels);
|
||||
cv::merge(channels, 3, h_dst);
|
||||
|
||||
EXPECT_MAT_NEAR(dst_gold, h_dst, 1e-5);
|
||||
EXPECT_MAT_NEAR(dst_gold, h_dst, 1.0);
|
||||
}
|
||||
|
||||
CUDA_TEST_P(CvtColor, YCrCb2BGR)
|
||||
|
@ -444,7 +444,7 @@ PERF_TEST_P(ImagePair, OpticalFlowBM,
|
||||
}
|
||||
}
|
||||
|
||||
PERF_TEST_P(ImagePair, FastOpticalFlowBM,
|
||||
PERF_TEST_P(ImagePair, DISABLED_FastOpticalFlowBM,
|
||||
Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
|
||||
{
|
||||
declare.time(400);
|
||||
|
@ -297,6 +297,11 @@ public:
|
||||
trees_ = get_param(params,"trees",4);
|
||||
root = new NodePtr[trees_];
|
||||
indices = new int*[trees_];
|
||||
|
||||
for (int i=0; i<trees_; ++i) {
|
||||
root[i] = NULL;
|
||||
indices[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
HierarchicalClusteringIndex(const HierarchicalClusteringIndex&);
|
||||
@ -309,11 +314,34 @@ public:
|
||||
*/
|
||||
virtual ~HierarchicalClusteringIndex()
{
    // Release the per-tree index arrays first, then the two outer arrays.
    free_elements();

    // delete[] on a null pointer is a no-op, so no explicit NULL checks are needed.
    delete[] root;
    delete[] indices;
}
|
||||
|
||||
|
||||
/**
|
||||
* Release the inner elements of indices[]
|
||||
*/
|
||||
void free_elements()
|
||||
{
|
||||
if (indices!=NULL) {
|
||||
for(int i=0; i<trees_; ++i) {
|
||||
if (indices[i]!=NULL) {
|
||||
delete[] indices[i];
|
||||
indices[i] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns size of index.
|
||||
*/
|
||||
@ -348,6 +376,9 @@ public:
|
||||
if (branching_<2) {
|
||||
throw FLANNException("Branching factor must be at least 2");
|
||||
}
|
||||
|
||||
free_elements();
|
||||
|
||||
for (int i=0; i<trees_; ++i) {
|
||||
indices[i] = new int[size_];
|
||||
for (size_t j=0; j<size_; ++j) {
|
||||
@ -387,6 +418,17 @@ public:
|
||||
load_value(stream, centers_init_);
|
||||
load_value(stream, leaf_size_);
|
||||
load_value(stream, memoryCounter);
|
||||
|
||||
free_elements();
|
||||
|
||||
if (root!=NULL) {
|
||||
delete[] root;
|
||||
}
|
||||
|
||||
if (indices!=NULL) {
|
||||
delete[] indices;
|
||||
}
|
||||
|
||||
indices = new int*[trees_];
|
||||
root = new NodePtr[trees_];
|
||||
for (int i=0; i<trees_; ++i) {
|
||||
|
@ -222,6 +222,12 @@ elseif(HAVE_QTKIT)
|
||||
list(APPEND HIGHGUI_LIBRARIES "-framework QTKit" "-framework QuartzCore" "-framework AppKit")
|
||||
endif()
|
||||
|
||||
if(HAVE_INTELPERC)
|
||||
list(APPEND highgui_srcs src/cap_intelperc.cpp)
|
||||
ocv_include_directories(${INTELPERC_INCLUDE_DIR})
|
||||
list(APPEND HIGHGUI_LIBRARIES ${INTELPERC_LIBRARIES})
|
||||
endif(HAVE_INTELPERC)
|
||||
|
||||
if(IOS)
|
||||
add_definitions(-DHAVE_IOS=1)
|
||||
list(APPEND highgui_srcs src/ios_conversions.mm src/cap_ios_abstract_camera.mm src/cap_ios_photo_camera.mm src/cap_ios_video_camera.mm)
|
||||
|
@ -271,7 +271,8 @@ enum { CAP_ANY = 0, // autodetect
|
||||
CAP_XIAPI = 1100, // XIMEA Camera API
|
||||
CAP_AVFOUNDATION = 1200, // AVFoundation framework for iOS (OS X Lion will have the same API)
|
||||
CAP_GIGANETIX = 1300, // Smartek Giganetix GigEVisionSDK
|
||||
CAP_MSMF = 1400 // Microsoft Media Foundation (via videoInput)
|
||||
CAP_MSMF = 1400, // Microsoft Media Foundation (via videoInput)
|
||||
CAP_INTELPERC = 1500 // Intel Perceptual Computing SDK
|
||||
};
|
||||
|
||||
// generic properties (based on DC1394 properties)
|
||||
@ -496,6 +497,26 @@ enum { CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
|
||||
CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006
|
||||
};
|
||||
|
||||
enum { CAP_PROP_INTELPERC_PROFILE_COUNT = 11001,
|
||||
CAP_PROP_INTELPERC_PROFILE_IDX = 11002,
|
||||
CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE = 11003,
|
||||
CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE = 11004,
|
||||
CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD = 11005,
|
||||
CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ = 11006,
|
||||
CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT = 11007
|
||||
};
|
||||
|
||||
// Intel PerC streams: single-bit selectors for the depth and image generators,
// plus a mask that covers both bits.
enum { CAP_INTELPERC_DEPTH_GENERATOR = 1 << 29,
       CAP_INTELPERC_IMAGE_GENERATOR = 1 << 28,
       // Bit flags are combined with bitwise OR; the value equals the former sum.
       CAP_INTELPERC_GENERATORS_MASK = CAP_INTELPERC_DEPTH_GENERATOR | CAP_INTELPERC_IMAGE_GENERATOR
     };
|
||||
|
||||
enum { CAP_INTELPERC_DEPTH_MAP = 0, // Each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth.
|
||||
CAP_INTELPERC_UVDEPTH_MAP = 1, // Each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates.
|
||||
CAP_INTELPERC_IR_MAP = 2, // Each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam.
|
||||
CAP_INTELPERC_IMAGE = 3
|
||||
};
|
||||
|
||||
class CV_EXPORTS_W VideoCapture
|
||||
{
|
||||
|
@ -313,7 +313,9 @@ enum
|
||||
|
||||
CV_CAP_AVFOUNDATION = 1200, // AVFoundation framework for iOS (OS X Lion will have the same API)
|
||||
|
||||
CV_CAP_GIGANETIX = 1300 // Smartek Giganetix GigEVisionSDK
|
||||
CV_CAP_GIGANETIX = 1300, // Smartek Giganetix GigEVisionSDK
|
||||
|
||||
CV_CAP_INTELPERC = 1500 // Intel Perceptual Computing SDK
|
||||
};
|
||||
|
||||
/* start capturing frames from camera: index = camera_index + domain_offset (CV_CAP_*) */
|
||||
@ -459,16 +461,29 @@ enum
|
||||
CV_CAP_PROP_IOS_DEVICE_EXPOSURE = 9002,
|
||||
CV_CAP_PROP_IOS_DEVICE_FLASH = 9003,
|
||||
CV_CAP_PROP_IOS_DEVICE_WHITEBALANCE = 9004,
|
||||
CV_CAP_PROP_IOS_DEVICE_TORCH = 9005
|
||||
CV_CAP_PROP_IOS_DEVICE_TORCH = 9005,
|
||||
|
||||
// Properties of cameras available through Smartek Giganetix Ethernet Vision interface
|
||||
/* --- Vladimir Litvinenko (litvinenko.vladimir@gmail.com) --- */
|
||||
,CV_CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
|
||||
CV_CAP_PROP_GIGA_FRAME_OFFSET_X = 10001,
|
||||
CV_CAP_PROP_GIGA_FRAME_OFFSET_Y = 10002,
|
||||
CV_CAP_PROP_GIGA_FRAME_WIDTH_MAX = 10003,
|
||||
CV_CAP_PROP_GIGA_FRAME_HEIGH_MAX = 10004,
|
||||
CV_CAP_PROP_GIGA_FRAME_SENS_WIDTH = 10005,
|
||||
CV_CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006
|
||||
CV_CAP_PROP_GIGA_FRAME_SENS_HEIGH = 10006,
|
||||
|
||||
CV_CAP_PROP_INTELPERC_PROFILE_COUNT = 11001,
|
||||
CV_CAP_PROP_INTELPERC_PROFILE_IDX = 11002,
|
||||
CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE = 11003,
|
||||
CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE = 11004,
|
||||
CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD = 11005,
|
||||
CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ = 11006,
|
||||
CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT = 11007,
|
||||
|
||||
// Intel PerC streams
|
||||
CV_CAP_INTELPERC_DEPTH_GENERATOR = 1 << 29,
|
||||
CV_CAP_INTELPERC_IMAGE_GENERATOR = 1 << 28,
|
||||
CV_CAP_INTELPERC_GENERATORS_MASK = CV_CAP_INTELPERC_DEPTH_GENERATOR + CV_CAP_INTELPERC_IMAGE_GENERATOR
|
||||
};
|
||||
|
||||
enum
|
||||
@ -549,6 +564,14 @@ enum
|
||||
CV_CAP_ANDROID_ANTIBANDING_OFF
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
CV_CAP_INTELPERC_DEPTH_MAP = 0, // Each pixel is a 16-bit integer. The value indicates the distance from an object to the camera's XY plane or the Cartesian depth.
|
||||
CV_CAP_INTELPERC_UVDEPTH_MAP = 1, // Each pixel contains two 32-bit floating point values in the range of 0-1, representing the mapping of depth coordinates to the color coordinates.
|
||||
CV_CAP_INTELPERC_IR_MAP = 2, // Each pixel is a 16-bit integer. The value indicates the intensity of the reflected laser beam.
|
||||
CV_CAP_INTELPERC_IMAGE = 3
|
||||
};
|
||||
|
||||
/* retrieve or set capture properties */
|
||||
CVAPI(double) cvGetCaptureProperty( CvCapture* capture, int property_id );
|
||||
CVAPI(int) cvSetCaptureProperty( CvCapture* capture, int property_id, double value );
|
||||
|
@ -155,6 +155,9 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
|
||||
#endif
|
||||
#ifdef HAVE_GIGE_API
|
||||
CV_CAP_GIGANETIX,
|
||||
#endif
|
||||
#ifdef HAVE_INTELPERC
|
||||
CV_CAP_INTELPERC,
|
||||
#endif
|
||||
-1
|
||||
};
|
||||
@ -193,6 +196,7 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
|
||||
defined(HAVE_AVFOUNDATION) || \
|
||||
defined(HAVE_ANDROID_NATIVE_CAMERA) || \
|
||||
defined(HAVE_GIGE_API) || \
|
||||
defined(HAVE_INTELPERC) || \
|
||||
(0)
|
||||
// local variable to memorize the captured device
|
||||
CvCapture *capture;
|
||||
@ -342,6 +346,13 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index)
|
||||
break; // CV_CAP_GIGANETIX
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_INTELPERC
|
||||
case CV_CAP_INTELPERC:
|
||||
capture = cvCreateCameraCapture_IntelPerC(index);
|
||||
if (capture)
|
||||
return capture;
|
||||
break; // CV_CAP_INTEL_PERC
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
714
modules/highgui/src/cap_intelperc.cpp
Normal file
714
modules/highgui/src/cap_intelperc.cpp
Normal file
@ -0,0 +1,714 @@
|
||||
#include "precomp.hpp"
|
||||
|
||||
#ifdef HAVE_INTELPERC
|
||||
|
||||
#include "pxcsession.h"
|
||||
#include "pxcsmartptr.h"
|
||||
#include "pxccapture.h"
|
||||
|
||||
class CvIntelPerCStreamBase
|
||||
{
|
||||
protected:
|
||||
struct FrameInternal
|
||||
{
|
||||
IplImage* retrieveFrame()
|
||||
{
|
||||
if (m_mat.empty())
|
||||
return NULL;
|
||||
m_iplHeader = IplImage(m_mat);
|
||||
return &m_iplHeader;
|
||||
}
|
||||
cv::Mat m_mat;
|
||||
private:
|
||||
IplImage m_iplHeader;
|
||||
};
|
||||
public:
|
||||
CvIntelPerCStreamBase()
|
||||
: m_profileIdx(-1)
|
||||
, m_frameIdx(0)
|
||||
, m_timeStampStartNS(0)
|
||||
{
|
||||
}
|
||||
virtual ~CvIntelPerCStreamBase()
|
||||
{
|
||||
}
|
||||
|
||||
bool isValid()
|
||||
{
|
||||
return (m_device.IsValid() && m_stream.IsValid());
|
||||
}
|
||||
bool grabFrame()
|
||||
{
|
||||
if (!m_stream.IsValid())
|
||||
return false;
|
||||
if (-1 == m_profileIdx)
|
||||
{
|
||||
if (!setProperty(CV_CAP_PROP_INTELPERC_PROFILE_IDX, 0))
|
||||
return false;
|
||||
}
|
||||
PXCSmartPtr<PXCImage> pxcImage; PXCSmartSP sp;
|
||||
if (PXC_STATUS_NO_ERROR > m_stream->ReadStreamAsync(&pxcImage, &sp))
|
||||
return false;
|
||||
if (PXC_STATUS_NO_ERROR > sp->Synchronize())
|
||||
return false;
|
||||
if (0 == m_timeStampStartNS)
|
||||
m_timeStampStartNS = pxcImage->QueryTimeStamp();
|
||||
m_timeStamp = (double)((pxcImage->QueryTimeStamp() - m_timeStampStartNS) / 10000);
|
||||
m_frameIdx++;
|
||||
return prepareIplImage(pxcImage);
|
||||
}
|
||||
int getProfileIDX() const
|
||||
{
|
||||
return m_profileIdx;
|
||||
}
|
||||
public:
|
||||
virtual bool initStream(PXCSession *session) = 0;
|
||||
virtual double getProperty(int propIdx)
|
||||
{
|
||||
double ret = 0.0;
|
||||
switch (propIdx)
|
||||
{
|
||||
case CV_CAP_PROP_INTELPERC_PROFILE_COUNT:
|
||||
ret = (double)m_profiles.size();
|
||||
break;
|
||||
case CV_CAP_PROP_FRAME_WIDTH :
|
||||
if ((0 <= m_profileIdx) && (m_profileIdx < m_profiles.size()))
|
||||
ret = (double)m_profiles[m_profileIdx].imageInfo.width;
|
||||
break;
|
||||
case CV_CAP_PROP_FRAME_HEIGHT :
|
||||
if ((0 <= m_profileIdx) && (m_profileIdx < m_profiles.size()))
|
||||
ret = (double)m_profiles[m_profileIdx].imageInfo.height;
|
||||
break;
|
||||
case CV_CAP_PROP_FPS :
|
||||
if ((0 <= m_profileIdx) && (m_profileIdx < m_profiles.size()))
|
||||
{
|
||||
ret = ((double)m_profiles[m_profileIdx].frameRateMin.numerator / (double)m_profiles[m_profileIdx].frameRateMin.denominator
|
||||
+ (double)m_profiles[m_profileIdx].frameRateMax.numerator / (double)m_profiles[m_profileIdx].frameRateMax.denominator) / 2.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_POS_FRAMES:
|
||||
ret = (double)m_frameIdx;
|
||||
break;
|
||||
case CV_CAP_PROP_POS_MSEC:
|
||||
ret = m_timeStamp;
|
||||
break;
|
||||
};
|
||||
return ret;
|
||||
}
|
||||
virtual bool setProperty(int propIdx, double propVal)
|
||||
{
|
||||
bool isSet = false;
|
||||
switch (propIdx)
|
||||
{
|
||||
case CV_CAP_PROP_INTELPERC_PROFILE_IDX:
|
||||
{
|
||||
int propValInt = (int)propVal;
|
||||
if ((0 <= propValInt) && (propValInt < m_profiles.size()))
|
||||
{
|
||||
if (m_profileIdx != propValInt)
|
||||
{
|
||||
m_profileIdx = propValInt;
|
||||
if (m_stream.IsValid())
|
||||
m_stream->SetProfile(&m_profiles[m_profileIdx]);
|
||||
m_frameIdx = 0;
|
||||
m_timeStampStartNS = 0;
|
||||
}
|
||||
isSet = true;
|
||||
}
|
||||
}
|
||||
break;
|
||||
};
|
||||
return isSet;
|
||||
}
|
||||
protected:
|
||||
PXCSmartPtr<PXCCapture::Device> m_device;
|
||||
bool initDevice(PXCSession *session)
|
||||
{
|
||||
if (NULL == session)
|
||||
return false;
|
||||
|
||||
pxcStatus sts = PXC_STATUS_NO_ERROR;
|
||||
PXCSession::ImplDesc templat;
|
||||
memset(&templat,0,sizeof(templat));
|
||||
templat.group = PXCSession::IMPL_GROUP_SENSOR;
|
||||
templat.subgroup= PXCSession::IMPL_SUBGROUP_VIDEO_CAPTURE;
|
||||
|
||||
for (int modidx = 0; PXC_STATUS_NO_ERROR <= sts; modidx++)
|
||||
{
|
||||
PXCSession::ImplDesc desc;
|
||||
sts = session->QueryImpl(&templat, modidx, &desc);
|
||||
if (PXC_STATUS_NO_ERROR > sts)
|
||||
break;
|
||||
|
||||
PXCSmartPtr<PXCCapture> capture;
|
||||
sts = session->CreateImpl<PXCCapture>(&desc, &capture);
|
||||
if (!capture.IsValid())
|
||||
continue;
|
||||
|
||||
/* enumerate devices */
|
||||
for (int devidx = 0; PXC_STATUS_NO_ERROR <= sts; devidx++)
|
||||
{
|
||||
PXCSmartPtr<PXCCapture::Device> device;
|
||||
sts = capture->CreateDevice(devidx, &device);
|
||||
if (PXC_STATUS_NO_ERROR <= sts)
|
||||
{
|
||||
m_device = device.ReleasePtr();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
PXCSmartPtr<PXCCapture::VideoStream> m_stream;
|
||||
void initStreamImpl(PXCImage::ImageType type)
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return;
|
||||
|
||||
pxcStatus sts = PXC_STATUS_NO_ERROR;
|
||||
/* enumerate streams */
|
||||
for (int streamidx = 0; PXC_STATUS_NO_ERROR <= sts; streamidx++)
|
||||
{
|
||||
PXCCapture::Device::StreamInfo sinfo;
|
||||
sts = m_device->QueryStream(streamidx, &sinfo);
|
||||
if (PXC_STATUS_NO_ERROR > sts)
|
||||
break;
|
||||
if (PXCCapture::VideoStream::CUID != sinfo.cuid)
|
||||
continue;
|
||||
if (type != sinfo.imageType)
|
||||
continue;
|
||||
|
||||
sts = m_device->CreateStream<PXCCapture::VideoStream>(streamidx, &m_stream);
|
||||
if (PXC_STATUS_NO_ERROR == sts)
|
||||
break;
|
||||
m_stream.ReleaseRef();
|
||||
}
|
||||
}
|
||||
protected:
|
||||
std::vector<PXCCapture::VideoStream::ProfileInfo> m_profiles;
|
||||
int m_profileIdx;
|
||||
int m_frameIdx;
|
||||
pxcU64 m_timeStampStartNS;
|
||||
double m_timeStamp;
|
||||
|
||||
virtual bool validProfile(const PXCCapture::VideoStream::ProfileInfo& /*pinfo*/)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
void enumProfiles()
|
||||
{
|
||||
m_profiles.clear();
|
||||
if (!m_stream.IsValid())
|
||||
return;
|
||||
pxcStatus sts = PXC_STATUS_NO_ERROR;
|
||||
for (int profidx = 0; PXC_STATUS_NO_ERROR <= sts; profidx++)
|
||||
{
|
||||
PXCCapture::VideoStream::ProfileInfo pinfo;
|
||||
sts = m_stream->QueryProfile(profidx, &pinfo);
|
||||
if (PXC_STATUS_NO_ERROR > sts)
|
||||
break;
|
||||
if (validProfile(pinfo))
|
||||
m_profiles.push_back(pinfo);
|
||||
}
|
||||
}
|
||||
virtual bool prepareIplImage(PXCImage *pxcImage) = 0;
|
||||
};
|
||||
|
||||
class CvIntelPerCStreamImage
|
||||
: public CvIntelPerCStreamBase
|
||||
{
|
||||
public:
|
||||
CvIntelPerCStreamImage()
|
||||
{
|
||||
}
|
||||
virtual ~CvIntelPerCStreamImage()
|
||||
{
|
||||
}
|
||||
|
||||
// Sets up the colour stream: creates the device, opens the first stream of
// type IMAGE_TYPE_COLOR and collects its profiles. Returns false when either
// the device or the stream could not be created.
virtual bool initStream(PXCSession *session)
{
    if (initDevice(session))
    {
        initStreamImpl(PXCImage::IMAGE_TYPE_COLOR);
        if (m_stream.IsValid())
        {
            enumProfiles();
            return true;
        }
    }
    return false;
}
|
||||
virtual double getProperty(int propIdx)
|
||||
{
|
||||
switch (propIdx)
|
||||
{
|
||||
case CV_CAP_PROP_BRIGHTNESS:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_BRIGHTNESS, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_CONTRAST:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_CONTRAST, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_SATURATION:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_SATURATION, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_HUE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_HUE, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_GAMMA:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_GAMMA, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_SHARPNESS:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_SHARPNESS, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_GAIN:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_GAIN, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_BACKLIGHT:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_BACK_LIGHT_COMPENSATION, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_EXPOSURE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_COLOR_EXPOSURE, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
//Add image stream specific properties
|
||||
}
|
||||
return CvIntelPerCStreamBase::getProperty(propIdx);
|
||||
}
|
||||
virtual bool setProperty(int propIdx, double propVal)
|
||||
{
|
||||
switch (propIdx)
|
||||
{
|
||||
case CV_CAP_PROP_BRIGHTNESS:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_BRIGHTNESS, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_CONTRAST:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_CONTRAST, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_SATURATION:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_SATURATION, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_HUE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_HUE, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_GAMMA:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_GAMMA, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_SHARPNESS:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_SHARPNESS, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_GAIN:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_GAIN, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_BACKLIGHT:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_BACK_LIGHT_COMPENSATION, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_EXPOSURE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_COLOR_EXPOSURE, (float)propVal));
|
||||
}
|
||||
break;
|
||||
//Add image stream specific properties
|
||||
}
|
||||
return CvIntelPerCStreamBase::setProperty(propIdx, propVal);
|
||||
}
|
||||
public:
|
||||
// Returns whatever m_frame.retrieveFrame() yields for the colour frame
// stored in m_frame.
IplImage* retrieveFrame()
{
    IplImage* colorFrame = m_frame.retrieveFrame();
    return colorFrame;
}
|
||||
protected:
|
||||
FrameInternal m_frame;
|
||||
// Copies the pixels of a captured colour image into m_frame.m_mat.
//   pxcImage - image delivered by the SDK; may be NULL.
// Returns true when the pixels were copied. Returns false when the image is
// NULL, when read access cannot be acquired, or when the surface is not
// backed by system memory.
bool prepareIplImage(PXCImage *pxcImage)
{
    if (NULL == pxcImage)
        return false;

    PXCImage::ImageInfo info;
    pxcImage->QueryInfo(&info);

    PXCImage::ImageData data;
    // A failed acquire leaves 'data' unfilled, so it must not be inspected.
    if (PXC_STATUS_NO_ERROR > pxcImage->AcquireAccess(PXCImage::ACCESS_READ, PXCImage::COLOR_FORMAT_RGB24, &data))
        return false;

    bool isCopied = false;
    if (PXCImage::SURFACE_TYPE_SYSTEM_MEMORY == data.type)
    {
        // Wrap the SDK buffer without copying, then deep-copy into our own storage.
        cv::Mat temp(info.height, info.width, CV_8UC3, data.planes[0], data.pitches[0]);
        temp.copyTo(m_frame.m_mat);
        isCopied = true;
    }

    // Access acquired above is released on every path, including the rejected one.
    pxcImage->ReleaseAccess(&data);
    return isCopied;
}
|
||||
};
|
||||
|
||||
class CvIntelPerCStreamDepth
|
||||
: public CvIntelPerCStreamBase
|
||||
{
|
||||
public:
|
||||
CvIntelPerCStreamDepth()
|
||||
{
|
||||
}
|
||||
virtual ~CvIntelPerCStreamDepth()
|
||||
{
|
||||
}
|
||||
|
||||
virtual bool initStream(PXCSession *session)
|
||||
{
|
||||
if (!initDevice(session))
|
||||
return false;
|
||||
initStreamImpl(PXCImage::IMAGE_TYPE_DEPTH);
|
||||
if (!m_stream.IsValid())
|
||||
return false;
|
||||
enumProfiles();
|
||||
return true;
|
||||
}
|
||||
virtual double getProperty(int propIdx)
|
||||
{
|
||||
switch (propIdx)
|
||||
{
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_LOW_CONFIDENCE_VALUE, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_SATURATION_VALUE, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0;
|
||||
float fret = 0.0f;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryProperty(PXCCapture::Device::PROPERTY_DEPTH_CONFIDENCE_THRESHOLD, &fret))
|
||||
return (double)fret;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_HORZ:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0f;
|
||||
PXCPointF32 ptf;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryPropertyAsPoint(PXCCapture::Device::PROPERTY_DEPTH_FOCAL_LENGTH, &ptf))
|
||||
return (double)ptf.x;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_FOCAL_LENGTH_VERT:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return 0.0f;
|
||||
PXCPointF32 ptf;
|
||||
if (PXC_STATUS_NO_ERROR == m_device->QueryPropertyAsPoint(PXCCapture::Device::PROPERTY_DEPTH_FOCAL_LENGTH, &ptf))
|
||||
return (double)ptf.y;
|
||||
return 0.0;
|
||||
}
|
||||
break;
|
||||
//Add depth stream sepcific properties
|
||||
}
|
||||
return CvIntelPerCStreamBase::getProperty(propIdx);
|
||||
}
|
||||
virtual bool setProperty(int propIdx, double propVal)
|
||||
{
|
||||
switch (propIdx)
|
||||
{
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_LOW_CONFIDENCE_VALUE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_LOW_CONFIDENCE_VALUE, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_SATURATION_VALUE:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_SATURATION_VALUE, (float)propVal));
|
||||
}
|
||||
break;
|
||||
case CV_CAP_PROP_INTELPERC_DEPTH_CONFIDENCE_THRESHOLD:
|
||||
{
|
||||
if (!m_device.IsValid())
|
||||
return false;
|
||||
return (PXC_STATUS_NO_ERROR == m_device->SetProperty(PXCCapture::Device::PROPERTY_DEPTH_CONFIDENCE_THRESHOLD, (float)propVal));
|
||||
}
|
||||
break;
|
||||
//Add depth stream sepcific properties
|
||||
}
|
||||
return CvIntelPerCStreamBase::setProperty(propIdx, propVal);
|
||||
}
|
||||
public:
|
||||
IplImage* retrieveDepthFrame()
|
||||
{
|
||||
return m_frameDepth.retrieveFrame();
|
||||
}
|
||||
IplImage* retrieveIRFrame()
|
||||
{
|
||||
return m_frameIR.retrieveFrame();
|
||||
}
|
||||
IplImage* retrieveUVFrame()
|
||||
{
|
||||
return m_frameUV.retrieveFrame();
|
||||
}
|
||||
protected:
|
||||
virtual bool validProfile(const PXCCapture::VideoStream::ProfileInfo& pinfo)
|
||||
{
|
||||
return (PXCImage::COLOR_FORMAT_DEPTH == pinfo.imageInfo.format);
|
||||
}
|
||||
protected:
|
||||
FrameInternal m_frameDepth;
|
||||
FrameInternal m_frameIR;
|
||||
FrameInternal m_frameUV;
|
||||
|
||||
bool prepareIplImage(PXCImage *pxcImage)
|
||||
{
|
||||
if (NULL == pxcImage)
|
||||
return false;
|
||||
PXCImage::ImageInfo info;
|
||||
pxcImage->QueryInfo(&info);
|
||||
|
||||
PXCImage::ImageData data;
|
||||
pxcImage->AcquireAccess(PXCImage::ACCESS_READ, &data);
|
||||
|
||||
if (PXCImage::SURFACE_TYPE_SYSTEM_MEMORY != data.type)
|
||||
return false;
|
||||
|
||||
if (PXCImage::COLOR_FORMAT_DEPTH != data.format)
|
||||
return false;
|
||||
|
||||
{
|
||||
cv::Mat temp(info.height, info.width, CV_16SC1, data.planes[0], data.pitches[0]);
|
||||
temp.copyTo(m_frameDepth.m_mat);
|
||||
}
|
||||
{
|
||||
cv::Mat temp(info.height, info.width, CV_16SC1, data.planes[1], data.pitches[1]);
|
||||
temp.copyTo(m_frameIR.m_mat);
|
||||
}
|
||||
{
|
||||
cv::Mat temp(info.height, info.width, CV_32FC2, data.planes[2], data.pitches[2]);
|
||||
temp.copyTo(m_frameUV.m_mat);
|
||||
}
|
||||
|
||||
pxcImage->ReleaseAccess(&data);
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
class CvCapture_IntelPerC : public CvCapture
|
||||
{
|
||||
public:
|
||||
CvCapture_IntelPerC(int /*index*/)
|
||||
: m_contextOpened(false)
|
||||
{
|
||||
pxcStatus sts = PXCSession_Create(&m_session);
|
||||
if (PXC_STATUS_NO_ERROR > sts)
|
||||
return;
|
||||
m_contextOpened = m_imageStream.initStream(m_session);
|
||||
m_contextOpened &= m_depthStream.initStream(m_session);
|
||||
}
|
||||
virtual ~CvCapture_IntelPerC(){}
|
||||
|
||||
virtual double getProperty(int propIdx)
|
||||
{
|
||||
double propValue = 0;
|
||||
int purePropIdx = propIdx & ~CV_CAP_INTELPERC_GENERATORS_MASK;
|
||||
if (CV_CAP_INTELPERC_IMAGE_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
|
||||
{
|
||||
propValue = m_imageStream.getProperty(purePropIdx);
|
||||
}
|
||||
else if (CV_CAP_INTELPERC_DEPTH_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
|
||||
{
|
||||
propValue = m_depthStream.getProperty(purePropIdx);
|
||||
}
|
||||
else
|
||||
{
|
||||
propValue = m_depthStream.getProperty(purePropIdx);
|
||||
}
|
||||
return propValue;
|
||||
}
|
||||
virtual bool setProperty(int propIdx, double propVal)
|
||||
{
|
||||
bool isSet = false;
|
||||
int purePropIdx = propIdx & ~CV_CAP_INTELPERC_GENERATORS_MASK;
|
||||
if (CV_CAP_INTELPERC_IMAGE_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
|
||||
{
|
||||
isSet = m_imageStream.setProperty(purePropIdx, propVal);
|
||||
}
|
||||
else if (CV_CAP_INTELPERC_DEPTH_GENERATOR == (propIdx & CV_CAP_INTELPERC_GENERATORS_MASK))
|
||||
{
|
||||
isSet = m_depthStream.setProperty(purePropIdx, propVal);
|
||||
}
|
||||
else
|
||||
{
|
||||
isSet = m_depthStream.setProperty(purePropIdx, propVal);
|
||||
}
|
||||
return isSet;
|
||||
}
|
||||
|
||||
bool grabFrame()
|
||||
{
|
||||
if (!isOpened())
|
||||
return false;
|
||||
|
||||
bool isGrabbed = false;
|
||||
if (m_depthStream.isValid())
|
||||
isGrabbed = m_depthStream.grabFrame();
|
||||
if ((m_imageStream.isValid()) && (-1 != m_imageStream.getProfileIDX()))
|
||||
isGrabbed &= m_imageStream.grabFrame();
|
||||
|
||||
return isGrabbed;
|
||||
}
|
||||
|
||||
virtual IplImage* retrieveFrame(int outputType)
|
||||
{
|
||||
IplImage* image = 0;
|
||||
switch (outputType)
|
||||
{
|
||||
case CV_CAP_INTELPERC_DEPTH_MAP:
|
||||
image = m_depthStream.retrieveDepthFrame();
|
||||
break;
|
||||
case CV_CAP_INTELPERC_UVDEPTH_MAP:
|
||||
image = m_depthStream.retrieveUVFrame();
|
||||
break;
|
||||
case CV_CAP_INTELPERC_IR_MAP:
|
||||
image = m_depthStream.retrieveIRFrame();
|
||||
break;
|
||||
case CV_CAP_INTELPERC_IMAGE:
|
||||
image = m_imageStream.retrieveFrame();
|
||||
break;
|
||||
}
|
||||
CV_Assert(NULL != image);
|
||||
return image;
|
||||
}
|
||||
|
||||
bool isOpened() const
|
||||
{
|
||||
return m_contextOpened;
|
||||
}
|
||||
protected:
|
||||
bool m_contextOpened;
|
||||
|
||||
PXCSmartPtr<PXCSession> m_session;
|
||||
CvIntelPerCStreamImage m_imageStream;
|
||||
CvIntelPerCStreamDepth m_depthStream;
|
||||
};
|
||||
|
||||
|
||||
// Factory for the Intel PerC capture backend.
// Returns a heap-allocated capture when it opened successfully; otherwise the
// object is destroyed and 0 is returned.
CvCapture* cvCreateCameraCapture_IntelPerC(int index)
{
    CvCapture_IntelPerC* capture = new CvCapture_IntelPerC(index);

    if( !capture->isOpened() )
    {
        delete capture;
        return 0;
    }

    return capture;
}
|
||||
|
||||
|
||||
#endif //HAVE_INTELPERC
|
@ -128,6 +128,7 @@ CvCapture* cvCreateFileCapture_OpenNI( const char* filename );
|
||||
CvCapture* cvCreateCameraCapture_Android( int index );
|
||||
CvCapture* cvCreateCameraCapture_XIMEA( int index );
|
||||
CvCapture* cvCreateCameraCapture_AVFoundation(int index);
|
||||
CvCapture* cvCreateCameraCapture_IntelPerC(int index);
|
||||
|
||||
|
||||
CVAPI(int) cvHaveImageReader(const char* filename);
|
||||
|
@ -61,7 +61,6 @@
|
||||
#ifdef __GNUC__
|
||||
# pragma GCC diagnostic ignored "-Wmissing-declarations"
|
||||
#endif
|
||||
#include <MultiMon.h>
|
||||
|
||||
#include <commctrl.h>
|
||||
#include <winuser.h>
|
||||
|
@ -35,6 +35,7 @@
|
||||
defined(HAVE_XIMEA) || \
|
||||
defined(HAVE_AVFOUNDATION) || \
|
||||
defined(HAVE_GIGE_API) || \
|
||||
defined(HAVE_INTELPERC) || \
|
||||
(0)
|
||||
//defined(HAVE_ANDROID_NATIVE_CAMERA) || - enable after #1193
|
||||
# define BUILD_WITH_CAMERA_SUPPORT 1
|
||||
|
@ -34,7 +34,7 @@ http://en.wikipedia.org/wiki/Canny_edge_detector
|
||||
|
||||
* An example on using the canny edge detector can be found at opencv_source_code/samples/cpp/edge.cpp
|
||||
|
||||
* (Python) An example on using the canny edge detector can be found at opencv_source_code/samples/cpp/edge.py
|
||||
* (Python) An example on using the canny edge detector can be found at opencv_source_code/samples/python/edge.py
|
||||
|
||||
cornerEigenValsAndVecs
|
||||
----------------------
|
||||
|
@ -364,7 +364,7 @@ CV_INLINE double cvContourPerimeter( const void* contour )
|
||||
}
|
||||
|
||||
|
||||
/* Calculates contour boundning rectangle (update=1) or
|
||||
/* Calculates contour bounding rectangle (update=1) or
|
||||
just retrieves pre-calculated rectangle (update=0) */
|
||||
CVAPI(CvRect) cvBoundingRect( CvArr* points, int update CV_DEFAULT(0) );
|
||||
|
||||
|
@ -41,12 +41,13 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
/*
|
||||
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
|
||||
#define USE_IPP_CANNY 1
|
||||
#else
|
||||
#undef USE_IPP_CANNY
|
||||
#endif
|
||||
|
||||
*/
|
||||
#ifdef USE_IPP_CANNY
|
||||
namespace cv
|
||||
{
|
||||
|
@ -40,10 +40,90 @@
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// CLAHE
|
||||
|
||||
namespace clahe
|
||||
{
|
||||
static bool calcLut(cv::InputArray _src, cv::OutputArray _dst,
|
||||
const int tilesX, const int tilesY, const cv::Size tileSize,
|
||||
const int clipLimit, const float lutScale)
|
||||
{
|
||||
cv::ocl::Kernel _k("calcLut", cv::ocl::imgproc::clahe_oclsrc);
|
||||
|
||||
bool is_cpu = cv::ocl::Device::getDefault().type() == cv::ocl::Device::TYPE_CPU;
|
||||
cv::String opts;
|
||||
if(is_cpu)
|
||||
opts = "-D CPU ";
|
||||
else
|
||||
opts = cv::format("-D WAVE_SIZE=%d", _k.preferedWorkGroupSizeMultiple());
|
||||
|
||||
cv::ocl::Kernel k("calcLut", cv::ocl::imgproc::clahe_oclsrc, opts);
|
||||
if(k.empty())
|
||||
return false;
|
||||
|
||||
cv::UMat src = _src.getUMat();
|
||||
_dst.create(tilesX * tilesY, 256, CV_8UC1);
|
||||
cv::UMat dst = _dst.getUMat();
|
||||
|
||||
int tile_size[2];
|
||||
tile_size[0] = tileSize.width;
|
||||
tile_size[1] = tileSize.height;
|
||||
|
||||
size_t localThreads[3] = { 32, 8, 1 };
|
||||
size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 };
|
||||
|
||||
int idx = 0;
|
||||
idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src));
|
||||
idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst));
|
||||
idx = k.set(idx, tile_size);
|
||||
idx = k.set(idx, tilesX);
|
||||
idx = k.set(idx, clipLimit);
|
||||
idx = k.set(idx, lutScale);
|
||||
|
||||
if (!k.run(2, globalThreads, localThreads, false))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool transform(const cv::InputArray _src, cv::OutputArray _dst, const cv::InputArray _lut,
|
||||
const int tilesX, const int tilesY, const cv::Size & tileSize)
|
||||
{
|
||||
|
||||
cv::ocl::Kernel k("transform", cv::ocl::imgproc::clahe_oclsrc);
|
||||
if(k.empty())
|
||||
return false;
|
||||
|
||||
int tile_size[2];
|
||||
tile_size[0] = tileSize.width;
|
||||
tile_size[1] = tileSize.height;
|
||||
|
||||
cv::UMat src = _src.getUMat();
|
||||
_dst.create(src.size(), src.type());
|
||||
cv::UMat dst = _dst.getUMat();
|
||||
cv::UMat lut = _lut.getUMat();
|
||||
|
||||
size_t localThreads[3] = { 32, 8, 1 };
|
||||
size_t globalThreads[3] = { src.cols, src.rows, 1 };
|
||||
|
||||
int idx = 0;
|
||||
idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(src));
|
||||
idx = k.set(idx, cv::ocl::KernelArg::WriteOnlyNoSize(dst));
|
||||
idx = k.set(idx, cv::ocl::KernelArg::ReadOnlyNoSize(lut));
|
||||
idx = k.set(idx, src.cols);
|
||||
idx = k.set(idx, src.rows);
|
||||
idx = k.set(idx, tile_size);
|
||||
idx = k.set(idx, tilesX);
|
||||
idx = k.set(idx, tilesY);
|
||||
|
||||
if (!k.run(2, globalThreads, localThreads, false))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
class CLAHE_CalcLut_Body : public cv::ParallelLoopBody
|
||||
@ -241,7 +321,9 @@ namespace
|
||||
int tilesY_;
|
||||
|
||||
cv::Mat srcExt_;
|
||||
cv::UMat usrcExt_;
|
||||
cv::Mat lut_;
|
||||
cv::UMat ulut_;
|
||||
};
|
||||
|
||||
CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
|
||||
@ -256,31 +338,34 @@ namespace
|
||||
|
||||
void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
|
||||
{
|
||||
cv::Mat src = _src.getMat();
|
||||
CV_Assert( _src.type() == CV_8UC1 );
|
||||
|
||||
CV_Assert( src.type() == CV_8UC1 );
|
||||
|
||||
_dst.create( src.size(), src.type() );
|
||||
cv::Mat dst = _dst.getMat();
|
||||
bool useOpenCL = cv::ocl::useOpenCL() && _src.isUMat() && _src.dims()<=2;
|
||||
|
||||
const int histSize = 256;
|
||||
|
||||
lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1);
|
||||
|
||||
cv::Size tileSize;
|
||||
cv::Mat srcForLut;
|
||||
cv::_InputArray _srcForLut;
|
||||
|
||||
if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
|
||||
if (_src.size().width % tilesX_ == 0 && _src.size().height % tilesY_ == 0)
|
||||
{
|
||||
tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
|
||||
srcForLut = src;
|
||||
tileSize = cv::Size(_src.size().width / tilesX_, _src.size().height / tilesY_);
|
||||
_srcForLut = _src;
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101);
|
||||
|
||||
tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
|
||||
srcForLut = srcExt_;
|
||||
if(useOpenCL)
|
||||
{
|
||||
cv::copyMakeBorder(_src, usrcExt_, 0, tilesY_ - (_src.size().height % tilesY_), 0, tilesX_ - (_src.size().width % tilesX_), cv::BORDER_REFLECT_101);
|
||||
tileSize = cv::Size(usrcExt_.size().width / tilesX_, usrcExt_.size().height / tilesY_);
|
||||
_srcForLut = usrcExt_;
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::copyMakeBorder(_src, srcExt_, 0, tilesY_ - (_src.size().height % tilesY_), 0, tilesX_ - (_src.size().width % tilesX_), cv::BORDER_REFLECT_101);
|
||||
tileSize = cv::Size(srcExt_.size().width / tilesX_, srcExt_.size().height / tilesY_);
|
||||
_srcForLut = srcExt_;
|
||||
}
|
||||
}
|
||||
|
||||
const int tileSizeTotal = tileSize.area();
|
||||
@ -293,6 +378,16 @@ namespace
|
||||
clipLimit = std::max(clipLimit, 1);
|
||||
}
|
||||
|
||||
if(useOpenCL && clahe::calcLut(_srcForLut, ulut_, tilesX_, tilesY_, tileSize, clipLimit, lutScale) )
|
||||
if( clahe::transform(_src, _dst, ulut_, tilesX_, tilesY_, tileSize) )
|
||||
return;
|
||||
|
||||
cv::Mat src = _src.getMat();
|
||||
_dst.create( src.size(), src.type() );
|
||||
cv::Mat dst = _dst.getMat();
|
||||
cv::Mat srcForLut = _srcForLut.getMat();
|
||||
lut_.create(tilesX_ * tilesY_, histSize, CV_8UC1);
|
||||
|
||||
CLAHE_CalcLut_Body calcLutBody(srcForLut, lut_, tileSize, tilesX_, tilesY_, clipLimit, lutScale);
|
||||
cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), calcLutBody);
|
||||
|
||||
@ -325,6 +420,8 @@ namespace
|
||||
{
|
||||
srcExt_.release();
|
||||
lut_.release();
|
||||
usrcExt_.release();
|
||||
ulut_.release();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3151,7 +3151,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
|
||||
CV_Assert( scn == 3 || scn == 4 );
|
||||
_dst.create(sz, CV_MAKETYPE(depth, 1));
|
||||
dst = _dst.getMat();
|
||||
|
||||
/*
|
||||
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
|
||||
if( code == CV_BGR2GRAY )
|
||||
{
|
||||
@ -3174,7 +3174,7 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn )
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
*/
|
||||
bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
|
||||
|
||||
if( depth == CV_8U )
|
||||
|
@ -42,6 +42,7 @@
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
#include <sstream>
|
||||
|
||||
/****************************************************************************************\
|
||||
Base Image Filter
|
||||
@ -3314,6 +3315,246 @@ static bool ocl_filter2D( InputArray _src, OutputArray _dst, int ddepth,
|
||||
}
|
||||
return kernel.run(2, globalsize, localsize, true);
|
||||
}
|
||||
|
||||
// Row pass of the OpenCL separable filter: launches "row_filter_C<cn>_D<depth>"
// from filterSepRow_oclsrc, reading 'src' and writing 'buf'.
//   kernelX    - row kernel, uploaded as a read-only UMat.
//   anchor     - used as RADIUSX.
//   borderType - BORDER_* mode, optionally OR-ed with BORDER_ISOLATED.
//   sync       - forwarded to Kernel::run().
// Returns false for an unsupported border mode, or if the kernel cannot be
// created or run.
static bool ocl_sepRowFilter2D( UMat &src, UMat &buf, Mat &kernelX, int anchor, int borderType, bool sync)
{
    const int type = src.type();
    const int cn = CV_MAT_CN(type);
    const int sdepth = CV_MAT_DEPTH(type);
    const Size bufSize = buf.size();

#ifdef ANDROID
    size_t localsize[2] = {16, 10};
#else
    size_t localsize[2] = {16, 16};
#endif

    // For 8-bit input with 1 or 2 channels the launch width is reduced to a
    // quarter / half of the buffer width (rounded up); otherwise the full
    // width is used.
    int launchWidth = bufSize.width;
    if (CV_8U == sdepth)
    {
        if (1 == cn)
            launchWidth = (bufSize.width + 3) >> 2;
        else if (2 == cn)
            launchWidth = (bufSize.width + 1) >> 1;
    }
    size_t globalsize[2] = {DIVUP(launchWidth, localsize[0]) * localsize[0],
                            DIVUP(bufSize.height, localsize[1]) * localsize[1]};

    const int radiusX = anchor;
    const int radiusY = (int)((buf.rows - src.rows) >> 1);

    const bool isIsolatedBorder = (borderType & BORDER_ISOLATED) != 0;
    const char* btype = NULL;
    switch (borderType & ~BORDER_ISOLATED)
    {
    case BORDER_CONSTANT:   btype = "BORDER_CONSTANT";    break;
    case BORDER_REPLICATE:  btype = "BORDER_REPLICATE";   break;
    case BORDER_REFLECT:    btype = "BORDER_REFLECT";     break;
    case BORDER_WRAP:       btype = "BORDER_WRAP";        break;
    case BORDER_REFLECT101: btype = "BORDER_REFLECT_101"; break;
    default:
        return false;
    }

    // Any one of these conditions selects the EXTRA_EXTRAPOLATION build of the kernel.
    const bool extra_extrapolation =
        (src.rows < (int)((-radiusY + globalsize[1]) >> 1) + 1) ||
        (src.rows < radiusY) ||
        (src.cols < (int)((-radiusX + globalsize[0] + 8 * localsize[0] + 3) >> 1) + 1) ||
        (src.cols < radiusX);

    cv::String build_options = cv::format("-D RADIUSX=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D %s -D %s -D %s",
        radiusX, (int)localsize[0], (int)localsize[1], cn,
        btype,
        extra_extrapolation ? "EXTRA_EXTRAPOLATION" : "NO_EXTRA_EXTRAPOLATION",
        isIsolatedBorder ? "BORDER_ISOLATED" : "NO_BORDER_ISOLATED");

    Size srcWholeSize; Point srcOffset;
    src.locateROI(srcWholeSize, srcOffset);

    // cn and sdepth are never -1 here, so both suffixes are always present.
    const cv::String kernelName = cv::format("row_filter_C%d_D%d", cn, sdepth);

    ocl::Kernel kernelRow;
    if (!kernelRow.create(kernelName.c_str(), cv::ocl::imgproc::filterSepRow_oclsrc, build_options))
        return false;

    int argIdx = 0;
    // Source: pointer, step in elements, ROI offset, ROI size, whole-matrix size.
    argIdx = kernelRow.set(argIdx, ocl::KernelArg::PtrReadOnly(src));
    argIdx = kernelRow.set(argIdx, (int)(src.step / src.elemSize()));
    argIdx = kernelRow.set(argIdx, srcOffset.x);
    argIdx = kernelRow.set(argIdx, srcOffset.y);
    argIdx = kernelRow.set(argIdx, src.cols);
    argIdx = kernelRow.set(argIdx, src.rows);
    argIdx = kernelRow.set(argIdx, srcWholeSize.width);
    argIdx = kernelRow.set(argIdx, srcWholeSize.height);
    // Destination buffer: pointer, step in elements, size, then radiusY and the kernel taps.
    argIdx = kernelRow.set(argIdx, ocl::KernelArg::PtrWriteOnly(buf));
    argIdx = kernelRow.set(argIdx, (int)(buf.step / buf.elemSize()));
    argIdx = kernelRow.set(argIdx, buf.cols);
    argIdx = kernelRow.set(argIdx, buf.rows);
    argIdx = kernelRow.set(argIdx, radiusY);
    argIdx = kernelRow.set(argIdx, ocl::KernelArg::PtrReadOnly(kernelX.getUMat(ACCESS_READ)));

    return kernelRow.run(2, globalsize, localsize, sync);
}
|
||||
|
||||
// Column pass of the OpenCL separable filter: launches "col_filter" from
// filterSepCol_oclsrc, reading 'buf' and writing 'dst'.
//   kernelY - column kernel, uploaded as a read-only UMat.
//   anchor  - used as RADIUSY.
//   sync    - forwarded to Kernel::run().
// Build options are only generated for the destination types listed below;
// for any other type the kernel is created with empty options.
// Returns false if the kernel cannot be created or run.
static bool ocl_sepColFilter2D(UMat &buf, UMat &dst, Mat &kernelY, int anchor, bool sync)
{
#ifdef ANDROID
    size_t localsize[2] = {16, 10};
#else
    size_t localsize[2] = {16, 16};
#endif
    size_t globalsize[2] = {0, 0};

    const int type = dst.type();
    const int cn = CV_MAT_CN(type);
    const int ddepth = CV_MAT_DEPTH(type);
    const Size sz = dst.size();

    globalsize[1] = DIVUP(sz.height, localsize[1]) * localsize[1];

    // OpenCL type names / conversion function substituted into the kernel source.
    const char* srcTypeName = NULL;
    const char* dstTypeName = NULL;
    const char* convertName = NULL;

    if (CV_8U == ddepth)
    {
        switch (cn)
        {
        case 1:
            globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
            srcTypeName = "float";  dstTypeName = "uchar";  convertName = "convert_uchar_sat";
            break;
        case 2:
            globalsize[0] = DIVUP((sz.width + 1) / 2, localsize[0]) * localsize[0];
            srcTypeName = "float2"; dstTypeName = "uchar2"; convertName = "convert_uchar2_sat";
            break;
        case 3:
        case 4:
            globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
            srcTypeName = "float4"; dstTypeName = "uchar4"; convertName = "convert_uchar4_sat";
            break;
        }
    }
    else
    {
        globalsize[0] = DIVUP(sz.width, localsize[0]) * localsize[0];
        switch (dst.type())
        {
        case CV_32SC1:
            srcTypeName = "float";  dstTypeName = "int";    convertName = "convert_int_sat";
            break;
        case CV_32SC3:
        case CV_32SC4:
            srcTypeName = "float4"; dstTypeName = "int4";   convertName = "convert_int4_sat";
            break;
        case CV_32FC1:
            srcTypeName = "float";  dstTypeName = "float";  convertName = "";
            break;
        case CV_32FC3:
        case CV_32FC4:
            srcTypeName = "float4"; dstTypeName = "float4"; convertName = "";
            break;
        }
    }

    cv::String build_options;
    if (NULL != srcTypeName)
    {
        build_options = cv::format("-D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D CN=%d -D GENTYPE_SRC=%s -D GENTYPE_DST=%s -D convert_to_DST=%s",
            anchor, (int)localsize[0], (int)localsize[1], cn, srcTypeName, dstTypeName, convertName);
    }

    ocl::Kernel kernelCol;
    if (!kernelCol.create("col_filter", cv::ocl::imgproc::filterSepCol_oclsrc, build_options))
        return false;

    int argIdx = 0;
    // Intermediate buffer: pointer, step in elements, size.
    argIdx = kernelCol.set(argIdx, ocl::KernelArg::PtrReadOnly(buf));
    argIdx = kernelCol.set(argIdx, (int)(buf.step / buf.elemSize()));
    argIdx = kernelCol.set(argIdx, buf.cols);
    argIdx = kernelCol.set(argIdx, buf.rows);
    // Destination: pointer, offset and step in elements, size, then the kernel taps.
    argIdx = kernelCol.set(argIdx, ocl::KernelArg::PtrWriteOnly(dst));
    argIdx = kernelCol.set(argIdx, (int)(dst.offset / dst.elemSize()));
    argIdx = kernelCol.set(argIdx, (int)(dst.step / dst.elemSize()));
    argIdx = kernelCol.set(argIdx, dst.cols);
    argIdx = kernelCol.set(argIdx, dst.rows);
    argIdx = kernelCol.set(argIdx, ocl::KernelArg::PtrReadOnly(kernelY.getUMat(ACCESS_READ)));

    return kernelCol.run(2, globalsize, localsize, sync);
}
|
||||
|
||||
// OpenCL path of sepFilter2D: a row pass into a CV_32F intermediate buffer
// followed by a column pass into the destination.
// Returns false (so the caller can fall back to the CPU path) when:
//   - delta is non-zero,
//   - the source type is not one of CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4,
//   - either kernel has an even number of taps,
//   - ddepth is given and differs from the source depth,
//   - the source ROI x-offset, width or step (in elements) is not a multiple of 4,
//   - or either OpenCL pass fails.
static bool ocl_sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
                             InputArray _kernelX, InputArray _kernelY, Point anchor,
                             double delta, int borderType )
{
    if (abs(delta)> FLT_MIN)
        return false;

    int type = _src.type();
    // Reject every type outside the supported set. The previous condition
    // mixed '!=' and '==' and therefore could never be true.
    if ((CV_8UC1 != type) && (CV_8UC4 != type) &&
        (CV_32FC1 != type) && (CV_32FC4 != type))
        return false;

    int cn = CV_MAT_CN(type);

    // Both kernels are flattened to a single row and must have an odd length.
    Mat kernelX = _kernelX.getMat().reshape(1, 1);
    if (1 != (kernelX.cols % 2))
        return false;
    Mat kernelY = _kernelY.getMat().reshape(1, 1);
    if (1 != (kernelY.cols % 2))
        return false;

    int sdepth = CV_MAT_DEPTH(type);
    // A negative anchor component selects the kernel centre.
    if( anchor.x < 0 )
        anchor.x = kernelX.cols >> 1;
    if( anchor.y < 0 )
        anchor.y = kernelY.cols >> 1;

    if( ddepth < 0 )
        ddepth = sdepth;
    else if (ddepth != sdepth)
        return false;

    UMat src = _src.getUMat();
    Size srcWholeSize; Point srcOffset;
    src.locateROI(srcWholeSize, srcOffset);
    if ( (0 != (srcOffset.x % 4)) ||
         (0 != (src.cols % 4)) ||
         (0 != ((src.step / src.elemSize()) % 4))
       )
    {
        return false;
    }

    Size srcSize = src.size();
    // The intermediate buffer has kernelY.cols - 1 more rows than the source.
    Size bufSize(srcSize.width, srcSize.height + kernelY.cols - 1);
    UMat buf; buf.create(bufSize, CV_MAKETYPE(CV_32F, cn));
    if (!ocl_sepRowFilter2D(src, buf, kernelX, anchor.x, borderType, true))
        return false;

    _dst.create(srcSize, CV_MAKETYPE(ddepth, cn));
    UMat dst = _dst.getUMat();
    return ocl_sepColFilter2D(buf, dst, kernelY, anchor.y, true);
}
|
||||
}
|
||||
|
||||
cv::Ptr<cv::BaseFilter> cv::getLinearFilter(int srcType, int dstType,
|
||||
@ -3481,6 +3722,10 @@ void cv::sepFilter2D( InputArray _src, OutputArray _dst, int ddepth,
|
||||
InputArray _kernelX, InputArray _kernelY, Point anchor,
|
||||
double delta, int borderType )
|
||||
{
|
||||
bool use_opencl = ocl::useOpenCL() && _dst.isUMat();
|
||||
if( use_opencl && ocl_sepFilter2D(_src, _dst, ddepth, _kernelX, _kernelY, anchor, delta, borderType))
|
||||
return;
|
||||
|
||||
Mat src = _src.getMat(), kernelX = _kernelX.getMat(), kernelY = _kernelY.getMat();
|
||||
|
||||
if( ddepth < 0 )
|
||||
|
@ -1930,13 +1930,159 @@ void cv::calcBackProject( const Mat* images, int nimages, const int* channels,
|
||||
}
|
||||
|
||||
|
||||
namespace cv {
|
||||
|
||||
static void getUMatIndex(const std::vector<UMat> & um, int cn, int & idx, int & cnidx)
|
||||
{
|
||||
int totalChannels = 0;
|
||||
for (size_t i = 0, size = um.size(); i < size; ++i)
|
||||
{
|
||||
int ccn = um[i].channels();
|
||||
totalChannels += ccn;
|
||||
|
||||
if (totalChannels == cn)
|
||||
{
|
||||
idx = (int)(i + 1);
|
||||
cnidx = 0;
|
||||
return;
|
||||
}
|
||||
else if (totalChannels > cn)
|
||||
{
|
||||
idx = (int)i;
|
||||
cnidx = i == 0 ? cn : (cn - totalChannels + ccn);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
idx = cnidx = -1;
|
||||
}
|
||||
|
||||
// OpenCL back projection for 1-D and 2-D histograms.
//
//   _images  - vector of UMat; all images must share size and depth, and their
//              channels are numbered consecutively across the whole vector
//   channels - global channel numbers, one per histogram dimension
//              (taken by value because it is sorted here)
//   _hist    - histogram; read by the kernels as float data
//   _dst     - output, created with the size and depth of the input images
//   ranges   - lower/upper bound pairs, uploaded to the device as-is
//   scale    - multiplier passed to the kernels
//   histdims - histogram dimensionality; only 1 and 2 are handled
//
// Returns true when the result was produced on the device. Returns false when
// the configuration is not supported or a kernel could not be built or run,
// so that the caller can continue with the non-OpenCL implementation.
// Inconsistent input (no images, mismatching size/depth, channel numbers out
// of range) triggers CV_Assert.
static bool ocl_calcBackProject( InputArrayOfArrays _images, std::vector<int> channels,
                                 InputArray _hist, OutputArray _dst,
                                 const std::vector<float>& ranges,
                                 float scale, size_t histdims )
{
    const std::vector<UMat> & images = *(const std::vector<UMat> *)_images.getObj();
    size_t nimages = images.size();

    // Must be checked before images[0] is touched: indexing an empty vector
    // is undefined behavior.
    CV_Assert(nimages > 0);

    size_t totalcn = images[0].channels();
    Size size = images[0].size();
    int depth = images[0].depth();

    for (size_t i = 1; i < nimages; ++i)
    {
        const UMat & m = images[i];
        totalcn += m.channels();
        CV_Assert(size == m.size() && depth == m.depth());
    }

    std::sort(channels.begin(), channels.end());
    for (size_t i = 0; i < histdims; ++i)
        CV_Assert(channels[i] >= 0 && channels[i] < (int)totalcn);

    // The kernels read the source pixels as uchar and use them to index a
    // 256-entry lookup table, so only 8-bit unsigned input can be handled.
    if (depth != CV_8U)
        return false;

    if (histdims == 1)
    {
        int idx, cnidx;
        getUMatIndex(images, channels[0], idx, cnidx);
        CV_Assert(idx >= 0 && idx < (int)nimages);
        UMat im = images[idx];

        String opts = format("-D histdims=1 -D scn=%d", im.channels());
        ocl::Kernel lutk("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts);
        if (lutk.empty())
            return false;

        // one LUT entry per possible 8-bit pixel value
        size_t lsize = 256;
        UMat lut(1, (int)lsize, CV_32SC1), hist = _hist.getUMat(), uranges(ranges, true);

        // NOTE(review): hist.rows is passed as the number of bins, which looks
        // right only for a column-vector histogram - confirm for 1xN input.
        lutk.args(ocl::KernelArg::ReadOnlyNoSize(hist), hist.rows,
                  ocl::KernelArg::PtrWriteOnly(lut), scale, ocl::KernelArg::PtrReadOnly(uranges));
        if (!lutk.run(1, &lsize, NULL, false))
            return false;

        ocl::Kernel mapk("LUT", ocl::imgproc::calc_back_project_oclsrc, opts);
        if (mapk.empty())
            return false;

        _dst.create(size, depth);
        UMat dst = _dst.getUMat();

        // Start reading at the selected channel. With 8-bit data a channel
        // index equals a byte count (offset is presumably expressed in bytes).
        im.offset += cnidx;
        mapk.args(ocl::KernelArg::ReadOnlyNoSize(im), ocl::KernelArg::PtrReadOnly(lut),
                  ocl::KernelArg::WriteOnly(dst));

        size_t globalsize[2] = { (size_t)size.width, (size_t)size.height };
        return mapk.run(2, globalsize, NULL, false);
    }
    else if (histdims == 2)
    {
        int idx0, idx1, cnidx0, cnidx1;
        getUMatIndex(images, channels[0], idx0, cnidx0);
        getUMatIndex(images, channels[1], idx1, cnidx1);
        CV_Assert(idx0 >= 0 && idx0 < (int)nimages && idx1 >= 0 && idx1 < (int)nimages);
        UMat im0 = images[idx0], im1 = images[idx1];

        // LUT for the first dimension
        String opts = format("-D histdims=2 -D scn1=%d -D scn2=%d", im0.channels(), im1.channels());
        ocl::Kernel lutk1("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts);
        if (lutk1.empty())
            return false;

        // 256 entries per dimension, both tables stored back to back
        size_t lsize = 256;
        UMat lut(1, (int)lsize<<1, CV_32SC1), uranges(ranges, true), hist = _hist.getUMat();

        lutk1.args(hist.rows, ocl::KernelArg::PtrWriteOnly(lut), (int)0, ocl::KernelArg::PtrReadOnly(uranges), (int)0);
        if (!lutk1.run(1, &lsize, NULL, false))
            return false;

        // LUT for the second dimension
        ocl::Kernel lutk2("calcLUT", ocl::imgproc::calc_back_project_oclsrc, opts);
        if (lutk2.empty())
            return false;

        // NOTE(review): lut.offset is advanced here and the kernel is also
        // given an element offset of 256; confirm the two do not compound.
        lut.offset += lsize * sizeof(int);
        lutk2.args(hist.cols, ocl::KernelArg::PtrWriteOnly(lut), (int)256, ocl::KernelArg::PtrReadOnly(uranges), (int)2);
        if (!lutk2.run(1, &lsize, NULL, false))
            return false;

        // map every pixel pair through the two LUTs and the histogram
        ocl::Kernel mapk("LUT", ocl::imgproc::calc_back_project_oclsrc, opts);
        if (mapk.empty())
            return false;

        _dst.create(size, depth);
        UMat dst = _dst.getUMat();

        // start reading each image at its selected channel (see 1-D case)
        im0.offset += cnidx0;
        im1.offset += cnidx1;
        mapk.args(ocl::KernelArg::ReadOnlyNoSize(im0), ocl::KernelArg::ReadOnlyNoSize(im1),
                  ocl::KernelArg::ReadOnlyNoSize(hist), ocl::KernelArg::PtrReadOnly(lut), scale, ocl::KernelArg::WriteOnly(dst));

        size_t globalsize[2] = { (size_t)size.width, (size_t)size.height };
        return mapk.run(2, globalsize, NULL, false);
    }

    // other dimensionalities are not implemented on the device
    return false;
}
|
||||
|
||||
}
|
||||
|
||||
void cv::calcBackProject( InputArrayOfArrays images, const std::vector<int>& channels,
|
||||
InputArray hist, OutputArray dst,
|
||||
const std::vector<float>& ranges,
|
||||
double scale )
|
||||
{
|
||||
Size histSize = hist.size();
|
||||
bool _1D = histSize.height == 1 || histSize.width == 1;
|
||||
size_t histdims = _1D ? 1 : hist.dims();
|
||||
|
||||
if (ocl::useOpenCL() && images.isUMatVector() && dst.isUMat() && hist.type() == CV_32FC1 &&
|
||||
histdims <= 2 && ranges.size() == histdims * 2 && histdims == channels.size() &&
|
||||
ocl_calcBackProject(images, channels, hist, dst, ranges, (float)scale, histdims))
|
||||
return;
|
||||
|
||||
Mat H0 = hist.getMat(), H;
|
||||
int hcn = H0.channels();
|
||||
|
||||
if( hcn > 1 )
|
||||
{
|
||||
CV_Assert( H0.isContinuous() );
|
||||
@ -1947,12 +2093,15 @@ void cv::calcBackProject( InputArrayOfArrays images, const std::vector<int>& cha
|
||||
}
|
||||
else
|
||||
H = H0;
|
||||
|
||||
bool _1d = H.rows == 1 || H.cols == 1;
|
||||
int i, dims = H.dims, rsz = (int)ranges.size(), csz = (int)channels.size();
|
||||
int nimages = (int)images.total();
|
||||
|
||||
CV_Assert(nimages > 0);
|
||||
CV_Assert(rsz == dims*2 || (rsz == 2 && _1d) || (rsz == 0 && images.depth(0) == CV_8U));
|
||||
CV_Assert(csz == 0 || csz == dims || (csz == 1 && _1d));
|
||||
|
||||
float* _ranges[CV_MAX_DIM];
|
||||
if( rsz > 0 )
|
||||
{
|
||||
@ -3169,7 +3318,7 @@ static bool ocl_calcHist(InputArray _src, OutputArray _hist)
|
||||
|
||||
static bool ocl_equalizeHist(InputArray _src, OutputArray _dst)
|
||||
{
|
||||
size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
|
||||
size_t wgs = std::min<size_t>(ocl::Device::getDefault().maxWorkGroupSize(), BINS);
|
||||
|
||||
// calculation of histogram
|
||||
UMat hist;
|
||||
|
@ -2212,7 +2212,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
|
||||
int depth = src.depth(), cn = src.channels();
|
||||
double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y;
|
||||
int k, sx, sy, dx, dy;
|
||||
|
||||
/*
|
||||
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
|
||||
int mode = interpolation == INTER_LINEAR ? IPPI_INTER_LINEAR : 0;
|
||||
int type = src.type();
|
||||
@ -2240,7 +2240,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize,
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
*/
|
||||
if( interpolation == INTER_NEAREST )
|
||||
{
|
||||
resizeNN( src, dst, inv_scale_x, inv_scale_y );
|
||||
@ -3299,7 +3299,10 @@ public:
|
||||
if( m1->type() == CV_16SC2 && (m2->type() == CV_16UC1 || m2->type() == CV_16SC1) )
|
||||
{
|
||||
bufxy = (*m1)(Rect(x, y, bcols, brows));
|
||||
bufa = (*m2)(Rect(x, y, bcols, brows));
|
||||
|
||||
const ushort* sA = (const ushort*)(m2->data + m2->step*(y+y1)) + x;
|
||||
for( x1 = 0; x1 < bcols; x1++ )
|
||||
A[x1] = (ushort)(sA[x1] & (INTER_TAB_SIZE2-1));
|
||||
}
|
||||
else if( planar_input )
|
||||
{
|
||||
@ -3680,7 +3683,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
|
||||
{
|
||||
for( x = 0; x < size.width; x++ )
|
||||
{
|
||||
int fxy = src2 ? src2[x] : 0;
|
||||
int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1) : 0;
|
||||
dst1f[x] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
|
||||
dst2f[x] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
|
||||
}
|
||||
@ -3689,7 +3692,7 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
|
||||
{
|
||||
for( x = 0; x < size.width; x++ )
|
||||
{
|
||||
int fxy = src2 ? src2[x] : 0;
|
||||
int fxy = src2 ? src2[x] & (INTER_TAB_SIZE2-1): 0;
|
||||
dst1f[x*2] = src1[x*2] + (fxy & (INTER_TAB_SIZE-1))*scale;
|
||||
dst1f[x*2+1] = src1[x*2+1] + (fxy >> INTER_BITS)*scale;
|
||||
}
|
||||
@ -4000,7 +4003,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
|
||||
int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols;
|
||||
const int AB_BITS = MAX(10, (int)INTER_BITS);
|
||||
const int AB_SCALE = 1 << AB_BITS;
|
||||
|
||||
/*
|
||||
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
|
||||
int depth = src.depth();
|
||||
int channels = src.channels();
|
||||
@ -4044,7 +4047,7 @@ void cv::warpAffine( InputArray _src, OutputArray _dst,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*/
|
||||
for( x = 0; x < dst.cols; x++ )
|
||||
{
|
||||
adelta[x] = saturate_cast<int>(M[0]*x*AB_SCALE);
|
||||
@ -4231,7 +4234,7 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
|
||||
|
||||
if( !(flags & WARP_INVERSE_MAP) )
|
||||
invert(matM, matM);
|
||||
|
||||
/*
|
||||
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
|
||||
int depth = src.depth();
|
||||
int channels = src.channels();
|
||||
@ -4275,7 +4278,7 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*/
|
||||
Range range(0, dst.rows);
|
||||
warpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue);
|
||||
parallel_for_(range, invoker, dst.total()/(double)(1<<16));
|
||||
|
@ -39,6 +39,7 @@
|
||||
//
|
||||
//M*/
|
||||
#include "precomp.hpp"
|
||||
#include "opencl_kernels.hpp"
|
||||
|
||||
namespace cv
|
||||
{
|
||||
@ -362,106 +363,175 @@ Moments::Moments( double _m00, double _m10, double _m01, double _m20, double _m1
|
||||
nu30 = mu30*s3; nu21 = mu21*s3; nu12 = mu12*s3; nu03 = mu03*s3;
|
||||
}
|
||||
|
||||
// Computes the spatial moments (m00 ... m03) of an image with OpenCL.
//
// The "moments" kernel writes K integer values per TILE_SIZE x TILE_SIZE tile,
// taken relative to the tile origin. The per-tile values are then read back
// and shifted to image coordinates while being added to `m`.
//
//   _src - source image
//   m    - accumulator; the tile contributions are added to its current
//          m00 ... m03 fields
//
// Returns false if the kernel could not be built or executed; `m` is left
// untouched in that case.
static bool ocl_moments( InputArray _src, Moments& m)
{
    const int TILE_SIZE = 32;
    const int K = 10;   // number of values the kernel stores per tile

    ocl::Kernel momentsKernel("moments", ocl::imgproc::moments_oclsrc, format("-D TILE_SIZE=%d", TILE_SIZE));
    if( momentsKernel.empty() )
        return false;

    UMat src = _src.getUMat();
    Size imgSize = src.size();
    int xtiles = (imgSize.width + TILE_SIZE-1)/TILE_SIZE;
    int ytiles = (imgSize.height + TILE_SIZE-1)/TILE_SIZE;
    int ntiles = xtiles*ytiles;
    UMat tileBuf(1, ntiles*K, CV_32S);

    size_t globalsize[] = {(size_t)xtiles, (size_t)imgSize.height};
    size_t localsize[] = {1, (size_t)TILE_SIZE};

    momentsKernel.args(ocl::KernelArg::ReadOnly(src),
                       ocl::KernelArg::PtrWriteOnly(tileBuf),
                       xtiles);
    if( !momentsKernel.run(2, globalsize, localsize, true) )
        return false;

    Mat hostBuf = tileBuf.getMat(ACCESS_READ);
    const int* tileData = hostBuf.ptr<int>();

    // tiles are visited row by row, i.e. in the order they are stored
    for( int ty = 0; ty < ytiles; ty++ )
    {
        for( int tx = 0; tx < xtiles; tx++ )
        {
            // origin of the tile in image coordinates
            double x = tx*TILE_SIZE, y = ty*TILE_SIZE;
            const int* mom = tileData + (ty*xtiles + tx)*K;
            double xm = x * mom[0], ym = y * mom[0];

            // m00 = m00'
            m.m00 += mom[0];

            // m10 = m10' + x*m00'
            m.m10 += mom[1] + xm;

            // m01 = m01' + y*m00'
            m.m01 += mom[2] + ym;

            // m20 = m20' + 2*x*m10' + x*x*m00'
            m.m20 += mom[3] + x * (mom[1] * 2 + xm);

            // m11 = m11' + x*m01' + y*m10' + x*y*m00'
            m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1];

            // m02 = m02' + 2*y*m01' + y*y*m00'
            m.m02 += mom[5] + y * (mom[2] * 2 + ym);

            // m30 = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00'
            m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));

            // m21 = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20'
            m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];

            // m12 = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02'
            m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];

            // m03 = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00'
            m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
        }
    }

    return true;
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
cv::Moments cv::moments( InputArray _src, bool binary )
|
||||
{
|
||||
const int TILE_SIZE = 32;
|
||||
Mat mat = _src.getMat();
|
||||
MomentsInTileFunc func = 0;
|
||||
uchar nzbuf[TILE_SIZE*TILE_SIZE];
|
||||
Moments m;
|
||||
int type = mat.type();
|
||||
int type = _src.type();
|
||||
int depth = CV_MAT_DEPTH( type );
|
||||
int cn = CV_MAT_CN( type );
|
||||
|
||||
if( mat.checkVector(2) >= 0 && (depth == CV_32F || depth == CV_32S))
|
||||
return contourMoments(mat);
|
||||
|
||||
Size size = mat.size();
|
||||
|
||||
if( cn > 1 )
|
||||
CV_Error( CV_StsBadArg, "Invalid image type" );
|
||||
Size size = _src.size();
|
||||
|
||||
if( size.width <= 0 || size.height <= 0 )
|
||||
return m;
|
||||
|
||||
if( binary || depth == CV_8U )
|
||||
func = momentsInTile<uchar, int, int>;
|
||||
else if( depth == CV_16U )
|
||||
func = momentsInTile<ushort, int, int64>;
|
||||
else if( depth == CV_16S )
|
||||
func = momentsInTile<short, int, int64>;
|
||||
else if( depth == CV_32F )
|
||||
func = momentsInTile<float, double, double>;
|
||||
else if( depth == CV_64F )
|
||||
func = momentsInTile<double, double, double>;
|
||||
if( ocl::useOpenCL() && type == CV_8UC1 && !binary &&
|
||||
_src.isUMat() && ocl_moments(_src, m) )
|
||||
;
|
||||
else
|
||||
CV_Error( CV_StsUnsupportedFormat, "" );
|
||||
|
||||
Mat src0(mat);
|
||||
|
||||
for( int y = 0; y < size.height; y += TILE_SIZE )
|
||||
{
|
||||
Size tileSize;
|
||||
tileSize.height = std::min(TILE_SIZE, size.height - y);
|
||||
Mat mat = _src.getMat();
|
||||
if( mat.checkVector(2) >= 0 && (depth == CV_32F || depth == CV_32S))
|
||||
return contourMoments(mat);
|
||||
|
||||
for( int x = 0; x < size.width; x += TILE_SIZE )
|
||||
if( cn > 1 )
|
||||
CV_Error( CV_StsBadArg, "Invalid image type (must be single-channel)" );
|
||||
|
||||
if( binary || depth == CV_8U )
|
||||
func = momentsInTile<uchar, int, int>;
|
||||
else if( depth == CV_16U )
|
||||
func = momentsInTile<ushort, int, int64>;
|
||||
else if( depth == CV_16S )
|
||||
func = momentsInTile<short, int, int64>;
|
||||
else if( depth == CV_32F )
|
||||
func = momentsInTile<float, double, double>;
|
||||
else if( depth == CV_64F )
|
||||
func = momentsInTile<double, double, double>;
|
||||
else
|
||||
CV_Error( CV_StsUnsupportedFormat, "" );
|
||||
|
||||
Mat src0(mat);
|
||||
|
||||
for( int y = 0; y < size.height; y += TILE_SIZE )
|
||||
{
|
||||
tileSize.width = std::min(TILE_SIZE, size.width - x);
|
||||
Mat src(src0, cv::Rect(x, y, tileSize.width, tileSize.height));
|
||||
Size tileSize;
|
||||
tileSize.height = std::min(TILE_SIZE, size.height - y);
|
||||
|
||||
if( binary )
|
||||
for( int x = 0; x < size.width; x += TILE_SIZE )
|
||||
{
|
||||
cv::Mat tmp(tileSize, CV_8U, nzbuf);
|
||||
cv::compare( src, 0, tmp, CV_CMP_NE );
|
||||
src = tmp;
|
||||
tileSize.width = std::min(TILE_SIZE, size.width - x);
|
||||
Mat src(src0, cv::Rect(x, y, tileSize.width, tileSize.height));
|
||||
|
||||
if( binary )
|
||||
{
|
||||
cv::Mat tmp(tileSize, CV_8U, nzbuf);
|
||||
cv::compare( src, 0, tmp, CV_CMP_NE );
|
||||
src = tmp;
|
||||
}
|
||||
|
||||
double mom[10];
|
||||
func( src, mom );
|
||||
|
||||
if(binary)
|
||||
{
|
||||
double s = 1./255;
|
||||
for( int k = 0; k < 10; k++ )
|
||||
mom[k] *= s;
|
||||
}
|
||||
|
||||
double xm = x * mom[0], ym = y * mom[0];
|
||||
|
||||
// accumulate moments computed in each tile
|
||||
|
||||
// + m00 ( = m00' )
|
||||
m.m00 += mom[0];
|
||||
|
||||
// + m10 ( = m10' + x*m00' )
|
||||
m.m10 += mom[1] + xm;
|
||||
|
||||
// + m01 ( = m01' + y*m00' )
|
||||
m.m01 += mom[2] + ym;
|
||||
|
||||
// + m20 ( = m20' + 2*x*m10' + x*x*m00' )
|
||||
m.m20 += mom[3] + x * (mom[1] * 2 + xm);
|
||||
|
||||
// + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
|
||||
m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1];
|
||||
|
||||
// + m02 ( = m02' + 2*y*m01' + y*y*m00' )
|
||||
m.m02 += mom[5] + y * (mom[2] * 2 + ym);
|
||||
|
||||
// + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
|
||||
m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
|
||||
|
||||
// + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
|
||||
m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
|
||||
|
||||
// + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
|
||||
m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
|
||||
|
||||
// + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
|
||||
m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
|
||||
}
|
||||
|
||||
double mom[10];
|
||||
func( src, mom );
|
||||
|
||||
if(binary)
|
||||
{
|
||||
double s = 1./255;
|
||||
for( int k = 0; k < 10; k++ )
|
||||
mom[k] *= s;
|
||||
}
|
||||
|
||||
double xm = x * mom[0], ym = y * mom[0];
|
||||
|
||||
// accumulate moments computed in each tile
|
||||
|
||||
// + m00 ( = m00' )
|
||||
m.m00 += mom[0];
|
||||
|
||||
// + m10 ( = m10' + x*m00' )
|
||||
m.m10 += mom[1] + xm;
|
||||
|
||||
// + m01 ( = m01' + y*m00' )
|
||||
m.m01 += mom[2] + ym;
|
||||
|
||||
// + m20 ( = m20' + 2*x*m10' + x*x*m00' )
|
||||
m.m20 += mom[3] + x * (mom[1] * 2 + xm);
|
||||
|
||||
// + m11 ( = m11' + x*m01' + y*m10' + x*y*m00' )
|
||||
m.m11 += mom[4] + x * (mom[2] + ym) + y * mom[1];
|
||||
|
||||
// + m02 ( = m02' + 2*y*m01' + y*y*m00' )
|
||||
m.m02 += mom[5] + y * (mom[2] * 2 + ym);
|
||||
|
||||
// + m30 ( = m30' + 3*x*m20' + 3*x*x*m10' + x*x*x*m00' )
|
||||
m.m30 += mom[6] + x * (3. * mom[3] + x * (3. * mom[1] + xm));
|
||||
|
||||
// + m21 ( = m21' + x*(2*m11' + 2*y*m10' + x*m01' + x*y*m00') + y*m20')
|
||||
m.m21 += mom[7] + x * (2 * (mom[4] + y * mom[1]) + x * (mom[2] + ym)) + y * mom[3];
|
||||
|
||||
// + m12 ( = m12' + y*(2*m11' + 2*x*m01' + y*m10' + x*y*m00') + x*m02')
|
||||
m.m12 += mom[8] + y * (2 * (mom[4] + x * mom[2]) + y * (mom[1] + xm)) + x * mom[5];
|
||||
|
||||
// + m03 ( = m03' + 3*y*m02' + 3*y*y*m01' + y*y*y*m00' )
|
||||
m.m03 += mom[9] + y * (3. * mom[5] + y * (3. * mom[2] + ym));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -43,6 +43,7 @@
|
||||
#include "precomp.hpp"
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include "opencl_kernels.hpp"
|
||||
|
||||
/****************************************************************************************\
|
||||
Basic Morphological Operations: Erosion & Dilation
|
||||
@ -1283,11 +1284,132 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst,
|
||||
}
|
||||
#endif
|
||||
|
||||
static const char* op2str[] = {"ERODE", "DILATE"};
|
||||
|
||||
static bool ocl_morphology_op(InputArray _src, OutputArray _dst, InputArray _kernel, Size &ksize, const Point anchor, int iterations, int op)
|
||||
{
|
||||
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
|
||||
|
||||
if (_src.depth() == CV_64F && !doubleSupport)
|
||||
return false;
|
||||
|
||||
UMat kernel8U;
|
||||
_kernel.getUMat().convertTo(kernel8U, CV_8U);
|
||||
UMat kernel = kernel8U.reshape(1, 1);
|
||||
|
||||
bool rectKernel = true;
|
||||
for(int i = 0; i < kernel.rows * kernel.cols; ++i)
|
||||
if(kernel.getMat(ACCESS_READ).at<uchar>(i) != 1)
|
||||
rectKernel = false;
|
||||
|
||||
UMat src = _src.getUMat();
|
||||
|
||||
#ifdef ANDROID
|
||||
size_t localThreads[3] = {16, 8, 1};
|
||||
#else
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
#endif
|
||||
size_t globalThreads[3] = {(src.cols + localThreads[0] - 1) / localThreads[0] *localThreads[0], (src.rows + localThreads[1] - 1) / localThreads[1] *localThreads[1], 1};
|
||||
|
||||
if(localThreads[0]*localThreads[1] * 2 < (localThreads[0] + ksize.width - 1) * (localThreads[1] + ksize.height - 1))
|
||||
return false;
|
||||
|
||||
char compile_option[128];
|
||||
sprintf(compile_option, "-D RADIUSX=%d -D RADIUSY=%d -D LSIZE0=%d -D LSIZE1=%d -D %s %s %s -D GENTYPE=%s -D DEPTH_%d",
|
||||
anchor.x, anchor.y, (int)localThreads[0], (int)localThreads[1], op2str[op], doubleSupport?"-D DOUBLE_SUPPORT" :"", rectKernel?"-D RECTKERNEL":"",
|
||||
ocl::typeToStr(_src.type()), _src.depth() );
|
||||
|
||||
std::vector<ocl::Kernel> kernels;
|
||||
for(int i = 0; i<iterations; i++)
|
||||
{
|
||||
ocl::Kernel k( "morph", ocl::imgproc::morph_oclsrc, compile_option);
|
||||
if (k.empty())
|
||||
return false;
|
||||
kernels.push_back(k);
|
||||
}
|
||||
|
||||
_dst.create(src.size(), src.type());
|
||||
UMat dst = _dst.getUMat();
|
||||
|
||||
if( iterations== 1 && src.u != dst.u)
|
||||
{
|
||||
Size wholesize;
|
||||
Point ofs;
|
||||
src.locateROI(wholesize, ofs);
|
||||
int wholecols = wholesize.width, wholerows = wholesize.height;
|
||||
|
||||
int idxArg = 0;
|
||||
idxArg = kernels[0].set(idxArg, ocl::KernelArg::ReadOnlyNoSize(src));
|
||||
idxArg = kernels[0].set(idxArg, ocl::KernelArg::WriteOnlyNoSize(dst));
|
||||
idxArg = kernels[0].set(idxArg, ofs.x);
|
||||
idxArg = kernels[0].set(idxArg, ofs.y);
|
||||
idxArg = kernels[0].set(idxArg, src.cols);
|
||||
idxArg = kernels[0].set(idxArg, src.rows);
|
||||
idxArg = kernels[0].set(idxArg, ocl::KernelArg::PtrReadOnly(kernel));
|
||||
idxArg = kernels[0].set(idxArg, wholecols);
|
||||
idxArg = kernels[0].set(idxArg, wholerows);
|
||||
|
||||
return kernels[0].run(2, globalThreads, localThreads, false);
|
||||
}
|
||||
|
||||
for(int i = 0; i< iterations; i++)
|
||||
{
|
||||
UMat source;
|
||||
Size wholesize;
|
||||
Point ofs;
|
||||
if( i == 0)
|
||||
{
|
||||
int cols = src.cols, rows = src.rows;
|
||||
src.locateROI(wholesize,ofs);
|
||||
src.adjustROI(ofs.y, wholesize.height - rows - ofs.y, ofs.x, wholesize.width - cols - ofs.x);
|
||||
src.copyTo(source);
|
||||
src.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
|
||||
source.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
|
||||
}
|
||||
else
|
||||
{
|
||||
int cols = dst.cols, rows = dst.rows;
|
||||
dst.locateROI(wholesize,ofs);
|
||||
dst.adjustROI(ofs.y, wholesize.height - rows - ofs.y, ofs.x, wholesize.width - cols - ofs.x);
|
||||
dst.copyTo(source);
|
||||
dst.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
|
||||
source.adjustROI(-ofs.y, -wholesize.height + rows + ofs.y, -ofs.x, -wholesize.width + cols + ofs.x);
|
||||
}
|
||||
|
||||
source.locateROI(wholesize, ofs);
|
||||
int wholecols = wholesize.width, wholerows = wholesize.height;
|
||||
|
||||
int idxArg = 0;
|
||||
idxArg = kernels[i].set(idxArg, ocl::KernelArg::ReadOnlyNoSize(source));
|
||||
idxArg = kernels[i].set(idxArg, ocl::KernelArg::WriteOnlyNoSize(dst));
|
||||
idxArg = kernels[i].set(idxArg, ofs.x);
|
||||
idxArg = kernels[i].set(idxArg, ofs.y);
|
||||
idxArg = kernels[i].set(idxArg, source.cols);
|
||||
idxArg = kernels[i].set(idxArg, source.rows);
|
||||
idxArg = kernels[i].set(idxArg, ocl::KernelArg::PtrReadOnly(kernel));
|
||||
idxArg = kernels[i].set(idxArg, wholecols);
|
||||
idxArg = kernels[i].set(idxArg, wholerows);
|
||||
|
||||
if (!kernels[i].run(2, globalThreads, localThreads, false))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static void morphOp( int op, InputArray _src, OutputArray _dst,
|
||||
InputArray _kernel,
|
||||
Point anchor, int iterations,
|
||||
int borderType, const Scalar& borderValue )
|
||||
{
|
||||
int src_type = _src.type(), dst_type = _dst.type(),
|
||||
src_cn = CV_MAT_CN(src_type), src_depth = CV_MAT_DEPTH(src_type);
|
||||
|
||||
bool useOpenCL = cv::ocl::useOpenCL() && _src.isUMat() && _src.size() == _dst.size() && src_type == dst_type &&
|
||||
_src.dims()<=2 && (src_cn == 1 || src_cn == 4) && (anchor.x == -1) && (anchor.y == -1) &&
|
||||
(src_depth == CV_8U || src_depth == CV_32F || src_depth == CV_64F ) &&
|
||||
(borderType == cv::BORDER_CONSTANT) && (borderValue == morphologyDefaultBorderValue()) &&
|
||||
(op == MORPH_ERODE || op == MORPH_DILATE);
|
||||
|
||||
Mat kernel = _kernel.getMat();
|
||||
Size ksize = kernel.data ? kernel.size() : Size(3,3);
|
||||
anchor = normalizeAnchor(anchor, ksize);
|
||||
@ -1299,13 +1421,11 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
|
||||
return;
|
||||
#endif
|
||||
|
||||
Mat src = _src.getMat();
|
||||
|
||||
_dst.create( src.size(), src.type() );
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
if( iterations == 0 || kernel.rows*kernel.cols == 1 )
|
||||
{
|
||||
Mat src = _src.getMat();
|
||||
_dst.create( src.size(), src.type() );
|
||||
Mat dst = _dst.getMat();
|
||||
src.copyTo(dst);
|
||||
return;
|
||||
}
|
||||
@ -1326,6 +1446,14 @@ static void morphOp( int op, InputArray _src, OutputArray _dst,
|
||||
iterations = 1;
|
||||
}
|
||||
|
||||
if (useOpenCL && ocl_morphology_op(_src, _dst, kernel, ksize, anchor, iterations, op) )
|
||||
return;
|
||||
|
||||
Mat src = _src.getMat();
|
||||
|
||||
_dst.create( src.size(), src.type() );
|
||||
Mat dst = _dst.getMat();
|
||||
|
||||
int nStripes = 1;
|
||||
#if defined HAVE_TEGRA_OPTIMIZATION
|
||||
if (src.data != dst.data && iterations == 1 && //NOTE: threads are not used for inplace processing
|
||||
@ -1362,49 +1490,97 @@ void cv::dilate( InputArray src, OutputArray dst, InputArray kernel,
|
||||
morphOp( MORPH_DILATE, src, dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
}
|
||||
|
||||
|
||||
void cv::morphologyEx( InputArray _src, OutputArray _dst, int op,
|
||||
InputArray kernel, Point anchor, int iterations,
|
||||
int borderType, const Scalar& borderValue )
|
||||
{
|
||||
Mat src = _src.getMat(), temp;
|
||||
_dst.create(src.size(), src.type());
|
||||
Mat dst = _dst.getMat();
|
||||
int src_type = _src.type(), dst_type = _dst.type(),
|
||||
src_cn = CV_MAT_CN(src_type), src_depth = CV_MAT_DEPTH(src_type);
|
||||
|
||||
bool use_opencl = cv::ocl::useOpenCL() && _src.isUMat() && _src.size() == _dst.size() && src_type == dst_type &&
|
||||
_src.dims()<=2 && (src_cn == 1 || src_cn == 4) && (anchor.x == -1) && (anchor.y == -1) &&
|
||||
(src_depth == CV_8U || src_depth == CV_32F || src_depth == CV_64F ) &&
|
||||
(borderType == cv::BORDER_CONSTANT) && (borderValue == morphologyDefaultBorderValue());
|
||||
|
||||
_dst.create(_src.size(), _src.type());
|
||||
Mat src, dst, temp;
|
||||
UMat usrc, udst, utemp;
|
||||
|
||||
switch( op )
|
||||
{
|
||||
case MORPH_ERODE:
|
||||
erode( src, dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
erode( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
break;
|
||||
case MORPH_DILATE:
|
||||
dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
dilate( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
break;
|
||||
case MORPH_OPEN:
|
||||
erode( src, dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
dilate( dst, dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
erode( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
dilate( _dst, _dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
break;
|
||||
case CV_MOP_CLOSE:
|
||||
dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
erode( dst, dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
dilate( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
erode( _dst, _dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
break;
|
||||
case CV_MOP_GRADIENT:
|
||||
erode( src, temp, kernel, anchor, iterations, borderType, borderValue );
|
||||
dilate( src, dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
dst -= temp;
|
||||
erode( _src, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel, anchor, iterations, borderType, borderValue );
|
||||
dilate( _src, _dst, kernel, anchor, iterations, borderType, borderValue );
|
||||
if(use_opencl)
|
||||
{
|
||||
udst = _dst.getUMat();
|
||||
subtract(udst, utemp, udst);
|
||||
}
|
||||
else
|
||||
{
|
||||
dst = _dst.getMat();
|
||||
dst -= temp;
|
||||
}
|
||||
break;
|
||||
case CV_MOP_TOPHAT:
|
||||
if( src.data != dst.data )
|
||||
temp = dst;
|
||||
erode( src, temp, kernel, anchor, iterations, borderType, borderValue );
|
||||
dilate( temp, temp, kernel, anchor, iterations, borderType, borderValue );
|
||||
dst = src - temp;
|
||||
if(use_opencl)
|
||||
{
|
||||
usrc = _src.getUMat();
|
||||
udst = _dst.getUMat();
|
||||
if( usrc.u != udst.u )
|
||||
utemp = udst;
|
||||
}
|
||||
else
|
||||
{
|
||||
src = _src.getMat();
|
||||
dst = _dst.getMat();
|
||||
if( src.data != dst.data )
|
||||
temp = dst;
|
||||
}
|
||||
erode( _src, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel, anchor, iterations, borderType, borderValue );
|
||||
dilate( use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel,
|
||||
anchor, iterations, borderType, borderValue );
|
||||
if(use_opencl)
|
||||
subtract(usrc, utemp, udst);
|
||||
else
|
||||
dst = src - temp;
|
||||
break;
|
||||
case CV_MOP_BLACKHAT:
|
||||
if( src.data != dst.data )
|
||||
temp = dst;
|
||||
dilate( src, temp, kernel, anchor, iterations, borderType, borderValue );
|
||||
erode( temp, temp, kernel, anchor, iterations, borderType, borderValue );
|
||||
dst = temp - src;
|
||||
if(use_opencl)
|
||||
{
|
||||
usrc = _src.getUMat();
|
||||
udst = _dst.getUMat();
|
||||
if( usrc.u != udst.u )
|
||||
utemp = udst;
|
||||
}
|
||||
else
|
||||
{
|
||||
src = _src.getMat();
|
||||
dst = _dst.getMat();
|
||||
if( src.data != dst.data )
|
||||
temp = dst;
|
||||
}
|
||||
dilate( _src, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel, anchor, iterations, borderType, borderValue );
|
||||
erode( use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, use_opencl ? (cv::OutputArray)utemp : (cv::OutputArray)temp, kernel,
|
||||
anchor, iterations, borderType, borderValue );
|
||||
if(use_opencl)
|
||||
subtract(utemp, usrc, udst);
|
||||
else
|
||||
dst = temp - src;
|
||||
break;
|
||||
default:
|
||||
CV_Error( CV_StsBadArg, "unknown morphological operation" );
|
||||
|
135
modules/imgproc/src/opencl/calc_back_project.cl
Normal file
135
modules/imgproc/src/opencl/calc_back_project.cl
Normal file
@ -0,0 +1,135 @@
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Niko Li, newlife20080214@gmail.com
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Xu Pang, pangxu010@163.com
|
||||
// Wenju He, wenju@multicorewareinc.com
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//
|
||||
|
||||
#define OUT_OF_RANGE -1
|
||||
|
||||
#if histdims == 1
|
||||
|
||||
// Builds the 1-D back-projection lookup table.
//
// Each work-item handles one input level (its global id along dimension 0).
// The level is mapped to a histogram bin using the half-open style range
// [ranges[0], ranges[1]] split into hist_bins equal intervals, and the scaled,
// saturated histogram value of that bin is stored in lut[level].
// Levels outside the range (or mapping past the last bin) get OUT_OF_RANGE.
//
//   histptr/hist_step/hist_offset : histogram storage; step and offset are
//                                   applied to a uchar pointer, i.e. in bytes
//   hist_bins                     : number of histogram bins
//   lut                           : output table, indexed by level
//   scale                         : multiplier applied to the histogram value
//   ranges                        : ranges[0] = lower bound, ranges[1] = upper bound
__kernel void calcLUT(__global const uchar * histptr, int hist_step, int hist_offset, int hist_bins,
                      __global int * lut, float scale, __constant float * ranges)
{
    const int level = get_global_id(0);
    float level_f = convert_float(level);

    // Reject levels that fall outside the configured range.
    if (level_f > ranges[1] || level_f < ranges[0])
    {
        lut[level] = OUT_OF_RANGE;
        return;
    }

    const float lower = ranges[0];
    const float upper = ranges[1];
    const float bin_width = (upper - lower) / hist_bins;

    level_f -= lower;
    const int bin = convert_int_sat_rtn(level_f / bin_width);

    // The upper bound itself maps one past the last bin.
    if (bin >= hist_bins)
    {
        lut[level] = OUT_OF_RANGE;
        return;
    }

    // Each bin lives on its own histogram row; read its first float.
    const int bin_offset = mad24(hist_step, bin, hist_offset);
    __global const float * bin_ptr = (__global const float *)(histptr + bin_offset);

    lut[level] = (int)convert_uchar_sat_rte(bin_ptr[0] * scale);
}
|
||||
|
||||
// Applies the 1-D back-projection lookup table to an 8-bit image.
//
// One work-item per destination pixel (x = global id 0, y = global id 1).
// The source byte at column x * scn of row y selects a LUT entry; entries
// equal to OUT_OF_RANGE produce 0, all others are converted to uchar.
// `scn` is not defined in this kernel; it is expected to be supplied at
// compile time (presumably the source channel count - confirm on the host side).
__kernel void LUT(__global const uchar * src, int src_step, int src_offset,
                  __constant int * lut,
                  __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    // Work-items outside the destination do nothing.
    if (col >= dst_cols || row >= dst_rows)
        return;

    const int src_pos = mad24(row, src_step, src_offset + col * scn);
    const int dst_pos = mad24(row, dst_step, dst_offset + col);

    const int mapped = lut[src[src_pos]];

    if (mapped == OUT_OF_RANGE)
        dst[dst_pos] = 0;
    else
        dst[dst_pos] = convert_uchar(mapped);
}
|
||||
|
||||
#elif histdims == 2
|
||||
|
||||
// Builds one level-to-bin lookup table for the 2-D back projection.
//
// Each work-item handles one input level (global id 0). The table is written
// starting at lut + lut_offset, and the range pair is read from
// ranges + roffset (ranges[0] = lower bound, ranges[1] = upper bound).
// The stored value is the bin index, or OUT_OF_RANGE when the level lies
// outside the range or maps past the last bin.
__kernel void calcLUT(int hist_bins, __global int * lut, int lut_offset,
                      __constant float * ranges, int roffset)
{
    const int level = get_global_id(0);
    float level_f = convert_float(level);

    // Select the range pair and the table slice for this histogram dimension.
    __constant float * range = ranges + roffset;
    __global int * table = lut + lut_offset;

    if (level_f > range[1] || level_f < range[0])
    {
        table[level] = OUT_OF_RANGE;
        return;
    }

    const float lower = range[0];
    const float upper = range[1];
    const float bin_width = (upper - lower) / hist_bins;

    level_f -= lower;
    const int bin = convert_int_sat_rtn(level_f / bin_width);

    if (bin >= hist_bins)
        table[level] = OUT_OF_RANGE;
    else
        table[level] = bin;
}
|
||||
|
||||
// Back-projects a 2-D histogram onto an 8-bit destination image.
//
// One work-item per destination pixel (x = global id 0, y = global id 1).
// The byte read from src1 indexes the first 256 LUT entries and the byte read
// from src2 indexes the entries starting at lut[256]; both yield bin indices.
// If either is OUT_OF_RANGE the output is 0. Otherwise the float stored at
// histogram row bin1, element bin2 is multiplied by `scale`, rounded and
// saturated to uchar.
// `scn1` / `scn2` are not defined in this kernel; they are expected to be
// supplied at compile time (presumably per-source channel counts - confirm).
__kernel void LUT(__global const uchar * src1, int src1_step, int src1_offset,
                  __global const uchar * src2, int src2_step, int src2_offset,
                  __global const uchar * histptr, int hist_step, int hist_offset,
                  __constant int * lut, float scale,
                  __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
{
    const int col = get_global_id(0);
    const int row = get_global_id(1);

    if (col >= dst_cols || row >= dst_rows)
        return;

    const int src1_pos = mad24(row, src1_step, src1_offset + col * scn1);
    const int src2_pos = mad24(row, src2_step, src2_offset + col * scn2);
    const int dst_pos = mad24(row, dst_step, dst_offset + col);

    const int bin1 = lut[src1[src1_pos]];
    const int bin2 = lut[src2[src2_pos] + 256];

    if (bin1 == OUT_OF_RANGE || bin2 == OUT_OF_RANGE)
    {
        dst[dst_pos] = 0;
    }
    else
    {
        // Byte offset of hist(bin1, bin2): row step times bin1 plus bin2 floats.
        const int hist_pos = mad24(hist_step, bin1, hist_offset + bin2 * (int)sizeof(float));
        dst[dst_pos] = convert_uchar_sat_rte(*(__global const float *)(histptr + hist_pos) * scale);
    }
}
|
||||
|
||||
#else
|
||||
// Only the 1-D and 2-D kernels above exist; the selector tested by the
// surrounding #if / #elif chain is `histdims`, so name it in the diagnostic.
#error "histdims must be 1 or 2"
|
||||
#endif
|
252
modules/imgproc/src/opencl/clahe.cl
Normal file
252
modules/imgproc/src/opencl/clahe.cl
Normal file
@ -0,0 +1,252 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Sen Liu, swjtuls1987@126.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef WAVE_SIZE
|
||||
#define WAVE_SIZE 1
|
||||
#endif
|
||||
|
||||
// Inclusive prefix sum over 256 local-memory entries.
//
// Every work-item stores `val` at smem[tid]; after a barrier, work-item 0
// alone accumulates entries 0..255 serially in place; after a second barrier
// each work-item returns the accumulated value at its own index.
// Must be called by all work-items of the group (it contains barriers).
inline int calc_lut(__local int* smem, int val, int tid)
{
    smem[tid] = val;
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid == 0)
    {
        // Carry the running total forward instead of re-reading smem[k - 1].
        int running = smem[0];
        for (int k = 1; k < 256; ++k)
        {
            running += smem[k];
            smem[k] = running;
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    return smem[tid];
}
|
||||
|
||||
#ifdef CPU
|
||||
// Work-group sum reduction (CPU variant): fully barrier-synchronised.
//
// Every work-item contributes `val` through smem[tid]. The array is folded in
// halves (128, 64, ..., 2) with a barrier after each step, and the final total
// smem[0] + smem[1] is written to smem[256]. Callers read the result from
// smem[256]. Must be called by all work-items of the group.
inline void reduce(volatile __local int* smem, int val, int tid)
{
    smem[tid] = val;
    barrier(CLK_LOCAL_MEM_FENCE);

    // Tree reduction: each pass adds the upper half onto the lower half.
    for (int half = 128; half >= 2; half >>= 1)
    {
        if (tid < half)
            smem[tid] += smem[tid + half];
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    // Last pair goes to a separate slot so smem[0] / smem[1] stay untouched.
    if (tid == 0)
        smem[256] = smem[0] + smem[1];
    barrier(CLK_LOCAL_MEM_FENCE);
}
|
||||
|
||||
#else
|
||||
|
||||
// Work-group sum reduction (device variant); the total ends up in smem[0].
//
// Every work-item contributes `val` through smem[tid]. Steps 128 and 64 are
// always separated by barriers. For the remaining steps a barrier is emitted
// only when WAVE_SIZE is smaller than the number of work-items still active,
// i.e. when they cannot be assumed to execute in lock-step.
//
// The original code applied this rule to the 32 and 16 steps only and ran the
// 8/4/2/1 steps without any synchronisation. With the default WAVE_SIZE of 1
// (or any value below 8) that was a data race on smem. The same
// `#if WAVE_SIZE < N` pattern is now applied down to the last step; for
// WAVE_SIZE >= 8 the generated code is unchanged.
//
// No trailing barrier is issued here; the caller must synchronise before
// other work-items read smem[0]. Must be called by all work-items of the group.
inline void reduce(__local volatile int* smem, int val, int tid)
{
    smem[tid] = val;
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid < 128)
        smem[tid] = val += smem[tid + 128];
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid < 64)
        smem[tid] = val += smem[tid + 64];
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid < 32)
    {
        smem[tid] += smem[tid + 32];
#if WAVE_SIZE < 32
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid < 16)
    {
#endif
        smem[tid] += smem[tid + 16];
#if WAVE_SIZE < 16
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid < 8)
    {
#endif
        smem[tid] += smem[tid + 8];
#if WAVE_SIZE < 8
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid < 4)
    {
#endif
        smem[tid] += smem[tid + 4];
#if WAVE_SIZE < 4
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid < 2)
    {
#endif
        smem[tid] += smem[tid + 2];
#if WAVE_SIZE < 2
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    if (tid < 1)
    {
#endif
        smem[tid] += smem[tid + 1];
    }
}
|
||||
#endif
|
||||
|
||||
// CLAHE step 1: builds the intensity lookup table of one tile.
//
// One work-group processes one tile (tx = group id 0, ty = group id 1):
//   1. the tile's 256-bin histogram is accumulated in local memory,
//   2. if clipLimit > 0, every bin is clipped to clipLimit and the clipped
//      excess is redistributed evenly over all 256 bins,
//   3. the histogram is prefix-summed (calc_lut), scaled by lutScale, rounded,
//      clamped to [0, 255] and written to row (ty * tilesX + tx) of `lut`.
//
// Each work-item owns histogram bin `tid` (its linear local id), and the code
// divides by 256 and scans 256 entries - so this assumes the work-group holds
// exactly 256 work-items (TODO confirm against the host-side launch).
//
//   src/srcStep/src_offset : 8-bit source image, step and offset in bytes
//   lut/dstStep/dst_offset : output tables, one row of dstStep bytes per tile
//   tileSize               : tile width (.x) and height (.y) in pixels
//   tilesX                 : number of tiles per row
//   clipLimit              : per-bin clip threshold; <= 0 disables clipping
//   lutScale               : factor applied to the cumulative histogram
__kernel void calcLut(__global __const uchar * src, const int srcStep,
                      const int src_offset, __global uchar * lut,
                      const int dstStep, const int dst_offset,
                      const int2 tileSize, const int tilesX,
                      const int clipLimit, const float lutScale)
{
    __local int smem[512];
    // OpenCL C requires __local variables to be declared at kernel function
    // scope; this one used to live inside the `if (clipLimit > 0)` block.
    __local int totalClipped;

    int tx = get_group_id(0);
    int ty = get_group_id(1);
    int tid = get_local_id(1) * get_local_size(0)
                             + get_local_id(0);
    smem[tid] = 0;
    barrier(CLK_LOCAL_MEM_FENCE);

    // Accumulate the tile histogram; work-items stride over the tile's pixels.
    for (int i = get_local_id(1); i < tileSize.y; i += get_local_size(1))
    {
        __global const uchar* srcPtr = src + mad24(ty * tileSize.y + i, srcStep, tx * tileSize.x + src_offset);
        for (int j = get_local_id(0); j < tileSize.x; j += get_local_size(0))
        {
            const int data = srcPtr[j];
            atomic_inc(&smem[data]);
        }
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    // Take a private copy of this work-item's bin before smem is reused.
    int tHistVal = smem[tid];
    barrier(CLK_LOCAL_MEM_FENCE);

    // clipLimit is a kernel argument, so this branch (and the barriers inside
    // it) is taken uniformly by the whole work-group.
    if (clipLimit > 0)
    {
        // clip histogram bar
        int clipped = 0;
        if (tHistVal > clipLimit)
        {
            clipped = tHistVal - clipLimit;
            tHistVal = clipLimit;
        }

        // find number of overall clipped samples
        reduce(smem, clipped, tid);
        barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
        clipped = smem[256];
#else
        clipped = smem[0];
#endif

        // broadcast evaluated value
        if (tid == 0)
            totalClipped = clipped;
        barrier(CLK_LOCAL_MEM_FENCE);

        // redistribute clipped samples evenly
        int redistBatch = totalClipped / 256;
        tHistVal += redistBatch;

        // the first `residual` bins receive one extra sample each
        int residual = totalClipped - redistBatch * 256;
        if (tid < residual)
            ++tHistVal;
    }

    const int lutVal = calc_lut(smem, tHistVal, tid);
    uint ires = (uint)convert_int_rte(lutScale * lutVal);
    lut[(ty * tilesX + tx) * dstStep + tid + dst_offset] =
        convert_uchar(clamp(ires, (uint)0, (uint)255));
}
|
||||
|
||||
// CLAHE step 2: remaps every pixel through the lookup tables of the four
// surrounding tiles and blends the results bilinearly.
//
// One work-item per pixel (x = global id 0, y = global id 1). The pixel's
// position relative to tile centres gives the two tile rows (ty1, ty2) and
// columns (tx1, tx2) plus the fractional weights ya / xa; tile indices are
// clamped to the valid tile grid. The blended value is rounded, clamped to
// [0, 255] and stored in dst.
//
//   lut/lutStep/lut_offset : one table row of lutStep bytes per tile,
//                            indexed by (tileY * tilesX + tileX)
//   tileSize               : tile width (.x) and height (.y) in pixels
__kernel void transform(__global __const uchar * src, const int srcStep, const int src_offset,
                        __global uchar * dst, const int dstStep, const int dst_offset,
                        __global uchar * lut, const int lutStep, int lut_offset,
                        const int cols, const int rows,
                        const int2 tileSize,
                        const int tilesX, const int tilesY)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    if (x < cols && y < rows)
    {
        // Vertical neighbours and blend factor.
        const float tyf = (convert_float(y) / tileSize.y) - 0.5f;
        const int tyFloor = convert_int_rtn(tyf);
        const float ya = tyf - tyFloor;
        const int ty1 = max(tyFloor, 0);
        const int ty2 = min(tyFloor + 1, tilesY - 1);

        // Horizontal neighbours and blend factor.
        const float txf = (convert_float(x) / tileSize.x) - 0.5f;
        const int txFloor = convert_int_rtn(txf);
        const float xa = txf - txFloor;
        const int tx1 = max(txFloor, 0);
        const int tx2 = min(txFloor + 1, tilesX - 1);

        const int srcVal = src[mad24(y, srcStep, x + src_offset)];
        const int lutCol = srcVal + lut_offset;

        // Accumulate in the fixed order top-left, top-right, bottom-left,
        // bottom-right so the floating-point result is reproducible.
        float res = 0;
        res += lut[mad24(ty1 * tilesX + tx1, lutStep, lutCol)] * ((1.0f - xa) * (1.0f - ya));
        res += lut[mad24(ty1 * tilesX + tx2, lutStep, lutCol)] * ((xa) * (1.0f - ya));
        res += lut[mad24(ty2 * tilesX + tx1, lutStep, lutCol)] * ((1.0f - xa) * (ya));
        res += lut[mad24(ty2 * tilesX + tx2, lutStep, lutCol)] * ((xa) * (ya));

        const uint ires = (uint)convert_int_rte(res);
        dst[mad24(y, dstStep, x + dst_offset)] = convert_uchar(clamp(ires, (uint)0, (uint)255));
    }
}
|
116
modules/imgproc/src/opencl/filterSepCol.cl
Normal file
116
modules/imgproc/src/opencl/filterSepCol.cl
Normal file
@ -0,0 +1,116 @@
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Niko Li, newlife20080214@gmail.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//
|
||||
|
||||
#define READ_TIMES_COL ((2*(RADIUSY+LSIZE1)-1)/LSIZE1)
|
||||
#define RADIUS 1
|
||||
#if CN ==1
|
||||
#define ALIGN (((RADIUS)+3)>>2<<2)
|
||||
#elif CN==2
|
||||
#define ALIGN (((RADIUS)+1)>>1<<1)
|
||||
#elif CN==3
|
||||
#define ALIGN (((RADIUS)+3)>>2<<2)
|
||||
#elif CN==4
|
||||
#define ALIGN (RADIUS)
|
||||
#define READ_TIMES_ROW ((2*(RADIUS+LSIZE0)-1)/LSIZE0)
|
||||
#endif
|
||||
|
||||
/**********************************************************************************
|
||||
These kernels are written for separable filters such as Sobel, Scharr, GaussianBlur.
|
||||
Now(6/29/2011) the kernels only support 8U data type and the anchor of the convovle
|
||||
kernel must be in the center. ROI is not supported either.
|
||||
Each kernels read 4 elements(not 4 pixels), save them to LDS and read the data needed
|
||||
from LDS to calculate the result.
|
||||
The length of the convovle kernel supported is only related to the MAX size of LDS,
|
||||
which is HW related.
|
||||
Niko
|
||||
6/29/2011
|
||||
The info above maybe obsolete.
|
||||
***********************************************************************************/
|
||||
|
||||
|
||||
// Column (vertical) pass of a separable filter.
//
// Each work-group stages READ_TIMES_COL * LSIZE1 source rows of its column
// strip in local memory, then every work-item convolves the 2*RADIUSY+1
// vertically adjacent staged values with mat_kernel and writes one output
// element. Source addresses at or beyond end_addr are redirected to element 0
// so the staging read never leaves the buffer.
//
// GENTYPE_SRC, GENTYPE_DST, convert_to_DST, RADIUSY, LSIZE0 and LSIZE1 are
// not defined in this kernel; they are expected to come from the build options.
// All steps and offsets are in elements ("pixels"), not bytes.
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void col_filter
    (__global const GENTYPE_SRC * restrict src,
     const int src_step_in_pixel,
     const int src_whole_cols,
     const int src_whole_rows,
     __global GENTYPE_DST * dst,
     const int dst_offset_in_pixel,
     const int dst_step_in_pixel,
     const int dst_cols,
     const int dst_rows,
     __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSY+1)))))
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int l_x = get_local_id(0);
    const int l_y = get_local_id(1);

    const int first_addr = mad24(y, src_step_in_pixel, x);
    const int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);

    // The extra column of padding is kept from the original layout.
    __local GENTYPE_SRC LDS_DAT[LSIZE1 * READ_TIMES_COL][LSIZE0 + 1];

    // Stage the source rows this work-item is responsible for.
    for (int k = 0; k < READ_TIMES_COL; k++)
    {
        int current_addr = first_addr + k*LSIZE1*src_step_in_pixel;
        if (!(current_addr < end_addr))
            current_addr = 0;
        LDS_DAT[l_y + k*LSIZE1][l_x] = src[current_addr];
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    // Centre tap first, then symmetric pairs moving outwards.
    GENTYPE_SRC sum = LDS_DAT[l_y+RADIUSY][l_x]*mat_kernel[RADIUSY];
    for (int k = 1; k <= RADIUSY; k++)
    {
        const GENTYPE_SRC above = LDS_DAT[l_y+RADIUSY-k][l_x];
        const GENTYPE_SRC below = LDS_DAT[l_y+RADIUSY+k][l_x];
        sum += above * mat_kernel[RADIUSY-k]+below * mat_kernel[RADIUSY+k];
    }

    // Only work-items that map inside the destination write a result.
    if ((x<dst_cols) & (y<dst_rows))
    {
        const int dst_addr = mad24(y, dst_step_in_pixel, x + dst_offset_in_pixel);
        dst[dst_addr] = convert_to_DST(sum);
    }
}
|
570
modules/imgproc/src/opencl/filterSepRow.cl
Normal file
570
modules/imgproc/src/opencl/filterSepRow.cl
Normal file
@ -0,0 +1,570 @@
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Niko Li, newlife20080214@gmail.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//
|
||||
|
||||
#define READ_TIMES_ROW ((2*(RADIUSX+LSIZE0)-1)/LSIZE0) //for c4 only
|
||||
#define READ_TIMES_COL ((2*(RADIUSY+LSIZE1)-1)/LSIZE1)
|
||||
//#pragma OPENCL EXTENSION cl_amd_printf : enable
|
||||
#define RADIUS 1
|
||||
#if CN ==1
|
||||
#define ALIGN (((RADIUS)+3)>>2<<2)
|
||||
#elif CN==2
|
||||
#define ALIGN (((RADIUS)+1)>>1<<1)
|
||||
#elif CN==3
|
||||
#define ALIGN (((RADIUS)+3)>>2<<2)
|
||||
#elif CN==4
|
||||
#define ALIGN (RADIUS)
|
||||
#endif
|
||||
|
||||
#ifdef BORDER_REPLICATE
|
||||
//BORDER_REPLICATE: aaaaaa|abcdefgh|hhhhhhh
|
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (l_edge) : (i))
|
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (r_edge)-1 : (addr))
|
||||
#endif
|
||||
|
||||
#ifdef BORDER_REFLECT
|
||||
//BORDER_REFLECT: fedcba|abcdefgh|hgfedcb
|
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i)-1 : (i))
|
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-1+((r_edge)<<1) : (addr))
|
||||
#endif
|
||||
|
||||
#ifdef BORDER_REFLECT_101
|
||||
//BORDER_REFLECT_101: gfedcb|abcdefgh|gfedcba
|
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? -(i) : (i))
|
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? -(i)-2+((r_edge)<<1) : (addr))
|
||||
#endif
|
||||
|
||||
//blur function does not support BORDER_WRAP
|
||||
#ifdef BORDER_WRAP
|
||||
//BORDER_WRAP: cdefgh|abcdefgh|abcdefg
|
||||
#define ADDR_L(i, l_edge, r_edge) ((i) < (l_edge) ? (i)+(r_edge) : (i))
|
||||
#define ADDR_R(i, r_edge, addr) ((i) >= (r_edge) ? (i)-(r_edge) : (addr))
|
||||
#endif
|
||||
|
||||
#ifdef EXTRA_EXTRAPOLATION // border > src image size
|
||||
#ifdef BORDER_CONSTANT
|
||||
#define ELEM(i,l_edge,r_edge,elem1,elem2) (i)<(l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
|
||||
#elif defined BORDER_REPLICATE
|
||||
#define EXTRAPOLATE(t, minT, maxT) \
|
||||
{ \
|
||||
t = max(min(t, (maxT) - 1), (minT)); \
|
||||
}
|
||||
#elif defined BORDER_WRAP
|
||||
// BORDER_WRAP: cdefgh|abcdefgh|abcdefg
// Wraps coordinate `t` (an int lvalue, modified in place) into [minT, maxT).
// The previous definition named its parameter `x` while the body used `t`,
// so it did not compile, and it wrapped modulo maxT instead of modulo the
// range length, which is wrong whenever minT != 0 (isolated ROI).
// For minT == 0 the result is the same as the intended original behaviour.
#define EXTRAPOLATE(t, minT, maxT) \
    { \
        int _len = (maxT) - (minT); \
        int _rel = ((t) - (minT)) % _len; \
        (t) = (minT) + (_rel < 0 ? _rel + _len : _rel); \
    }
|
||||
#elif defined(BORDER_REFLECT) || defined(BORDER_REFLECT_101)
|
||||
#define EXTRAPOLATE_(t, minT, maxT, delta) \
|
||||
{ \
|
||||
if ((maxT) - (minT) == 1) \
|
||||
t = (minT); \
|
||||
else \
|
||||
do \
|
||||
{ \
|
||||
if (t < (minT)) \
|
||||
t = (minT) - (t - (minT)) - 1 + delta; \
|
||||
else \
|
||||
t = (maxT) - 1 - (t - (maxT)) - delta; \
|
||||
} \
|
||||
while (t >= (maxT) || t < (minT)); \
|
||||
\
|
||||
}
|
||||
#ifdef BORDER_REFLECT
|
||||
#define EXTRAPOLATE(t, minT, maxT) EXTRAPOLATE_(t, minT, maxT, 0)
|
||||
#elif defined(BORDER_REFLECT_101)
|
||||
#define EXTRAPOLATE(t, minT, maxT) EXTRAPOLATE_(t, minT, maxT, 1)
|
||||
#endif
|
||||
#else
|
||||
#error No extrapolation method
|
||||
#endif //BORDER_....
|
||||
#else //EXTRA_EXTRAPOLATION
|
||||
#ifdef BORDER_CONSTANT
|
||||
#define ELEM(i,l_edge,r_edge,elem1,elem2) (i)<(l_edge) | (i) >= (r_edge) ? (elem1) : (elem2)
|
||||
#else
|
||||
#define EXTRAPOLATE(t, minT, maxT) \
|
||||
{ \
|
||||
int _delta = t - (minT); \
|
||||
_delta = ADDR_L(_delta, 0, (maxT) - (minT)); \
|
||||
_delta = ADDR_R(_delta, (maxT) - (minT), _delta); \
|
||||
t = _delta + (minT); \
|
||||
}
|
||||
#endif //BORDER_CONSTANT
|
||||
#endif //EXTRA_EXTRAPOLATION
|
||||
|
||||
/**********************************************************************************
|
||||
These kernels are written for separable filters such as Sobel, Scharr, GaussianBlur.
|
||||
Now(6/29/2011) the kernels only support 8U data type and the anchor of the convovle
|
||||
kernel must be in the center. ROI is not supported either.
|
||||
For channels =1,2,4, each kernels read 4 elements(not 4 pixels), and for channels =3,
|
||||
the kernel read 4 pixels, save them to LDS and read the data needed from LDS to
|
||||
calculate the result.
|
||||
The length of the convovle kernel supported is related to the LSIZE0 and the MAX size
|
||||
of LDS, which is HW related.
|
||||
For channels = 1,3 the RADIUS is no more than LSIZE0*2
|
||||
For channels = 2, the RADIUS is no more than LSIZE0
|
||||
For channels = 4, arbitary RADIUS is supported unless the LDS is not enough
|
||||
Niko
|
||||
6/29/2011
|
||||
The info above maybe obsolete.
|
||||
***********************************************************************************/
|
||||
|
||||
// Row (horizontal) pass of a separable filter: 8-bit single-channel source,
// float destination. Each work-item produces four consecutive outputs.
//
// Phases:
//   1. load READ_TIMES_ROW uchar4 groups per work-item, applying the border
//      mode selected at compile time (BORDER_CONSTANT substitutes 0 for
//      out-of-range samples, every other mode remaps coordinates with EXTRAPOLATE),
//   2. stage them in local memory,
//   3. convolve 2*RADIUSX+1 taps from local memory with mat_kernel,
//   4. write up to four floats, trimmed at the right edge of dst.
//
// With BORDER_ISOLATED the valid region is the ROI
// [src_offset, src_offset + src_cols/rows); otherwise it is the whole buffer.
// RADIUSX, LSIZE0 and LSIZE1 are not defined in this kernel; they are
// expected to come from the build options.
// Steps and offsets are in elements.
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D0
    (__global uchar * restrict src,
     int src_step_in_pixel,
     int src_offset_x, int src_offset_y,
     int src_cols, int src_rows,
     int src_whole_cols, int src_whole_rows,
     __global float * dst,
     int dst_step_in_pixel,
     int dst_cols, int dst_rows,
     int radiusy,
     __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
{
    const int x = get_global_id(0)<<2;
    const int y = get_global_id(1);
    const int l_x = get_local_id(0);
    const int l_y = get_local_id(1);

    // First source column is rounded down to a multiple of 4; `offset` is the
    // remainder that is re-applied when reading back from local memory.
    const int start_x = (x+src_offset_x - RADIUSX) & 0xfffffffc;
    const int offset = (src_offset_x - RADIUSX) & 3;
    const int start_y = y + src_offset_y - radiusy;
    const int start_addr = mad24(start_y, src_step_in_pixel, start_x);

    uchar4 temp[READ_TIMES_ROW];

    __local uchar4 LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];

    // Bounds of the region that counts as "inside" for border handling.
#ifdef BORDER_ISOLATED
    const int col_begin = src_offset_x, col_end = src_offset_x + src_cols;
    const int row_begin = src_offset_y, row_end = src_offset_y + src_rows;
#else
    const int col_begin = 0, col_end = src_whole_cols;
    const int row_begin = 0, row_end = src_whole_rows;
#endif

#ifdef BORDER_CONSTANT
    const int end_addr = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);

    for (int k = 0; k < READ_TIMES_ROW; k++)
    {
        // read pixels from src; addresses outside (0, end_addr) fall back to 0
        int current_addr = start_addr+k*LSIZE0*4;
        current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
        temp[k] = *(__global uchar4*)&src[current_addr];

        // zero every sample whose coordinate lies outside the valid region
        const int first_col = start_x+k*LSIZE0*4;
        temp[k].x = ELEM(first_col, col_begin, col_end, 0, temp[k].x);
        temp[k].y = ELEM(first_col+1, col_begin, col_end, 0, temp[k].y);
        temp[k].z = ELEM(first_col+2, col_begin, col_end, 0, temp[k].z);
        temp[k].w = ELEM(first_col+3, col_begin, col_end, 0, temp[k].w);
        temp[k] = ELEM(start_y, row_begin, row_end, (uchar4)0, temp[k]);
    }
#else // BORDER_CONSTANT
    const int not_all_in_range = (start_x<col_begin) | (start_x + READ_TIMES_ROW*LSIZE0*4+4>col_end)| (start_y<row_begin) | (start_y >= row_end);

    if (not_all_in_range)
    {
        // Slow path: remap the row once, then every column individually.
        int s_y = start_y;
        EXTRAPOLATE(s_y, row_begin, row_end);

        for (int k = 0; k < READ_TIMES_ROW; k++)
        {
            int4 cols = (int4)(start_x+k*LSIZE0*4) + (int4)(0, 1, 2, 3);
            EXTRAPOLATE(cols.x, col_begin, col_end);
            EXTRAPOLATE(cols.y, col_begin, col_end);
            EXTRAPOLATE(cols.z, col_begin, col_end);
            EXTRAPOLATE(cols.w, col_begin, col_end);

            const int4 addr = mad24((int4)s_y,(int4)src_step_in_pixel,cols);
            temp[k].x = src[addr.x];
            temp[k].y = src[addr.y];
            temp[k].z = src[addr.z];
            temp[k].w = src[addr.w];
        }
    }
    else
    {
        // Fast path: everything is inside, read whole uchar4 groups.
        for (int k = 0; k < READ_TIMES_ROW; k++)
            temp[k] = *(__global uchar4*)&src[start_addr+k*LSIZE0*4];
    }
#endif //BORDER_CONSTANT

    // save pixels to lds
    for (int k = 0; k < READ_TIMES_ROW; k++)
        LDS_DAT[l_y][l_x+k*LSIZE0]=temp[k];
    barrier(CLK_LOCAL_MEM_FENCE);

    // read pixels from lds and calculate the result:
    // `centre` addresses the staged bytes of the four outputs' centre taps.
    __local uchar * centre = (__local uchar*)&LDS_DAT[l_y][l_x]+RADIUSX+offset;
    float4 sum = convert_float4(vload4(0, centre))*mat_kernel[RADIUSX];
    for (int k = 1; k <= RADIUSX; k++)
    {
        const uchar4 left = vload4(0, centre - k);
        const uchar4 right = vload4(0, centre + k);
        sum += convert_float4(left) * mat_kernel[RADIUSX-k] + convert_float4(right) * mat_kernel[RADIUSX+k];
    }

    // write the result to dst, trimming the vector at the right edge
    const int dst_addr = mad24(y,dst_step_in_pixel,x);
    if (y<dst_rows)
    {
        if (x+3<dst_cols)
        {
            *(__global float4*)&dst[dst_addr] = sum;
        }
        else if (x+2<dst_cols)
        {
            dst[dst_addr] = sum.x;
            dst[dst_addr+1] = sum.y;
            dst[dst_addr+2] = sum.z;
        }
        else if (x+1<dst_cols)
        {
            dst[dst_addr] = sum.x;
            dst[dst_addr+1] = sum.y;
        }
        else if (x<dst_cols)
        {
            dst[dst_addr] = sum.x;
        }
    }
}
|
||||
|
||||
// Row (horizontal) pass of a separable filter: 8-bit four-channel source
// (one uchar4 per pixel), float4 destination. One work-item per output pixel.
//
// Phases:
//   1. load READ_TIMES_ROW pixels per work-item, applying the border mode
//      selected at compile time (BORDER_CONSTANT substitutes zero pixels,
//      every other mode remaps coordinates with EXTRAPOLATE),
//   2. stage them in local memory,
//   3. convolve 2*RADIUSX+1 taps from local memory with mat_kernel,
//   4. write the float4 result if the work-item maps inside dst.
//
// With BORDER_ISOLATED the valid region is the ROI
// [src_offset, src_offset + src_cols/rows); otherwise it is the whole buffer.
// RADIUSX, LSIZE0 and LSIZE1 are not defined in this kernel; they are
// expected to come from the build options.
// Steps and offsets are in pixels.
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C4_D0
    (__global uchar4 * restrict src,
     int src_step_in_pixel,
     int src_offset_x, int src_offset_y,
     int src_cols, int src_rows,
     int src_whole_cols, int src_whole_rows,
     __global float4 * dst,
     int dst_step_in_pixel,
     int dst_cols, int dst_rows,
     int radiusy,
     __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);
    const int l_x = get_local_id(0);
    const int l_y = get_local_id(1);
    const int start_x = x+src_offset_x-RADIUSX;
    const int start_y = y+src_offset_y-radiusy;

    uchar4 temp[READ_TIMES_ROW];

    __local uchar4 LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];

    // Bounds of the region that counts as "inside" for border handling.
#ifdef BORDER_ISOLATED
    const int col_begin = src_offset_x, col_end = src_offset_x + src_cols;
    const int row_begin = src_offset_y, row_end = src_offset_y + src_rows;
#else
    const int col_begin = 0, col_end = src_whole_cols;
    const int row_begin = 0, row_end = src_whole_rows;
#endif

#ifdef BORDER_CONSTANT
    const int start_addr = mad24(start_y,src_step_in_pixel,start_x);
    const int end_addr = mad24(src_whole_rows - 1,src_step_in_pixel,src_whole_cols);

    for (int k = 0; k < READ_TIMES_ROW; k++)
    {
        // read pixels from src; addresses outside (0, end_addr) fall back to 0
        int current_addr = start_addr+k*LSIZE0;
        current_addr = ((current_addr < end_addr) && (current_addr > 0)) ? current_addr : 0;
        temp[k] = src[current_addr];

        // zero the pixel if its column or row lies outside the valid region
        temp[k]= ELEM(start_x+k*LSIZE0, col_begin, col_end, (uchar4)0, temp[k]);
        temp[k]= ELEM(start_y, row_begin, row_end, (uchar4)0, temp[k]);
    }
#else
    // The row is the same for every read, so remap it once.
    int s_y = start_y;
    EXTRAPOLATE(s_y, row_begin, row_end);

    for (int k = 0; k < READ_TIMES_ROW; k++)
    {
        int s_x = start_x+k*LSIZE0;
        EXTRAPOLATE(s_x, col_begin, col_end);
        temp[k] = src[mad24(s_y, src_step_in_pixel, s_x)];
    }
#endif //BORDER_CONSTANT

    //save pixels to lds
    for (int k = 0; k < READ_TIMES_ROW; k++)
        LDS_DAT[l_y][l_x+k*LSIZE0]=temp[k];
    barrier(CLK_LOCAL_MEM_FENCE);

    //read pixels from lds and calculate the result: centre tap, then pairs
    float4 sum = convert_float4(LDS_DAT[l_y][l_x+RADIUSX])*mat_kernel[RADIUSX];
    for (int k = 1; k <= RADIUSX; k++)
    {
        const uchar4 left = LDS_DAT[l_y][l_x+RADIUSX-k];
        const uchar4 right = LDS_DAT[l_y][l_x+RADIUSX+k];
        sum += convert_float4(left)*mat_kernel[RADIUSX-k]+convert_float4(right)*mat_kernel[RADIUSX+k];
    }

    //write the result to dst
    if (x<dst_cols && y<dst_rows)
        dst[mad24(y,dst_step_in_pixel,x)] = sum;
}
|
||||
|
||||
// Row pass of a separable filter for single-channel float images.
//
// Every work-item loads READ_TIMES_ROW source pixels (spaced LSIZE0 apart) of
// its row into local memory, applying the compile-time selected border mode,
// and then convolves the (2*RADIUSX+1) taps of mat_kernel around its own
// position.  Source coordinates are shifted by (src_offset_x - RADIUSX,
// src_offset_y - radiusy); results are written only inside dst_cols x dst_rows.
// All steps and offsets are expressed in pixels.
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C1_D5
(__global float * restrict src,
 int src_step_in_pixel,
 int src_offset_x, int src_offset_y,
 int src_cols, int src_rows,
 int src_whole_cols, int src_whole_rows,
 __global float * dst,
 int dst_step_in_pixel,
 int dst_cols, int dst_rows,
 int radiusy,
 __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
{
    const int gx = get_global_id(0);
    const int gy = get_global_id(1);
    const int lx = get_local_id(0);
    const int ly = get_local_id(1);

    // top-left source coordinate this work-item starts reading from
    const int first_col = gx + src_offset_x - RADIUSX;
    const int row       = gy + src_offset_y - radiusy;

    int k;
    float pix[READ_TIMES_ROW];

    __local float LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];

#ifdef BORDER_CONSTANT
    const int base_addr = mad24(row, src_step_in_pixel, first_col);
    const int end_addr  = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);

    for (k = 0; k < READ_TIMES_ROW; k++)
    {
        const int col = first_col + k*LSIZE0;

        // read from a safe address; anything outside (0, end_addr) falls back to element 0
        int addr = base_addr + k*LSIZE0;
        if (!((addr < end_addr) && (addr > 0)))
            addr = 0;
        pix[k] = src[addr];

        // replace pixels that lie outside the valid area by the constant border value 0
#ifdef BORDER_ISOLATED
        pix[k] = ELEM(col, src_offset_x, src_offset_x + src_cols, (float)0, pix[k]);
        pix[k] = ELEM(row, src_offset_y, src_offset_y + src_rows, (float)0, pix[k]);
#else
        pix[k] = ELEM(col, 0, src_whole_cols, (float)0, pix[k]);
        pix[k] = ELEM(row, 0, src_whole_rows, (float)0, pix[k]);
#endif
    }
#else // BORDER_CONSTANT
    for (k = 0; k < READ_TIMES_ROW; k++)
    {
        int s_x = first_col + k*LSIZE0;
        int s_y = row;

        // remap out-of-range coordinates according to the border mode
#ifdef BORDER_ISOLATED
        EXTRAPOLATE(s_x, src_offset_x, src_offset_x + src_cols);
        EXTRAPOLATE(s_y, src_offset_y, src_offset_y + src_rows);
#else
        EXTRAPOLATE(s_x, 0, src_whole_cols);
        EXTRAPOLATE(s_y, 0, src_whole_rows);
#endif

        pix[k] = src[mad24(s_y, src_step_in_pixel, s_x)];
    }
#endif // BORDER_CONSTANT

    // publish the loaded pixels to the whole work-group
    for (k = 0; k < READ_TIMES_ROW; k++)
        LDS_DAT[ly][lx + k*LSIZE0] = pix[k];
    barrier(CLK_LOCAL_MEM_FENCE);

    // centre tap first, then the symmetric pairs around it
    float sum = LDS_DAT[ly][lx+RADIUSX]*mat_kernel[RADIUSX];
    for (k = 1; k <= RADIUSX; k++)
    {
        const float left  = LDS_DAT[ly][lx+RADIUSX-k];
        const float right = LDS_DAT[ly][lx+RADIUSX+k];
        sum += left*mat_kernel[RADIUSX-k]+right*mat_kernel[RADIUSX+k];
    }

    // store the result for work-items that map onto the destination
    if (gx < dst_cols && gy < dst_rows)
        dst[mad24(gy, dst_step_in_pixel, gx)] = sum;
}
|
||||
|
||||
// Row pass of a separable filter for four-channel float images.
//
// Every work-item loads READ_TIMES_ROW float4 source pixels (spaced LSIZE0
// apart) of its row into local memory, applying the compile-time selected
// border mode, and then convolves the (2*RADIUSX+1) scalar taps of mat_kernel
// around its own position (each tap scales all four channels).  Source
// coordinates are shifted by (src_offset_x - RADIUSX, src_offset_y - radiusy);
// results are written only inside dst_cols x dst_rows.  All steps and offsets
// are expressed in pixels.
__kernel __attribute__((reqd_work_group_size(LSIZE0,LSIZE1,1))) void row_filter_C4_D5
(__global float4 * restrict src,
 int src_step_in_pixel,
 int src_offset_x, int src_offset_y,
 int src_cols, int src_rows,
 int src_whole_cols, int src_whole_rows,
 __global float4 * dst,
 int dst_step_in_pixel,
 int dst_cols, int dst_rows,
 int radiusy,
 __constant float * mat_kernel __attribute__((max_constant_size(4*(2*RADIUSX+1)))))
{
    const int gx = get_global_id(0);
    const int gy = get_global_id(1);
    const int lx = get_local_id(0);
    const int ly = get_local_id(1);

    // top-left source coordinate this work-item starts reading from
    const int first_col = gx + src_offset_x - RADIUSX;
    const int row       = gy + src_offset_y - radiusy;

    int k;
    float4 pix[READ_TIMES_ROW];

    __local float4 LDS_DAT[LSIZE1][READ_TIMES_ROW*LSIZE0+1];

#ifdef BORDER_CONSTANT
    const int base_addr = mad24(row, src_step_in_pixel, first_col);
    const int end_addr  = mad24(src_whole_rows - 1, src_step_in_pixel, src_whole_cols);

    for (k = 0; k < READ_TIMES_ROW; k++)
    {
        const int col = first_col + k*LSIZE0;

        // read from a safe address; anything outside (0, end_addr) falls back to element 0
        int addr = base_addr + k*LSIZE0;
        if (!((addr < end_addr) && (addr > 0)))
            addr = 0;
        pix[k] = src[addr];

        // replace pixels that lie outside the valid area by the constant border value 0
#ifdef BORDER_ISOLATED
        pix[k] = ELEM(col, src_offset_x, src_offset_x + src_cols, (float4)0, pix[k]);
        pix[k] = ELEM(row, src_offset_y, src_offset_y + src_rows, (float4)0, pix[k]);
#else
        pix[k] = ELEM(col, 0, src_whole_cols, (float4)0, pix[k]);
        pix[k] = ELEM(row, 0, src_whole_rows, (float4)0, pix[k]);
#endif
    }
#else // BORDER_CONSTANT
    for (k = 0; k < READ_TIMES_ROW; k++)
    {
        int s_x = first_col + k*LSIZE0;
        int s_y = row;

        // remap out-of-range coordinates according to the border mode
#ifdef BORDER_ISOLATED
        EXTRAPOLATE(s_x, src_offset_x, src_offset_x + src_cols);
        EXTRAPOLATE(s_y, src_offset_y, src_offset_y + src_rows);
#else
        EXTRAPOLATE(s_x, 0, src_whole_cols);
        EXTRAPOLATE(s_y, 0, src_whole_rows);
#endif

        pix[k] = src[mad24(s_y, src_step_in_pixel, s_x)];
    }
#endif // BORDER_CONSTANT

    // publish the loaded pixels to the whole work-group
    for (k = 0; k < READ_TIMES_ROW; k++)
        LDS_DAT[ly][lx + k*LSIZE0] = pix[k];
    barrier(CLK_LOCAL_MEM_FENCE);

    // centre tap first, then the symmetric pairs around it
    float4 sum = LDS_DAT[ly][lx+RADIUSX]*mat_kernel[RADIUSX];
    for (k = 1; k <= RADIUSX; k++)
    {
        const float4 left  = LDS_DAT[ly][lx+RADIUSX-k];
        const float4 right = LDS_DAT[ly][lx+RADIUSX+k];
        sum += left*mat_kernel[RADIUSX-k]+right*mat_kernel[RADIUSX+k];
    }

    // store the result for work-items that map onto the destination
    if (gx < dst_cols && gy < dst_rows)
        dst[mad24(gy, dst_step_in_pixel, gx)] = sum;
}
|
147
modules/imgproc/src/opencl/moments.cl
Normal file
147
modules/imgproc/src/opencl/moments.cl
Normal file
@ -0,0 +1,147 @@
|
||||
/* See LICENSE file in the root OpenCV directory */
|
||||
|
||||
#if TILE_SIZE != 32
|
||||
#error "TILE SIZE should be 32"
|
||||
#endif
|
||||
|
||||
// Computes the ten raw moments (orders 0..3) of one TILE_SIZE x TILE_SIZE tile
// of an 8-bit image, using coordinates relative to the tile origin.
//
// Each work-item accumulates one image row of the tile (row index = local id
// in dimension 1); the per-row partial moments are then summed through local
// memory and the work-item with local y == 0 writes the ten totals to
// mom0[(y0*xtiles + x0)*10 ...] in the order
// m00, m10, m01, m20, m11, m02, m30, m21, m12, m03.
//
// NOTE(review): the barriers and the shared `mom` array assume a work-group
// of 1 x TILE_SIZE work-items, so that x0/y0 are uniform per group - confirm
// against the host-side launch configuration.
__kernel void moments(__global const uchar* src, int src_step, int src_offset,
                      int src_rows, int src_cols, __global int* mom0, int xtiles)
{
    int x0 = get_global_id(0);
    int y0 = get_group_id(1);
    int x, y = get_local_id(1);
    int x_min = x0*TILE_SIZE;
    int ypix = y0*TILE_SIZE + y;
    int k, d;
    __local int mom[TILE_SIZE][10];

    // tiles that start outside the image produce no output at all
    if( x_min >= src_cols || y0*TILE_SIZE >= src_rows )
        return;

    if( ypix < src_rows )
    {
        int x_max = min(src_cols - x_min, TILE_SIZE);
        __global const uchar* ptr = src + src_offset + ypix*src_step + x_min;
        int4 S = (int4)(0,0,0,0), p;

        // contribution of one pixel at tile-relative column `ofs`:
        // (p, p*x, p*x^2, p*x^3)
        #define SUM_ELEM(elem, ofs) \
            (int4)(1, (ofs), (ofs)*(ofs), (ofs)*(ofs)*(ofs))*elem

        // bulk part: groups of four pixels
        x = x_max & -4;
        for( k = 0; k < x; k += 4 )
        {
            p = convert_int4(vload4(0, ptr + k));
            S += SUM_ELEM(p.s0, k) + SUM_ELEM(p.s1, k+1) + SUM_ELEM(p.s2, k+2) + SUM_ELEM(p.s3, k+3);
        }

        // tail: the at most three remaining pixels
        for( k = x; k < x_max; k++ )
        {
            int ps = ptr[k];
            S += SUM_ELEM(ps, k);
        }

        // combine the row sums with the powers of the tile-relative row index
        int sy = y*y;

        mom[y][0] = S.s0;
        mom[y][1] = S.s1;
        mom[y][2] = y*S.s0;
        mom[y][3] = S.s2;
        mom[y][4] = y*S.s1;
        mom[y][5] = sy*S.s0;
        mom[y][6] = S.s3;
        mom[y][7] = y*S.s2;
        mom[y][8] = sy*S.s1;
        mom[y][9] = y*sy*S.s0;
    }
    else
    {
        // rows below the image contribute nothing
        for( k = 0; k < 10; k++ )
            mom[y][k] = 0;
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    // tree reduction over the rows: 32 -> 16 -> 8 -> 4 -> 2 partial sums
    for( d = 16; d >= 2; d >>= 1 )
    {
        if( y < d )
        {
            for( k = 0; k < 10; k++ )
                mom[y][k] += mom[y+d][k];
        }
        barrier(CLK_LOCAL_MEM_FENCE);
    }

    // the final two partial sums are added while writing the result
    if( y == 0 )
    {
        __global int* momout = mom0 + (y0*xtiles + x0)*10;
        for( k = 0; k < 10; k++ )
            momout[k] = mom[0][k] + mom[1][k];
    }
}
|
152
modules/imgproc/src/opencl/morph.cl
Normal file
152
modules/imgproc/src/opencl/morph.cl
Normal file
@ -0,0 +1,152 @@
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Niko Li, newlife20080214@gmail.com
|
||||
// Zero Lin, zero.lin@amd.com
|
||||
// Yao Wang, bitwangyaoyao@gmail.com
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors as is and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//
|
||||
|
||||
#ifdef DOUBLE_SUPPORT
|
||||
#ifdef cl_amd_fp64
|
||||
#pragma OPENCL EXTENSION cl_amd_fp64:enable
|
||||
#elif defined (cl_khr_fp64)
|
||||
#pragma OPENCL EXTENSION cl_khr_fp64:enable
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// VAL is the neutral start value of the morphological operation for the
// selected depth: the largest representable value for ERODE (min) and the
// smallest one for DILATE (max).
#ifdef DEPTH_0
#ifdef ERODE
#define VAL 255
#endif
#ifdef DILATE
#define VAL 0
#endif
#endif
#ifdef DEPTH_5
#ifdef ERODE
#define VAL FLT_MAX
#endif
#ifdef DILATE
#define VAL -FLT_MAX
#endif
#endif
#ifdef DEPTH_6
#ifdef ERODE
#define VAL DBL_MAX
#endif
#ifdef DILATE
#define VAL -DBL_MAX
#endif
#endif

// MORPH_OP combines the running result with one neighbourhood sample.
#ifdef ERODE
#define MORPH_OP(A,B) min((A),(B))
#endif
#ifdef DILATE
#define MORPH_OP(A,B) max((A),(B))
#endif

//BORDER_CONSTANT:      iiiiii|abcdefgh|iiiiiii
// ELEM yields elem1 when index i lies outside [l_edge, r_edge) and elem2
// otherwise.  The whole expansion is parenthesized so the macro can be used
// safely inside larger expressions; the non-short-circuit `|` is kept.
#define ELEM(i,l_edge,r_edge,elem1,elem2) (((i)<(l_edge)) | ((i) >= (r_edge)) ? (elem1) : (elem2))
|
||||
|
||||
// Erosion / dilation (selected by MORPH_OP) of GENTYPE pixels with a
// (2*RADIUSX+1) x (2*RADIUSY+1) structuring element.
//
// Each work-item first loads two source pixels of the work-group's padded
// tile into local memory, substituting VAL for pixels outside the whole
// source image, and then folds MORPH_OP over its own neighbourhood.  Unless
// RECTKERNEL is defined, only positions whose mat_kernel entry is non-zero
// take part.  src_step / dst_step / dst_offset are byte quantities;
// src_offset_x / src_offset_y are pixel offsets of the region inside the
// whole image.
__kernel void morph(__global const uchar * restrict srcptr, int src_step, int src_offset,
                    __global uchar * dstptr, int dst_step, int dst_offset,
                    int src_offset_x, int src_offset_y,
                    int cols, int rows,
                    __constant uchar * mat_kernel,
                    int src_whole_cols, int src_whole_rows)
{
    const int lx = get_local_id(0);
    const int ly = get_local_id(1);
    const int group_x = get_group_id(0)*LSIZE0;
    const int group_y = get_group_id(1)*LSIZE1;

    // extent of the padded tile in whole-image coordinates
    const int start_x = group_x + src_offset_x - RADIUSX;
    const int end_x   = group_x + src_offset_x + LSIZE0 + RADIUSX;
    const int width   = end_x - start_x + 1;
    const int start_y = group_y + src_offset_y - RADIUSY;

    // the two local-memory slots filled by this work-item
    const int slot0 = mad24(ly, LSIZE0, lx);
    const int slot1 = slot0 + LSIZE0*LSIZE1;

    // image coordinates that correspond to those slots (tile rows are `width` wide)
    const int col0 = start_x + slot0 % width;
    const int row0 = start_y + slot0 / width;
    const int col1 = start_x + slot1 % width;
    const int row1 = start_y + slot1 / width;

    int addr0 = mad24(row0, src_step, col0*(int)sizeof(GENTYPE));
    int addr1 = mad24(row1, src_step, col1*(int)sizeof(GENTYPE));

    __local GENTYPE LDS_DAT[2*LSIZE1*LSIZE0];

    // read from a safe byte address; anything outside (0, end_addr) falls back to 0
    const int end_addr = mad24(src_whole_rows - 1, src_step, src_whole_cols*(int)sizeof(GENTYPE));
    if (!((addr0 < end_addr) && (addr0 > 0)))
        addr0 = 0;
    if (!((addr1 < end_addr) && (addr1 > 0)))
        addr1 = 0;

    GENTYPE v0 = *(__global const GENTYPE *)(srcptr + addr0);
    GENTYPE v1 = *(__global const GENTYPE *)(srcptr + addr1);

    // pixels outside the whole image are replaced by the neutral value
    v0 = ELEM(col0, 0, src_whole_cols, (GENTYPE)VAL, v0);
    v0 = ELEM(row0, 0, src_whole_rows, (GENTYPE)VAL, v0);

    v1 = ELEM(col1, 0, src_whole_cols, (GENTYPE)VAL, v1);
    v1 = ELEM(row1, 0, src_whole_rows, (GENTYPE)VAL, v1);

    LDS_DAT[slot0] = v0;
    LDS_DAT[slot1] = v1;
    barrier(CLK_LOCAL_MEM_FENCE);

    // fold the operation over the neighbourhood of this work-item
    GENTYPE res = (GENTYPE)VAL;
    for (int dy = 0; dy < 2*RADIUSY+1; dy++)
        for (int dx = 0; dx < 2*RADIUSX+1; dx++)
        {
#ifdef RECTKERNEL
            res = MORPH_OP(res, LDS_DAT[mad24(ly+dy, width, lx+dx)]);
#else
            if (mat_kernel[dy*(2*RADIUSX+1)+dx])
                res = MORPH_OP(res, LDS_DAT[mad24(ly+dy, width, lx+dx)]);
#endif
        }

    const int gidx = get_global_id(0);
    const int gidy = get_global_id(1);
    if (gidx < cols && gidy < rows)
    {
        const int dst_index = mad24(gidy, dst_step, dst_offset + gidx * (int)sizeof(GENTYPE));
        *(__global GENTYPE *)(dstptr + dst_index) = res;
    }
}
|
@ -229,6 +229,75 @@ OCL_TEST_P(GaussianBlurTest, Mat)
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Erode
|
||||
|
||||
typedef FilterTestBase Erode;
|
||||
|
||||
OCL_TEST_P(Erode, Mat)
|
||||
{
|
||||
Size kernelSize(ksize, ksize);
|
||||
int iterations = (int)param;
|
||||
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
random_roi();
|
||||
Mat kernel = randomMat(kernelSize, CV_8UC1, 0, 3);
|
||||
|
||||
OCL_OFF(cv::erode(src_roi, dst_roi, kernel, Point(-1,-1), iterations) );
|
||||
OCL_ON(cv::erode(usrc_roi, udst_roi, kernel, Point(-1,-1), iterations) );
|
||||
|
||||
Near();
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Dilate
|
||||
|
||||
typedef FilterTestBase Dilate;
|
||||
|
||||
OCL_TEST_P(Dilate, Mat)
|
||||
{
|
||||
Size kernelSize(ksize, ksize);
|
||||
int iterations = (int)param;
|
||||
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
random_roi();
|
||||
Mat kernel = randomMat(kernelSize, CV_8UC1, 0, 3);
|
||||
|
||||
OCL_OFF(cv::dilate(src_roi, dst_roi, kernel, Point(-1,-1), iterations) );
|
||||
OCL_ON(cv::dilate(usrc_roi, udst_roi, kernel, Point(-1,-1), iterations) );
|
||||
|
||||
Near();
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// MorphologyEx
|
||||
|
||||
typedef FilterTestBase MorphologyEx;
|
||||
|
||||
OCL_TEST_P(MorphologyEx, Mat)
|
||||
{
|
||||
Size kernelSize(ksize, ksize);
|
||||
int iterations = (int)param;
|
||||
int op = size.height;
|
||||
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
random_roi();
|
||||
Mat kernel = randomMat(kernelSize, CV_8UC1, 0, 3);
|
||||
|
||||
OCL_OFF(cv::morphologyEx(src_roi, dst_roi, op, kernel, Point(-1,-1), iterations) );
|
||||
OCL_ON(cv::morphologyEx(usrc_roi, udst_roi, op, kernel, Point(-1,-1), iterations) );
|
||||
|
||||
Near();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#define FILTER_BORDER_SET_NO_ISOLATED \
|
||||
@ -285,6 +354,31 @@ OCL_INSTANTIATE_TEST_CASE_P(Filter, GaussianBlurTest, Combine(
|
||||
Values(0.0), // not used
|
||||
Bool()));
|
||||
|
||||
// Parameter order for the three morphology suites below:
//   type, kernel size, Size, border type, param, ROI flag.
// The tests read `param` as the iteration count; MorphologyEx additionally
// reads Size::height as the operation code.

OCL_INSTANTIATE_TEST_CASE_P(Filter, Erode,
                            Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
                                    Values(3, 5, 7),
                                    Values(Size(0,0)), // not used
                                    Values((BorderType)BORDER_CONSTANT), // not used
                                    Values(1.0, 2.0, 3.0), // iterations
                                    Bool()));

OCL_INSTANTIATE_TEST_CASE_P(Filter, Dilate,
                            Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
                                    Values(3, 5, 7),
                                    Values(Size(0,0)), // not used
                                    Values((BorderType)BORDER_CONSTANT), // not used
                                    Values(1.0, 2.0, 3.0), // iterations
                                    Bool()));

OCL_INSTANTIATE_TEST_CASE_P(Filter, MorphologyEx,
                            Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4, CV_64FC1, CV_64FC4),
                                    Values(3, 5, 7),
                                    // the height is used as the morphological operation code
                                    Values(Size(0,0), Size(0,1), Size(0,2), Size(0,3), Size(0,4), Size(0,5), Size(0,6)),
                                    Values((BorderType)BORDER_CONSTANT), // not used
                                    Values(1.0, 2.0, 3.0), // iterations
                                    Bool()));
|
||||
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
||||
#endif // HAVE_OPENCL
|
||||
|
175
modules/imgproc/test/ocl/test_histogram.cpp
Normal file
175
modules/imgproc/test/ocl/test_histogram.cpp
Normal file
@ -0,0 +1,175 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
// Niko Li, newlife20080214@gmail.com
|
||||
// Jia Haipeng, jiahaipeng95@gmail.com
|
||||
// Shengen Yan, yanshengen@gmail.com
|
||||
// Jiang Liyuan, lyuan001.good@163.com
|
||||
// Rock Li, Rock.Li@amd.com
|
||||
// Wu Zailong, bullet@yeah.net
|
||||
// Xu Pang, pangxu010@163.com
|
||||
// Sen Liu, swjtuls1987@126.com
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "cvconfig.h"
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// Fixture for comparing cv::calcBackProject on Mat and UMat inputs.
// Parameters: image depth, number of input images N, and whether the
// matrices are sub-regions (ROIs) of larger buffers.
PARAM_TEST_CASE(CalcBackProject, MatDepth, int, bool)
{
    int depth, N;
    bool useRoi;

    // per-call inputs of calcHist / calcBackProject, rebuilt by random_roi()
    std::vector<float> ranges;
    std::vector<int> channels;
    double scale;

    std::vector<Mat> images;
    std::vector<Mat> images_roi;
    std::vector<UMat> uimages;
    std::vector<UMat> uimages_roi;

    TEST_DECLARE_INPUT_PARAMETER(hist)
    TEST_DECLARE_OUTPUT_PARAMETER(dst)

    virtual void SetUp()
    {
        depth = GET_PARAM(0);
        N = GET_PARAM(1);
        useRoi = GET_PARAM(2);

        // at most two input images are supported by this fixture
        ASSERT_GE(2, N);

        images.resize(N);
        images_roi.resize(N);
        uimages.resize(N);
        uimages_roi.resize(N);
    }

    // Generates N random images, a histogram computed from them, a random
    // destination and a random scale, then uploads everything to UMat.
    virtual void random_roi()
    {
        Size roiSize = randomSize(1, MAX_VALUE);

        // random_roi() runs once per test-loop iteration; start from empty
        // vectors so their sizes always match the N histogram dimensions
        // instead of growing with every call
        ranges.clear();
        channels.clear();

        int totalChannels = 0;
        for (int i = 0; i < N; ++i)
        {
            Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
            int cn = randomInt(1, 5);
            randomSubMat(images[i], images_roi[i], roiSize, srcBorder, CV_MAKE_TYPE(depth, cn), 0, 125);

            // every dimension uses the value range [10, 100)
            ranges.push_back(10);
            ranges.push_back(100);

            // pick one channel of image i; indices run across all images
            channels.push_back(randomInt(0, cn) + totalChannels);
            totalChannels += cn;
        }

        Mat tmpHist;
        {
            std::vector<int> hist_size(N);
            for (int i = 0 ; i < N; ++i)
                hist_size[i] = randomInt(10, 50);

            cv::calcHist(images_roi, channels, noArray(), tmpHist, hist_size, ranges);
            ASSERT_EQ(CV_32FC1, tmpHist.type());
        }

        // place the histogram inside a (possibly larger) random buffer
        Border histBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(hist, hist_roi, tmpHist.size(), histBorder, tmpHist.type(), 0, MAX_VALUE);
        tmpHist.copyTo(hist_roi);

        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(dst, dst_roi, roiSize, dstBorder, CV_MAKE_TYPE(depth, 1), 5, 16);

        // mirror every image and its ROI on the UMat side
        for (int i = 0; i < N; ++i)
        {
            images[i].copyTo(uimages[i]);

            Size _wholeSize;
            Point ofs;
            images_roi[i].locateROI(_wholeSize, ofs);

            uimages_roi[i] = uimages[i](Rect(ofs.x, ofs.y, images_roi[i].cols, images_roi[i].rows));
        }

        UMAT_UPLOAD_INPUT_PARAMETER(hist)
        UMAT_UPLOAD_OUTPUT_PARAMETER(dst)

        scale = randomDouble(0.1, 1);
    }

    // Mat and UMat results are compared with a threshold of 0.0.
    void Near()
    {
        OCL_EXPECT_MATS_NEAR(dst, 0.0)
    }
};
|
||||
|
||||
//////////////////////////////// CalcBackProject //////////////////////////////////////////////
|
||||
|
||||
// Runs cv::calcBackProject with OpenCL disabled (Mat) and enabled (UMat) on
// freshly randomized inputs and compares both destinations.
OCL_TEST_P(CalcBackProject, Mat)
{
    for (int attempt = 0; attempt < test_loop_times; ++attempt)
    {
        random_roi();

        OCL_OFF(cv::calcBackProject(images_roi, channels, hist_roi, dst_roi, ranges, scale));
        OCL_ON(cv::calcBackProject(uimages_roi, channels, uhist_roi, udst_roi, ranges, scale));

        Near();
    }
}

/////////////////////////////////////////////////////////////////////////////////////

// depth, number of images, ROI flag
OCL_INSTANTIATE_TEST_CASE_P(Imgproc, CalcBackProject,
                            Combine(Values((MatDepth)CV_8U),
                                    Values(1, 2),
                                    Bool()));
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
||||
#endif // HAVE_OPENCL
|
147
modules/imgproc/test/ocl/test_sepfilter2D.cpp
Normal file
147
modules/imgproc/test/ocl/test_sepfilter2D.cpp
Normal file
@ -0,0 +1,147 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "opencv2/ts/ocl_test.hpp"
|
||||
|
||||
#ifdef HAVE_OPENCL
|
||||
|
||||
namespace cvtest {
|
||||
namespace ocl {
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// sepFilter2D
|
||||
// Fixture for comparing cv::sepFilter2D on Mat and UMat inputs.
// Parameters: depth, channel count, border type, BORDER_ISOLATED flag and
// whether the matrices are sub-regions (ROIs) of larger buffers.
PARAM_TEST_CASE(SepFilter2D, MatDepth, Channels, BorderType, bool, bool)
{
    static const int kernelMinSize = 2;
    static const int kernelMaxSize = 10;

    int type;
    Point anchor;
    int borderType;
    bool useRoi;
    Mat kernelX, kernelY;

    TEST_DECLARE_INPUT_PARAMETER(src)
    TEST_DECLARE_OUTPUT_PARAMETER(dst)

    // Returns `value` unchanged when value % 2 == 1, otherwise value + 1.
    static int makeOdd(int value)
    {
        return (1 != (value % 2)) ? value + 1 : value;
    }

    // Returns `value` advanced to the next multiple of 4 when it is not one already.
    static int alignTo4(int value)
    {
        const int rest = value % 4;
        return (0 != rest) ? value + (4 - rest) : value;
    }

    virtual void SetUp()
    {
        type = CV_MAKE_TYPE(GET_PARAM(0), GET_PARAM(1));
        borderType = GET_PARAM(2) | (GET_PARAM(3) ? BORDER_ISOLATED : 0);
        useRoi = GET_PARAM(4);
    }

    // Generates random L1-normalized row/column kernels of odd length, a
    // random source whose ROI width and left/right borders are multiples of
    // 4, and a random destination of the same ROI size; uploads both to UMat.
    void random_roi()
    {
        Size ksize = randomSize(kernelMinSize, kernelMaxSize);
        ksize.width = makeOdd(ksize.width);
        ksize.height = makeOdd(ksize.height);

        Mat temp = randomMat(Size(ksize.width, 1), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE);
        cv::normalize(temp, kernelX, 1.0, 0.0, NORM_L1);
        temp = randomMat(Size(1, ksize.height), CV_MAKE_TYPE(CV_32F, 1), -MAX_VALUE, MAX_VALUE);
        cv::normalize(temp, kernelY, 1.0, 0.0, NORM_L1);

        Size roiSize = randomSize(ksize.width, MAX_VALUE, ksize.height, MAX_VALUE);
        roiSize.width = alignTo4(roiSize.width);

        Border srcBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        srcBorder.lef = alignTo4(srcBorder.lef);
        srcBorder.rig = alignTo4(srcBorder.rig);
        randomSubMat(src, src_roi, roiSize, srcBorder, type, -MAX_VALUE, MAX_VALUE);

        Border dstBorder = randomBorder(0, useRoi ? MAX_VALUE : 0);
        randomSubMat(dst, dst_roi, roiSize, dstBorder, type, -MAX_VALUE, MAX_VALUE);

        // (-1, -1) is passed to cv::sepFilter2D as the anchor
        anchor.x = -1;
        anchor.y = -1;

        UMAT_UPLOAD_INPUT_PARAMETER(src)
        UMAT_UPLOAD_OUTPUT_PARAMETER(dst)
    }

    // Compares the Mat and UMat destinations with the given threshold.
    void Near(double threshold = 0.0)
    {
        OCL_EXPECT_MATS_NEAR(dst, threshold);
    }
};
|
||||
|
||||
// Runs cv::sepFilter2D with identical arguments on the Mat ROI (wrapped in OCL_OFF)
// and on the UMat ROI (wrapped in OCL_ON), then compares the outputs through
// Near() with a threshold of 1.0. Repeated test_loop_times times, each with fresh
// random kernels and matrices from random_roi().
OCL_TEST_P(SepFilter2D, Mat)
|
||||
{
|
||||
for (int j = 0; j < test_loop_times; j++)
|
||||
{
|
||||
random_roi();
|
||||
|
||||
// ddepth = -1 keeps the destination depth equal to the source depth; delta is 0.0.
OCL_OFF(cv::sepFilter2D(src_roi, dst_roi, -1, kernelX, kernelY, anchor, 0.0, borderType));
|
||||
OCL_ON(cv::sepFilter2D(usrc_roi, udst_roi, -1, kernelX, kernelY, anchor, 0.0, borderType));
|
||||
|
||||
Near(1.0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
OCL_INSTANTIATE_TEST_CASE_P(ImageProc, SepFilter2D,
|
||||
Combine(
|
||||
Values(CV_8U, CV_32F),
|
||||
Values(1, 4),
|
||||
Values(
|
||||
(BorderType)BORDER_CONSTANT,
|
||||
(BorderType)BORDER_REPLICATE,
|
||||
(BorderType)BORDER_REFLECT,
|
||||
(BorderType)BORDER_REFLECT_101),
|
||||
Bool(), // BORDER_ISOLATED
|
||||
Bool() // ROI
|
||||
)
|
||||
);
|
||||
|
||||
|
||||
} } // namespace cvtest::ocl
|
||||
|
||||
#endif // HAVE_OPENCL
|
@ -43,6 +43,13 @@
|
||||
using namespace cv;
|
||||
using namespace std;
|
||||
|
||||
#define OCL_TUNING_MODE 0
|
||||
#if OCL_TUNING_MODE
|
||||
#define OCL_TUNING_MODE_ONLY(code) code
|
||||
#else
|
||||
#define OCL_TUNING_MODE_ONLY(code)
|
||||
#endif
|
||||
|
||||
// image moments
|
||||
class CV_MomentsTest : public cvtest::ArrayTest
|
||||
{
|
||||
@ -60,6 +67,7 @@ protected:
|
||||
void run_func();
|
||||
int coi;
|
||||
bool is_binary;
|
||||
bool try_umat;
|
||||
};
|
||||
|
||||
|
||||
@ -70,6 +78,7 @@ CV_MomentsTest::CV_MomentsTest()
|
||||
test_array[REF_OUTPUT].push_back(NULL);
|
||||
coi = -1;
|
||||
is_binary = false;
|
||||
OCL_TUNING_MODE_ONLY(test_case_count = 10);
|
||||
//element_wise_relative_error = false;
|
||||
}
|
||||
|
||||
@ -96,25 +105,38 @@ void CV_MomentsTest::get_minmax_bounds( int i, int j, int type, Scalar& low, Sca
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void CV_MomentsTest::get_test_array_types_and_sizes( int test_case_idx,
|
||||
vector<vector<Size> >& sizes, vector<vector<int> >& types )
|
||||
{
|
||||
RNG& rng = ts->get_rng();
|
||||
cvtest::ArrayTest::get_test_array_types_and_sizes( test_case_idx, sizes, types );
|
||||
int cn = cvtest::randInt(rng) % 4 + 1;
|
||||
int cn = (cvtest::randInt(rng) % 4) + 1;
|
||||
int depth = cvtest::randInt(rng) % 4;
|
||||
depth = depth == 0 ? CV_8U : depth == 1 ? CV_16U : depth == 2 ? CV_16S : CV_32F;
|
||||
if( cn == 2 )
|
||||
|
||||
is_binary = cvtest::randInt(rng) % 2 != 0;
|
||||
if( depth == 0 && !is_binary )
|
||||
try_umat = cvtest::randInt(rng) % 5 != 0;
|
||||
else
|
||||
try_umat = cvtest::randInt(rng) % 2 != 0;
|
||||
|
||||
if( cn == 2 || try_umat )
|
||||
cn = 1;
|
||||
|
||||
OCL_TUNING_MODE_ONLY(
|
||||
cn = 1;
|
||||
depth = CV_8U;
|
||||
try_umat = true;
|
||||
is_binary = false;
|
||||
sizes[INPUT][0] = Size(1024,768)
|
||||
);
|
||||
|
||||
types[INPUT][0] = CV_MAKETYPE(depth, cn);
|
||||
types[OUTPUT][0] = types[REF_OUTPUT][0] = CV_64FC1;
|
||||
sizes[OUTPUT][0] = sizes[REF_OUTPUT][0] = cvSize(MOMENT_COUNT,1);
|
||||
if(CV_MAT_DEPTH(types[INPUT][0])>=CV_32S)
|
||||
sizes[INPUT][0].width = MAX(sizes[INPUT][0].width, 3);
|
||||
|
||||
is_binary = cvtest::randInt(rng) % 2 != 0;
|
||||
coi = 0;
|
||||
cvmat_allowed = true;
|
||||
if( cn > 1 )
|
||||
@ -149,7 +171,25 @@ void CV_MomentsTest::run_func()
|
||||
{
|
||||
CvMoments* m = (CvMoments*)test_mat[OUTPUT][0].ptr<double>();
|
||||
double* others = (double*)(m + 1);
|
||||
cvMoments( test_array[INPUT][0], m, is_binary );
|
||||
if( try_umat )
|
||||
{
|
||||
UMat u;
|
||||
test_mat[INPUT][0].clone().copyTo(u);
|
||||
OCL_TUNING_MODE_ONLY(
|
||||
static double ttime = 0;
|
||||
static int ncalls = 0;
|
||||
moments(u, is_binary != 0);
|
||||
double t = (double)getTickCount());
|
||||
Moments new_m = moments(u, is_binary != 0);
|
||||
OCL_TUNING_MODE_ONLY(
|
||||
ttime += (double)getTickCount() - t;
|
||||
ncalls++;
|
||||
printf("%g\n", ttime/ncalls/u.total()));
|
||||
*m = new_m;
|
||||
}
|
||||
else
|
||||
cvMoments( test_array[INPUT][0], m, is_binary );
|
||||
|
||||
others[0] = cvGetNormalizedCentralMoment( m, 2, 0 );
|
||||
others[1] = cvGetNormalizedCentralMoment( m, 1, 1 );
|
||||
others[2] = cvGetNormalizedCentralMoment( m, 0, 2 );
|
||||
|
@ -18,6 +18,8 @@ class_ignore_list = (
|
||||
const_ignore_list = (
|
||||
"CV_CAP_OPENNI",
|
||||
"CV_CAP_PROP_OPENNI_",
|
||||
"CV_CAP_INTELPERC",
|
||||
# Keep the trailing comma: adjacent string literals are concatenated, which would
# merge this prefix with the next entry and silently drop both from the ignore list.
"CV_CAP_PROP_INTELPERC_",
|
||||
"WINDOW_AUTOSIZE",
|
||||
"CV_WND_PROP_",
|
||||
"CV_WINDOW_",
|
||||
|
@ -37,6 +37,10 @@ public class OpenCVLoader
|
||||
*/
|
||||
public static final String OPENCV_VERSION_2_4_7 = "2.4.7";
|
||||
|
||||
/**
|
||||
* OpenCV Library version 2.4.8.
|
||||
*/
|
||||
public static final String OPENCV_VERSION_2_4_8 = "2.4.8";
|
||||
|
||||
/**
|
||||
* Loads and initializes OpenCV library from current application package. Roughly, it's an analog of System.loadLibrary("opencv_java").
|
||||
|
@ -12,6 +12,7 @@
|
||||
//
|
||||
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
|
||||
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
|
||||
// Copyright (C) 2013, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// @Authors
|
||||
@ -66,8 +67,8 @@ uint read_sumTex(IMAGE_INT32 img, sampler_t sam, int2 coord, int rows, int cols,
|
||||
uchar read_imgTex(IMAGE_INT8 img, sampler_t sam, float2 coord, int rows, int cols, int elemPerRow)
|
||||
{
|
||||
#ifdef DISABLE_IMAGE2D
|
||||
int x = clamp(convert_int_rte(coord.x), 0, cols - 1);
|
||||
int y = clamp(convert_int_rte(coord.y), 0, rows - 1);
|
||||
int x = clamp(round(coord.x), 0, cols - 1);
|
||||
int y = clamp(round(coord.y), 0, rows - 1);
|
||||
return img[elemPerRow * y + x];
|
||||
#else
|
||||
return (uchar)read_imageui(img, sam, coord).x;
|
||||
@ -98,6 +99,7 @@ __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAM
|
||||
#define CV_PI_F 3.14159265f
|
||||
#endif
|
||||
|
||||
|
||||
// Use integral image to calculate haar wavelets.
|
||||
// N = 2
|
||||
// for simple haar pattern
|
||||
@ -114,10 +116,10 @@ float icvCalcHaarPatternSum_2(
|
||||
|
||||
F d = 0;
|
||||
|
||||
int2 dx1 = convert_int2_rte(ratio * src[0]);
|
||||
int2 dy1 = convert_int2_rte(ratio * src[1]);
|
||||
int2 dx2 = convert_int2_rte(ratio * src[2]);
|
||||
int2 dy2 = convert_int2_rte(ratio * src[3]);
|
||||
int2 dx1 = convert_int2(round(ratio * src[0]));
|
||||
int2 dy1 = convert_int2(round(ratio * src[1]));
|
||||
int2 dx2 = convert_int2(round(ratio * src[2]));
|
||||
int2 dy2 = convert_int2(round(ratio * src[3]));
|
||||
|
||||
F t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
|
||||
@ -136,106 +138,9 @@ float icvCalcHaarPatternSum_2(
|
||||
return (float)d;
|
||||
}
|
||||
|
||||
// N = 3
|
||||
float icvCalcHaarPatternSum_3(
|
||||
IMAGE_INT32 sumTex,
|
||||
__constant float4 *src,
|
||||
int oldSize,
|
||||
int newSize,
|
||||
int y, int x,
|
||||
int rows, int cols, int elemPerRow)
|
||||
{
|
||||
|
||||
float ratio = (float)newSize / oldSize;
|
||||
|
||||
F d = 0;
|
||||
|
||||
int4 dx1 = convert_int4_rte(ratio * src[0]);
|
||||
int4 dy1 = convert_int4_rte(ratio * src[1]);
|
||||
int4 dx2 = convert_int4_rte(ratio * src[2]);
|
||||
int4 dy2 = convert_int4_rte(ratio * src[3]);
|
||||
|
||||
F t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy2.x), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy1.x), rows, cols, elemPerRow );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy2.x), rows, cols, elemPerRow );
|
||||
d += t * src[4].x / ((dx2.x - dx1.x) * (dy2.x - dy1.x));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy1.y), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy2.y), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy1.y), rows, cols, elemPerRow );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy2.y), rows, cols, elemPerRow );
|
||||
d += t * src[4].y / ((dx2.y - dx1.y) * (dy2.y - dy1.y));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy1.z), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy2.z), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy1.z), rows, cols, elemPerRow );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy2.z), rows, cols, elemPerRow );
|
||||
d += t * src[4].z / ((dx2.z - dx1.z) * (dy2.z - dy1.z));
|
||||
|
||||
return (float)d;
|
||||
}
|
||||
|
||||
// N = 4
|
||||
float icvCalcHaarPatternSum_4(
|
||||
IMAGE_INT32 sumTex,
|
||||
__constant float4 *src,
|
||||
int oldSize,
|
||||
int newSize,
|
||||
int y, int x,
|
||||
int rows, int cols, int elemPerRow)
|
||||
{
|
||||
|
||||
float ratio = (float)newSize / oldSize;
|
||||
|
||||
F d = 0;
|
||||
|
||||
int4 dx1 = convert_int4_rte(ratio * src[0]);
|
||||
int4 dy1 = convert_int4_rte(ratio * src[1]);
|
||||
int4 dx2 = convert_int4_rte(ratio * src[2]);
|
||||
int4 dy2 = convert_int4_rte(ratio * src[3]);
|
||||
|
||||
F t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy1.x), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.x, y + dy2.x), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy1.x), rows, cols, elemPerRow );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.x, y + dy2.x), rows, cols, elemPerRow );
|
||||
d += t * src[4].x / ((dx2.x - dx1.x) * (dy2.x - dy1.x));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy1.y), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.y, y + dy2.y), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy1.y), rows, cols, elemPerRow );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.y, y + dy2.y), rows, cols, elemPerRow );
|
||||
d += t * src[4].y / ((dx2.y - dx1.y) * (dy2.y - dy1.y));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy1.z), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.z, y + dy2.z), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy1.z), rows, cols, elemPerRow );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.z, y + dy2.z), rows, cols, elemPerRow );
|
||||
d += t * src[4].z / ((dx2.z - dx1.z) * (dy2.z - dy1.z));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx1.w, y + dy1.w), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1.w, y + dy2.w), rows, cols, elemPerRow );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2.w, y + dy1.w), rows, cols, elemPerRow );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + dx2.w, y + dy2.w), rows, cols, elemPerRow );
|
||||
d += t * src[4].w / ((dx2.w - dx1.w) * (dy2.w - dy1.w));
|
||||
|
||||
return (float)d;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Hessian
|
||||
|
||||
__constant float4 c_DX[5] = { (float4)(0, 3, 6, 0), (float4)(2, 2, 2, 0), (float4)(3, 6, 9, 0), (float4)(7, 7, 7, 0), (float4)(1, -2, 1, 0) };
|
||||
__constant float4 c_DY[5] = { (float4)(2, 2, 2, 0), (float4)(0, 3, 6, 0), (float4)(7, 7, 7, 0), (float4)(3, 6, 9, 0), (float4)(1, -2, 1, 0) };
|
||||
__constant float4 c_DXY[5] = { (float4)(1, 5, 1, 5), (float4)(1, 1, 5, 5), (float4)(4, 8, 4, 8), (float4)(4, 4, 8, 8), (float4)(1, -1, -1, 1) };// Use integral image to calculate haar wavelets.
|
||||
|
||||
__inline int calcSize(int octave, int layer)
|
||||
{
|
||||
/* Wavelet size at first layer of first octave. */
|
||||
@ -250,6 +155,24 @@ __inline int calcSize(int octave, int layer)
|
||||
return (HAAR_SIZE0 + HAAR_SIZE_INC * layer) << octave;
|
||||
}
|
||||
|
||||
// Calculate a derivative in an axis-aligned direction (x or y). The "plus1"
|
||||
// boxes contribute 1 * (area), and the "minus2" box contributes -2 * (area).
|
||||
// So the final computation is plus1a + plus1b - 2 * minus2. The corners are
|
||||
// labeled A, B, C, and D, with A being the top left, B being top right, C
|
||||
// being bottom left, and D being bottom right.
|
||||
// Parameters: for each of the three boxes (plus1a, plus1b, minus2), the four
// samples *_A.._D taken from the integral image and the divisor *_scale applied
// to that box's sum (the visible caller passes the box width times its height).
// Returns plus1a/plus1a_scale - 2*minus2/minus2_scale + plus1b/plus1b_scale.
F calcAxisAlignedDerivative(
|
||||
int plus1a_A, int plus1a_B, int plus1a_C, int plus1a_D, F plus1a_scale,
|
||||
int plus1b_A, int plus1b_B, int plus1b_C, int plus1b_D, F plus1b_scale,
|
||||
int minus2_A, int minus2_B, int minus2_C, int minus2_D, F minus2_scale)
|
||||
{
|
||||
// Box sum from its four corner samples: A - B - C + D (evaluated on ints, then stored as F).
F plus1a = plus1a_A - plus1a_B - plus1a_C + plus1a_D;
|
||||
F plus1b = plus1b_A - plus1b_B - plus1b_C + plus1b_D;
|
||||
F minus2 = minus2_A - minus2_B - minus2_C + minus2_D;
|
||||
|
||||
// Each box sum is divided by its own scale before combining; the minus2 box is weighted by -2.
return (plus1a / plus1a_scale -
|
||||
2.0f * minus2 / minus2_scale +
|
||||
plus1b / plus1b_scale);
|
||||
}
|
||||
|
||||
//calculate targeted layer per-pixel determinant and trace with an integral image
|
||||
__kernel void icvCalcLayerDetAndTrace(
|
||||
@ -264,7 +187,7 @@ __kernel void icvCalcLayerDetAndTrace(
|
||||
int c_octave,
|
||||
int c_layer_rows,
|
||||
int sumTex_step
|
||||
)
|
||||
)
|
||||
{
|
||||
det_step /= sizeof(*det);
|
||||
trace_step /= sizeof(*trace);
|
||||
@ -288,16 +211,103 @@ __kernel void icvCalcLayerDetAndTrace(
|
||||
|
||||
if (size <= c_img_rows && size <= c_img_cols && i < samples_i && j < samples_j)
|
||||
{
|
||||
const float dx = icvCalcHaarPatternSum_3(sumTex, c_DX , 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
|
||||
const float dy = icvCalcHaarPatternSum_3(sumTex, c_DY , 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
|
||||
const float dxy = icvCalcHaarPatternSum_4(sumTex, c_DXY, 9, size, i << c_octave, j << c_octave, c_img_rows, c_img_cols, sumTex_step);
|
||||
int x = j << c_octave;
|
||||
int y = i << c_octave;
|
||||
|
||||
float ratio = (float)size / 9;
|
||||
|
||||
// Precompute some commonly used values, which are used to offset
|
||||
// texture coordinates in the integral image.
|
||||
int r1 = round(ratio);
|
||||
int r2 = round(ratio * 2.0f);
|
||||
int r3 = round(ratio * 3.0f);
|
||||
int r4 = round(ratio * 4.0f);
|
||||
int r5 = round(ratio * 5.0f);
|
||||
int r6 = round(ratio * 6.0f);
|
||||
int r7 = round(ratio * 7.0f);
|
||||
int r8 = round(ratio * 8.0f);
|
||||
int r9 = round(ratio * 9.0f);
|
||||
|
||||
// Calculate the approximated derivative in the x-direction
|
||||
F d = 0;
|
||||
{
|
||||
// Some of the pixels needed to compute the derivative are
|
||||
// repeated, so we don't duplicate the fetch here.
|
||||
int t02 = read_sumTex( sumTex, sampler, (int2)(x, y + r2), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t07 = read_sumTex( sumTex, sampler, (int2)(x, y + r7), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t32 = read_sumTex( sumTex, sampler, (int2)(x + r3, y + r2), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t37 = read_sumTex( sumTex, sampler, (int2)(x + r3, y + r7), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t62 = read_sumTex( sumTex, sampler, (int2)(x + r6, y + r2), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t67 = read_sumTex( sumTex, sampler, (int2)(x + r6, y + r7), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t92 = read_sumTex( sumTex, sampler, (int2)(x + r9, y + r2), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t97 = read_sumTex( sumTex, sampler, (int2)(x + r9, y + r7), c_img_rows, c_img_cols, sumTex_step );
|
||||
|
||||
d = calcAxisAlignedDerivative(t02, t07, t32, t37, (r3) * (r7 - r2),
|
||||
t62, t67, t92, t97, (r9 - r6) * (r7 - r2),
|
||||
t32, t37, t62, t67, (r6 - r3) * (r7 - r2));
|
||||
}
|
||||
const float dx = (float)d;
|
||||
|
||||
// Calculate the approximated derivative in the y-direction
|
||||
d = 0;
|
||||
{
|
||||
// Some of the pixels needed to compute the derivative are
|
||||
// repeated, so we don't duplicate the fetch here.
|
||||
int t20 = read_sumTex( sumTex, sampler, (int2)(x + r2, y), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t23 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r3), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t70 = read_sumTex( sumTex, sampler, (int2)(x + r7, y), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t73 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r3), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t26 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r6), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t76 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r6), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t29 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r9), c_img_rows, c_img_cols, sumTex_step );
|
||||
int t79 = read_sumTex( sumTex, sampler, (int2)(x + r7, y + r9), c_img_rows, c_img_cols, sumTex_step );
|
||||
|
||||
d = calcAxisAlignedDerivative(t20, t23, t70, t73, (r7 - r2) * (r3),
|
||||
t26, t29, t76, t79, (r7 - r2) * (r9 - r6),
|
||||
t23, t26, t73, t76, (r7 - r2) * (r6 - r3));
|
||||
}
|
||||
const float dy = (float)d;
|
||||
|
||||
// Calculate the approximated derivative in the xy-direction
|
||||
d = 0;
|
||||
{
|
||||
// There's no saving us here, we just have to get all of the pixels in
|
||||
// separate fetches
|
||||
F t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r1, y + r1), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r1, y + r4), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r4, y + r1), c_img_rows, c_img_cols, sumTex_step );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r4, y + r4), c_img_rows, c_img_cols, sumTex_step );
|
||||
d += t / ((r4 - r1) * (r4 - r1));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r5, y + r1), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r5, y + r4), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r8, y + r1), c_img_rows, c_img_cols, sumTex_step );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r8, y + r4), c_img_rows, c_img_cols, sumTex_step );
|
||||
d -= t / ((r8 - r5) * (r4 - r1));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r1, y + r5), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r1, y + r8), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r4, y + r5), c_img_rows, c_img_cols, sumTex_step );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r4, y + r8), c_img_rows, c_img_cols, sumTex_step );
|
||||
d -= t / ((r4 - r1) * (r8 - r5));
|
||||
|
||||
t = 0;
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r5, y + r5), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r5, y + r8), c_img_rows, c_img_cols, sumTex_step );
|
||||
t -= read_sumTex( sumTex, sampler, (int2)(x + r8, y + r5), c_img_rows, c_img_cols, sumTex_step );
|
||||
t += read_sumTex( sumTex, sampler, (int2)(x + r8, y + r8), c_img_rows, c_img_cols, sumTex_step );
|
||||
d += t / ((r8 - r5) * (r8 - r5));
|
||||
}
|
||||
const float dxy = (float)d;
|
||||
|
||||
det [j + margin + det_step * (layer * c_layer_rows + i + margin)] = dx * dy - 0.81f * dxy * dxy;
|
||||
trace[j + margin + trace_step * (layer * c_layer_rows + i + margin)] = dx + dy;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// NONMAX
|
||||
|
||||
@ -309,10 +319,10 @@ bool within_check(IMAGE_INT32 maskSumTex, int sum_i, int sum_j, int size, int ro
|
||||
|
||||
float d = 0;
|
||||
|
||||
int dx1 = convert_int_rte(ratio * c_DM[0]);
|
||||
int dy1 = convert_int_rte(ratio * c_DM[1]);
|
||||
int dx2 = convert_int_rte(ratio * c_DM[2]);
|
||||
int dy2 = convert_int_rte(ratio * c_DM[3]);
|
||||
int dx1 = round(ratio * c_DM[0]);
|
||||
int dy1 = round(ratio * c_DM[1]);
|
||||
int dx2 = round(ratio * c_DM[2]);
|
||||
int dy2 = round(ratio * c_DM[3]);
|
||||
|
||||
float t = 0;
|
||||
|
||||
@ -572,7 +582,7 @@ void icvFindMaximaInLayer(
|
||||
}
|
||||
|
||||
// solve 3x3 linear system Ax=b for floating point input
|
||||
inline bool solve3x3_float(volatile __local const float4 *A, volatile __local const float *b, volatile __local float *x)
|
||||
inline bool solve3x3_float(const float4 *A, const float *b, float *x)
|
||||
{
|
||||
float det = A[0].x * (A[1].y * A[2].z - A[1].z * A[2].y)
|
||||
- A[0].y * (A[1].x * A[2].z - A[1].z * A[2].x)
|
||||
@ -651,7 +661,7 @@ void icvInterpolateKeypoint(
|
||||
|
||||
if (get_local_id(0) == 0 && get_local_id(1) == 0 && get_local_id(2) == 0)
|
||||
{
|
||||
volatile __local float dD[3];
|
||||
float dD[3];
|
||||
|
||||
//dx
|
||||
dD[0] = -0.5f * (N9[1][1][2] - N9[1][1][0]);
|
||||
@ -660,7 +670,7 @@ void icvInterpolateKeypoint(
|
||||
//ds
|
||||
dD[2] = -0.5f * (N9[2][1][1] - N9[0][1][1]);
|
||||
|
||||
volatile __local float4 H[3];
|
||||
float4 H[3];
|
||||
|
||||
//dxx
|
||||
H[0].x = N9[1][1][0] - 2.0f * N9[1][1][1] + N9[1][1][2];
|
||||
@ -681,7 +691,7 @@ void icvInterpolateKeypoint(
|
||||
//dss
|
||||
H[2].z = N9[0][1][1] - 2.0f * N9[1][1][1] + N9[2][1][1];
|
||||
|
||||
volatile __local float x[3];
|
||||
float x[3];
|
||||
|
||||
if (solve3x3_float(H, dD, x))
|
||||
{
|
||||
@ -711,7 +721,7 @@ void icvInterpolateKeypoint(
|
||||
sampled in a circle of radius 6s using wavelets of size 4s.
|
||||
We ensure the gradient wavelet size is even to ensure the
|
||||
wavelet pattern is balanced and symmetric around its center */
|
||||
const int grad_wav_size = 2 * convert_int_rte(2.0f * s);
|
||||
const int grad_wav_size = 2 * round(2.0f * s);
|
||||
|
||||
// check when grad_wav_size is too big
|
||||
if ((c_img_rows + 1) >= grad_wav_size && (c_img_cols + 1) >= grad_wav_size)
|
||||
@ -737,9 +747,12 @@ void icvInterpolateKeypoint(
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Orientation
|
||||
|
||||
#define ORI_SEARCH_INC 5
|
||||
#define ORI_WIN 60
|
||||
#define ORI_SAMPLES 113
|
||||
#define ORI_WIN 60
|
||||
#define ORI_SAMPLES 113
|
||||
|
||||
// The distance between samples in the beginning of the reduction
|
||||
#define ORI_RESPONSE_REDUCTION_WIDTH 48
|
||||
#define ORI_RESPONSE_ARRAY_SIZE (ORI_RESPONSE_REDUCTION_WIDTH * 2)
|
||||
|
||||
__constant float c_aptX[ORI_SAMPLES] = {-6, -5, -5, -5, -5, -5, -5, -5, -4, -4, -4, -4, -4, -4, -4, -4, -4, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6};
|
||||
__constant float c_aptY[ORI_SAMPLES] = {0, -3, -2, -1, 0, 1, 2, 3, -4, -3, -2, -1, 0, 1, 2, 3, 4, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, -4, -3, -2, -1, 0, 1, 2, 3, 4, -3, -2, -1, 0, 1, 2, 3, 0};
|
||||
@ -833,12 +846,15 @@ void icvCalcOrientation(
|
||||
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
|
||||
|
||||
|
||||
volatile __local float s_X[128];
|
||||
volatile __local float s_Y[128];
|
||||
volatile __local float s_angle[128];
|
||||
__local float s_X[ORI_SAMPLES];
|
||||
__local float s_Y[ORI_SAMPLES];
|
||||
__local float s_angle[ORI_SAMPLES];
|
||||
|
||||
volatile __local float s_sumx[32 * 4];
|
||||
volatile __local float s_sumy[32 * 4];
|
||||
// Need to allocate enough to make the reduction work without accessing
|
||||
// past the end of the array.
|
||||
__local float s_sumx[ORI_RESPONSE_ARRAY_SIZE];
|
||||
__local float s_sumy[ORI_RESPONSE_ARRAY_SIZE];
|
||||
__local float s_mod[ORI_RESPONSE_ARRAY_SIZE];
|
||||
|
||||
/* The sampling intervals and wavelet sizes for selecting an orientation
|
||||
and building the keypoint descriptor are defined relative to 's' */
|
||||
@ -849,28 +865,60 @@ void icvCalcOrientation(
|
||||
sampled in a circle of radius 6s using wavelets of size 4s.
|
||||
We ensure the gradient wavelet size is even to ensure the
|
||||
wavelet pattern is balanced and symmetric around its center */
|
||||
const int grad_wav_size = 2 * convert_int_rte(2.0f * s);
|
||||
const int grad_wav_size = 2 * round(2.0f * s);
|
||||
|
||||
// check when grad_wav_size is too big
|
||||
if ((c_img_rows + 1) < grad_wav_size || (c_img_cols + 1) < grad_wav_size)
|
||||
return;
|
||||
|
||||
// Calc X, Y, angle and store it to shared memory
|
||||
const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0);
|
||||
const int tid = get_local_id(0);
|
||||
// Initialize values that are only used as part of the reduction later.
|
||||
if (tid < ORI_RESPONSE_ARRAY_SIZE - ORI_LOCAL_SIZE) {
|
||||
s_mod[tid + ORI_LOCAL_SIZE] = 0.0f;
|
||||
}
|
||||
|
||||
float X = 0.0f, Y = 0.0f, angle = 0.0f;
|
||||
float ratio = (float)grad_wav_size / 4;
|
||||
|
||||
if (tid < ORI_SAMPLES)
|
||||
// Offsets of the half and full wavelet extent. Float literals keep the arithmetic in
// single precision (no double-precision dependency); scaling by 2 and 4 is exact,
// so the rounded values are unchanged.
int r2 = round(ratio * 2.0f);
|
||||
int r4 = round(ratio * 4.0f);
|
||||
for (int i = tid; i < ORI_SAMPLES; i += ORI_LOCAL_SIZE )
|
||||
{
|
||||
float X = 0.0f, Y = 0.0f, angle = 0.0f;
|
||||
const float margin = (float)(grad_wav_size - 1) / 2.0f;
|
||||
const int x = convert_int_rte(featureX[get_group_id(0)] + c_aptX[tid] * s - margin);
|
||||
const int y = convert_int_rte(featureY[get_group_id(0)] + c_aptY[tid] * s - margin);
|
||||
const int x = round(featureX[get_group_id(0)] + c_aptX[i] * s - margin);
|
||||
const int y = round(featureY[get_group_id(0)] + c_aptY[i] * s - margin);
|
||||
|
||||
if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size &&
|
||||
x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
|
||||
x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
|
||||
{
|
||||
X = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NX, 4, grad_wav_size, y, x, c_img_rows, c_img_cols, sum_step);
|
||||
Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x, c_img_rows, c_img_cols, sum_step);
|
||||
|
||||
float apt = c_aptW[i];
|
||||
|
||||
// Compute the haar sum without fetching duplicate pixels.
|
||||
float t00 = read_sumTex( sumTex, sampler, (int2)(x, y), c_img_rows, c_img_cols, sum_step);
|
||||
float t02 = read_sumTex( sumTex, sampler, (int2)(x, y + r2), c_img_rows, c_img_cols, sum_step);
|
||||
float t04 = read_sumTex( sumTex, sampler, (int2)(x, y + r4), c_img_rows, c_img_cols, sum_step);
|
||||
float t20 = read_sumTex( sumTex, sampler, (int2)(x + r2, y), c_img_rows, c_img_cols, sum_step);
|
||||
float t24 = read_sumTex( sumTex, sampler, (int2)(x + r2, y + r4), c_img_rows, c_img_cols, sum_step);
|
||||
float t40 = read_sumTex( sumTex, sampler, (int2)(x + r4, y), c_img_rows, c_img_cols, sum_step);
|
||||
float t42 = read_sumTex( sumTex, sampler, (int2)(x + r4, y + r2), c_img_rows, c_img_cols, sum_step);
|
||||
float t44 = read_sumTex( sumTex, sampler, (int2)(x + r4, y + r4), c_img_rows, c_img_cols, sum_step);
|
||||
|
||||
F t = t00 - t04 - t20 + t24;
|
||||
X -= t / ((r2) * (r4));
|
||||
|
||||
t = t20 - t24 - t40 + t44;
|
||||
X += t / ((r4 - r2) * (r4));
|
||||
|
||||
t = t00 - t02 - t40 + t42;
|
||||
Y += t / ((r2) * (r4));
|
||||
|
||||
t = t02 - t04 - t42 + t44;
|
||||
Y -= t / ((r4) * (r4 - r2));
|
||||
|
||||
X = apt*X;
|
||||
Y = apt*Y;
|
||||
|
||||
angle = atan2(Y, X);
|
||||
|
||||
@ -879,76 +927,61 @@ void icvCalcOrientation(
|
||||
angle *= 180.0f / CV_PI_F;
|
||||
|
||||
}
|
||||
|
||||
s_X[i] = X;
|
||||
s_Y[i] = Y;
|
||||
s_angle[i] = angle;
|
||||
}
|
||||
s_X[tid] = X;
|
||||
s_Y[tid] = Y;
|
||||
s_angle[tid] = angle;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
float bestx = 0, besty = 0, best_mod = 0;
|
||||
float sumx = 0.0f, sumy = 0.0f;
|
||||
const int dir = tid * ORI_SEARCH_INC;
|
||||
#pragma unroll
|
||||
for (int i = 0; i < ORI_SAMPLES; ++i) {
|
||||
int angle = round(s_angle[i]);
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < 18; ++i)
|
||||
{
|
||||
const int dir = (i * 4 + get_local_id(1)) * ORI_SEARCH_INC;
|
||||
int d = abs(angle - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[i];
|
||||
sumy += s_Y[i];
|
||||
}
|
||||
}
|
||||
s_sumx[tid] = sumx;
|
||||
s_sumy[tid] = sumy;
|
||||
s_mod[tid] = sumx*sumx + sumy*sumy;
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
volatile float sumx = 0.0f, sumy = 0.0f;
|
||||
int d = abs(convert_int_rte(s_angle[get_local_id(0)]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx = s_X[get_local_id(0)];
|
||||
sumy = s_Y[get_local_id(0)];
|
||||
}
|
||||
d = abs(convert_int_rte(s_angle[get_local_id(0) + 32]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[get_local_id(0) + 32];
|
||||
sumy += s_Y[get_local_id(0) + 32];
|
||||
}
|
||||
d = abs(convert_int_rte(s_angle[get_local_id(0) + 64]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[get_local_id(0) + 64];
|
||||
sumy += s_Y[get_local_id(0) + 64];
|
||||
}
|
||||
d = abs(convert_int_rte(s_angle[get_local_id(0) + 96]) - dir);
|
||||
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
|
||||
{
|
||||
sumx += s_X[get_local_id(0) + 96];
|
||||
sumy += s_Y[get_local_id(0) + 96];
|
||||
}
|
||||
reduce_32_sum(s_sumx + get_local_id(1) * 32, &sumx, get_local_id(0));
|
||||
reduce_32_sum(s_sumy + get_local_id(1) * 32, &sumy, get_local_id(0));
|
||||
|
||||
const float temp_mod = sumx * sumx + sumy * sumy;
|
||||
if (temp_mod > best_mod)
|
||||
{
|
||||
best_mod = temp_mod;
|
||||
bestx = sumx;
|
||||
besty = sumy;
|
||||
// This reduction searches for the longest wavelet response vector. The first
|
||||
// step uses all of the work items in the workgroup to narrow the search
|
||||
// down to the three candidates. It requires s_mod to have a few more
|
||||
// elements allocated past the work-group size, which are pre-initialized to
|
||||
// 0.0f above.
|
||||
for(int t = ORI_RESPONSE_REDUCTION_WIDTH; t >= 3; t /= 2) {
|
||||
if (tid < t) {
|
||||
if (s_mod[tid] < s_mod[tid + t]) {
|
||||
s_mod[tid] = s_mod[tid + t];
|
||||
s_sumx[tid] = s_sumx[tid + t];
|
||||
s_sumy[tid] = s_sumy[tid + t];
|
||||
}
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
}
|
||||
if (get_local_id(0) == 0)
|
||||
{
|
||||
s_X[get_local_id(1)] = bestx;
|
||||
s_Y[get_local_id(1)] = besty;
|
||||
s_angle[get_local_id(1)] = best_mod;
|
||||
}
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (get_local_id(1) == 0 && get_local_id(0) == 0)
|
||||
// Do the final reduction and write out the result.
|
||||
if (tid == 0)
|
||||
{
|
||||
int bestIdx = 0;
|
||||
|
||||
if (s_angle[1] > s_angle[bestIdx])
|
||||
// The loop above narrowed the search for the longest vector to three
|
||||
// possibilities. Pick the best here.
|
||||
if (s_mod[1] > s_mod[bestIdx])
|
||||
bestIdx = 1;
|
||||
if (s_angle[2] > s_angle[bestIdx])
|
||||
if (s_mod[2] > s_mod[bestIdx])
|
||||
bestIdx = 2;
|
||||
if (s_angle[3] > s_angle[bestIdx])
|
||||
bestIdx = 3;
|
||||
|
||||
float kp_dir = atan2(s_Y[bestIdx], s_X[bestIdx]);
|
||||
float kp_dir = atan2(s_sumy[bestIdx], s_sumx[bestIdx]);
|
||||
if (kp_dir < 0)
|
||||
kp_dir += 2.0f * CV_PI_F;
|
||||
kp_dir *= 180.0f / CV_PI_F;
|
||||
@ -961,7 +994,6 @@ void icvCalcOrientation(
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
__kernel
|
||||
void icvSetUpright(
|
||||
__global float * keypoints,
|
||||
@ -1035,8 +1067,8 @@ inline float linearFilter(
|
||||
|
||||
float out = 0.0f;
|
||||
|
||||
const int x1 = convert_int_rtn(x);
|
||||
const int y1 = convert_int_rtn(y);
|
||||
const int x1 = round(x);
|
||||
const int y1 = round(y);
|
||||
const int x2 = x1 + 1;
|
||||
const int y2 = y1 + 1;
|
||||
|
||||
|
@ -46,6 +46,7 @@
|
||||
|
||||
#ifdef HAVE_OPENCV_OCL
|
||||
#include <cstdio>
|
||||
#include <sstream>
|
||||
#include "opencl_kernels.hpp"
|
||||
|
||||
using namespace cv;
|
||||
@ -57,18 +58,25 @@ namespace cv
|
||||
{
|
||||
namespace ocl
|
||||
{
|
||||
// The number of degrees between orientation samples in calcOrientation
|
||||
const static int ORI_SEARCH_INC = 5;
|
||||
// The local size of the calcOrientation kernel
|
||||
const static int ORI_LOCAL_SIZE = (360 / ORI_SEARCH_INC);
|
||||
|
||||
static void openCLExecuteKernelSURF(Context *clCxt, const cv::ocl::ProgramEntry* source, String kernelName, size_t globalThreads[3],
|
||||
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
|
||||
{
|
||||
char optBuf [100] = {0};
|
||||
char * optBufPtr = optBuf;
|
||||
std::stringstream optsStr;
|
||||
optsStr << "-D ORI_LOCAL_SIZE=" << ORI_LOCAL_SIZE << " ";
|
||||
optsStr << "-D ORI_SEARCH_INC=" << ORI_SEARCH_INC << " ";
|
||||
cl_kernel kernel;
|
||||
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optBufPtr);
|
||||
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optsStr.str().c_str());
|
||||
size_t wave_size = queryWaveFrontSize(kernel);
|
||||
CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
|
||||
sprintf(optBufPtr, "-D WAVE_SIZE=%d", static_cast<int>(wave_size));
|
||||
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optBufPtr);
|
||||
optsStr << "-D WAVE_SIZE=" << wave_size;
|
||||
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optsStr.str().c_str());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -601,8 +609,8 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&img_cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&surf_.sum.step));
|
||||
|
||||
size_t localThreads[3] = {32, 4, 1};
|
||||
size_t globalThreads[3] = {nFeatures *localThreads[0], localThreads[1], 1};
|
||||
size_t localThreads[3] = {ORI_LOCAL_SIZE, 1, 1};
|
||||
size_t globalThreads[3] = {nFeatures * localThreads[0], 1, 1};
|
||||
|
||||
openCLExecuteKernelSURF(clCxt, &surfprog, kernelName, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
|
@ -44,6 +44,12 @@ PERF_TEST_P(ImageName_MinSize, CascadeClassifierLBPFrontalFace,
|
||||
cc.detectMultiScale(img, faces, 1.1, 3, 0, minSize);
|
||||
stopTimer();
|
||||
}
|
||||
// For some reason the OpenCL version detects a face that the CPU version does not detect, so we just remove it.
|
||||
// TODO better solution: implement a smart way of comparing two sets of rectangles
|
||||
if( filename == "cv/shared/1_itseez-0000492.png" && faces.size() == (size_t)3 )
|
||||
{
|
||||
faces.erase(faces.begin());
|
||||
}
|
||||
|
||||
std::sort(faces.begin(), faces.end(), comparators::RectLess());
|
||||
SANITY_CHECK(faces, 3.001 * faces.size());
|
||||
|
@ -654,6 +654,7 @@ bool LBPEvaluator::Feature :: read(const FileNode& node )
|
||||
LBPEvaluator::LBPEvaluator()
|
||||
{
|
||||
features = makePtr<std::vector<Feature> >();
|
||||
optfeatures = makePtr<std::vector<OptFeature> >();
|
||||
}
|
||||
LBPEvaluator::~LBPEvaluator()
|
||||
{
|
||||
@ -662,11 +663,12 @@ LBPEvaluator::~LBPEvaluator()
|
||||
bool LBPEvaluator::read( const FileNode& node )
|
||||
{
|
||||
features->resize(node.size());
|
||||
featuresPtr = &(*features)[0];
|
||||
optfeaturesPtr = &(*optfeatures)[0];
|
||||
FileNodeIterator it = node.begin(), it_end = node.end();
|
||||
std::vector<Feature>& ff = *features;
|
||||
for(int i = 0; it != it_end; ++it, i++)
|
||||
{
|
||||
if(!featuresPtr[i].read(*it))
|
||||
if(!ff[i].read(*it))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@ -677,31 +679,58 @@ Ptr<FeatureEvaluator> LBPEvaluator::clone() const
|
||||
Ptr<LBPEvaluator> ret = makePtr<LBPEvaluator>();
|
||||
ret->origWinSize = origWinSize;
|
||||
ret->features = features;
|
||||
ret->featuresPtr = &(*ret->features)[0];
|
||||
ret->optfeatures = optfeatures;
|
||||
ret->optfeaturesPtr = ret->optfeatures.empty() ? 0 : &(*ret->optfeatures)[0];
|
||||
ret->sum0 = sum0, ret->sum = sum;
|
||||
ret->normrect = normrect;
|
||||
ret->offset = offset;
|
||||
ret->pwin = pwin;
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool LBPEvaluator::setImage( InputArray _image, Size _origWinSize, Size )
|
||||
bool LBPEvaluator::setImage( InputArray _image, Size _origWinSize, Size _sumSize )
|
||||
{
|
||||
Mat image = _image.getMat();
|
||||
int rn = image.rows+1, cn = image.cols+1;
|
||||
origWinSize = _origWinSize;
|
||||
Size imgsz = _image.size();
|
||||
int cols = imgsz.width, rows = imgsz.height;
|
||||
|
||||
if( image.cols < origWinSize.width || image.rows < origWinSize.height )
|
||||
if (imgsz.width < origWinSize.width || imgsz.height < origWinSize.height)
|
||||
return false;
|
||||
|
||||
if( sum0.rows < rn || sum0.cols < cn )
|
||||
origWinSize = _origWinSize;
|
||||
|
||||
int rn = _sumSize.height, cn = _sumSize.width;
|
||||
int sumStep;
|
||||
CV_Assert(rn >= rows+1 && cn >= cols+1);
|
||||
|
||||
if( _image.isUMat() )
|
||||
{
|
||||
usum0.create(rn, cn, CV_32S);
|
||||
usum = UMat(usum0, Rect(0, 0, cols+1, rows+1));
|
||||
|
||||
integral(_image, usum, noArray(), noArray(), CV_32S);
|
||||
sumStep = (int)(usum.step/usum.elemSize());
|
||||
}
|
||||
else
|
||||
{
|
||||
sum0.create(rn, cn, CV_32S);
|
||||
sum = Mat(rn, cn, CV_32S, sum0.data);
|
||||
integral(image, sum);
|
||||
sum = sum0(Rect(0, 0, cols+1, rows+1));
|
||||
|
||||
integral(_image, sum, noArray(), noArray(), CV_32S);
|
||||
sumStep = (int)(sum.step/sum.elemSize());
|
||||
}
|
||||
|
||||
size_t fi, nfeatures = features->size();
|
||||
const std::vector<Feature>& ff = *features;
|
||||
|
||||
if( sumSize0 != _sumSize )
|
||||
{
|
||||
optfeatures->resize(nfeatures);
|
||||
optfeaturesPtr = &(*optfeatures)[0];
|
||||
for( fi = 0; fi < nfeatures; fi++ )
|
||||
optfeaturesPtr[fi].setOffsets( ff[fi], sumStep );
|
||||
}
|
||||
if( _image.isUMat() && (sumSize0 != _sumSize || ufbuf.empty()) )
|
||||
copyVectorToUMat(*optfeatures, ufbuf);
|
||||
sumSize0 = _sumSize;
|
||||
|
||||
for( fi = 0; fi < nfeatures; fi++ )
|
||||
featuresPtr[fi].updatePtrs( sum );
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -711,10 +740,18 @@ bool LBPEvaluator::setWindow( Point pt )
|
||||
pt.x + origWinSize.width >= sum.cols ||
|
||||
pt.y + origWinSize.height >= sum.rows )
|
||||
return false;
|
||||
offset = pt.y * ((int)sum.step/sizeof(int)) + pt.x;
|
||||
pwin = &sum.at<int>(pt);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
void LBPEvaluator::getUMats(std::vector<UMat>& bufs)
|
||||
{
|
||||
bufs.clear();
|
||||
bufs.push_back(usum);
|
||||
bufs.push_back(ufbuf);
|
||||
}
|
||||
|
||||
//---------------------------------------------- HOGEvaluator ---------------------------------------
|
||||
bool HOGEvaluator::Feature :: read( const FileNode& node )
|
||||
{
|
||||
@ -1133,50 +1170,84 @@ bool CascadeClassifierImpl::detectSingleScale( InputArray _image, Size processin
|
||||
bool CascadeClassifierImpl::ocl_detectSingleScale( InputArray _image, Size processingRectSize,
|
||||
int yStep, double factor, Size sumSize0 )
|
||||
{
|
||||
const int VECTOR_SIZE = 1;
|
||||
Ptr<HaarEvaluator> haar = featureEvaluator.dynamicCast<HaarEvaluator>();
|
||||
if( haar.empty() )
|
||||
return false;
|
||||
|
||||
haar->setImage(_image, data.origWinSize, sumSize0);
|
||||
|
||||
if( cascadeKernel.empty() )
|
||||
{
|
||||
cascadeKernel.create("runHaarClassifierStump", ocl::objdetect::cascadedetect_oclsrc,
|
||||
format("-D VECTOR_SIZE=%d", VECTOR_SIZE));
|
||||
if( cascadeKernel.empty() )
|
||||
return false;
|
||||
}
|
||||
int featureType = getFeatureType();
|
||||
std::vector<UMat> bufs;
|
||||
size_t globalsize[] = { processingRectSize.width/yStep, processingRectSize.height/yStep };
|
||||
bool ok = false;
|
||||
|
||||
if( ustages.empty() )
|
||||
{
|
||||
copyVectorToUMat(data.stages, ustages);
|
||||
copyVectorToUMat(data.stumps, ustumps);
|
||||
if( !data.subsets.empty() )
|
||||
copyVectorToUMat(data.subsets, usubsets);
|
||||
}
|
||||
|
||||
std::vector<UMat> bufs;
|
||||
haar->getUMats(bufs);
|
||||
CV_Assert(bufs.size() == 3);
|
||||
if( featureType == FeatureEvaluator::HAAR )
|
||||
{
|
||||
Ptr<HaarEvaluator> haar = featureEvaluator.dynamicCast<HaarEvaluator>();
|
||||
if( haar.empty() )
|
||||
return false;
|
||||
|
||||
Rect normrect = haar->getNormRect();
|
||||
haar->setImage(_image, data.origWinSize, sumSize0);
|
||||
if( haarKernel.empty() )
|
||||
{
|
||||
haarKernel.create("runHaarClassifierStump", ocl::objdetect::cascadedetect_oclsrc, "");
|
||||
if( haarKernel.empty() )
|
||||
return false;
|
||||
}
|
||||
|
||||
//processingRectSize = Size(yStep, yStep);
|
||||
size_t globalsize[] = { (processingRectSize.width/yStep + VECTOR_SIZE-1)/VECTOR_SIZE, processingRectSize.height/yStep };
|
||||
haar->getUMats(bufs);
|
||||
Rect normrect = haar->getNormRect();
|
||||
|
||||
cascadeKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
|
||||
ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sqsum
|
||||
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
|
||||
haarKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
|
||||
ocl::KernelArg::ReadOnlyNoSize(bufs[1]), // sqsum
|
||||
ocl::KernelArg::PtrReadOnly(bufs[2]), // optfeatures
|
||||
|
||||
// cascade classifier
|
||||
(int)data.stages.size(),
|
||||
ocl::KernelArg::PtrReadOnly(ustages),
|
||||
ocl::KernelArg::PtrReadOnly(ustumps),
|
||||
// cascade classifier
|
||||
(int)data.stages.size(),
|
||||
ocl::KernelArg::PtrReadOnly(ustages),
|
||||
ocl::KernelArg::PtrReadOnly(ustumps),
|
||||
|
||||
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
|
||||
processingRectSize,
|
||||
yStep, (float)factor,
|
||||
normrect, data.origWinSize, MAX_FACES);
|
||||
bool ok = cascadeKernel.run(2, globalsize, 0, true);
|
||||
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
|
||||
processingRectSize,
|
||||
yStep, (float)factor,
|
||||
normrect, data.origWinSize, MAX_FACES);
|
||||
ok = haarKernel.run(2, globalsize, 0, true);
|
||||
}
|
||||
else if( featureType == FeatureEvaluator::LBP )
|
||||
{
|
||||
Ptr<LBPEvaluator> lbp = featureEvaluator.dynamicCast<LBPEvaluator>();
|
||||
if( lbp.empty() )
|
||||
return false;
|
||||
|
||||
lbp->setImage(_image, data.origWinSize, sumSize0);
|
||||
if( lbpKernel.empty() )
|
||||
{
|
||||
lbpKernel.create("runLBPClassifierStump", ocl::objdetect::cascadedetect_oclsrc, "");
|
||||
if( lbpKernel.empty() )
|
||||
return false;
|
||||
}
|
||||
|
||||
lbp->getUMats(bufs);
|
||||
|
||||
int subsetSize = (data.ncategories + 31)/32;
|
||||
lbpKernel.args(ocl::KernelArg::ReadOnlyNoSize(bufs[0]), // sum
|
||||
ocl::KernelArg::PtrReadOnly(bufs[1]), // optfeatures
|
||||
|
||||
// cascade classifier
|
||||
(int)data.stages.size(),
|
||||
ocl::KernelArg::PtrReadOnly(ustages),
|
||||
ocl::KernelArg::PtrReadOnly(ustumps),
|
||||
ocl::KernelArg::PtrReadOnly(usubsets),
|
||||
subsetSize,
|
||||
|
||||
ocl::KernelArg::PtrWriteOnly(ufacepos), // positions
|
||||
processingRectSize,
|
||||
yStep, (float)factor,
|
||||
data.origWinSize, MAX_FACES);
|
||||
ok = lbpKernel.run(2, globalsize, 0, true);
|
||||
}
|
||||
//CV_Assert(ok);
|
||||
return ok;
|
||||
}
|
||||
@ -1225,6 +1296,7 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
|
||||
double scaleFactor, Size minObjectSize, Size maxObjectSize,
|
||||
bool outputRejectLevels )
|
||||
{
|
||||
int featureType = getFeatureType();
|
||||
Size imgsz = _image.size();
|
||||
int imgtype = _image.type();
|
||||
|
||||
@ -1238,7 +1310,9 @@ void CascadeClassifierImpl::detectMultiScaleNoGrouping( InputArray _image, std::
|
||||
maxObjectSize = imgsz;
|
||||
|
||||
bool use_ocl = ocl::useOpenCL() &&
|
||||
getFeatureType() == FeatureEvaluator::HAAR &&
|
||||
(featureType == FeatureEvaluator::HAAR ||
|
||||
featureType == FeatureEvaluator::LBP) &&
|
||||
ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
|
||||
!isOldFormatCascade() &&
|
||||
data.isStumpBased() &&
|
||||
maskGenerator.empty() &&
|
||||
@ -1564,7 +1638,8 @@ bool CascadeClassifierImpl::Data::read(const FileNode &root)
|
||||
bool CascadeClassifierImpl::read_(const FileNode& root)
|
||||
{
|
||||
tryOpenCL = true;
|
||||
cascadeKernel = ocl::Kernel();
|
||||
haarKernel = ocl::Kernel();
|
||||
lbpKernel = ocl::Kernel();
|
||||
ustages.release();
|
||||
ustumps.release();
|
||||
if( !data.read(root) )
|
||||
|
@ -149,7 +149,7 @@ protected:
|
||||
Ptr<MaskGenerator> maskGenerator;
|
||||
UMat ugrayImage, uimageBuffer;
|
||||
UMat ufacepos, ustages, ustumps, usubsets;
|
||||
ocl::Kernel cascadeKernel;
|
||||
ocl::Kernel haarKernel, lbpKernel;
|
||||
bool tryOpenCL;
|
||||
|
||||
Mutex mtx;
|
||||
@ -250,13 +250,11 @@ public:
|
||||
struct Feature
|
||||
{
|
||||
Feature();
|
||||
|
||||
bool read( const FileNode& node );
|
||||
|
||||
bool tilted;
|
||||
|
||||
enum { RECT_NUM = 3 };
|
||||
|
||||
struct
|
||||
{
|
||||
Rect r;
|
||||
@ -369,14 +367,20 @@ public:
|
||||
{
|
||||
Feature();
|
||||
Feature( int x, int y, int _block_w, int _block_h ) :
|
||||
rect(x, y, _block_w, _block_h) {}
|
||||
rect(x, y, _block_w, _block_h) {}
|
||||
|
||||
int calc( int offset ) const;
|
||||
void updatePtrs( const Mat& sum );
|
||||
bool read(const FileNode& node );
|
||||
|
||||
Rect rect; // weight and height for block
|
||||
const int* p[16]; // fast
|
||||
};
|
||||
|
||||
struct OptFeature
|
||||
{
|
||||
OptFeature();
|
||||
|
||||
int calc( const int* pwin ) const;
|
||||
void setOffsets( const Feature& _f, int step );
|
||||
int ofs[16];
|
||||
};
|
||||
|
||||
LBPEvaluator();
|
||||
@ -388,55 +392,60 @@ public:
|
||||
|
||||
virtual bool setImage(InputArray image, Size _origWinSize, Size);
|
||||
virtual bool setWindow(Point pt);
|
||||
virtual void getUMats(std::vector<UMat>& bufs);
|
||||
|
||||
int operator()(int featureIdx) const
|
||||
{ return featuresPtr[featureIdx].calc(offset); }
|
||||
{ return optfeaturesPtr[featureIdx].calc(pwin); }
|
||||
virtual int calcCat(int featureIdx) const
|
||||
{ return (*this)(featureIdx); }
|
||||
protected:
|
||||
Size origWinSize;
|
||||
Size origWinSize, sumSize0;
|
||||
Ptr<std::vector<Feature> > features;
|
||||
Feature* featuresPtr; // optimization
|
||||
Mat sum0, sum;
|
||||
Rect normrect;
|
||||
Ptr<std::vector<OptFeature> > optfeatures;
|
||||
OptFeature* optfeaturesPtr; // optimization
|
||||
|
||||
int offset;
|
||||
Mat sum0, sum;
|
||||
UMat usum0, usum, ufbuf;
|
||||
|
||||
const int* pwin;
|
||||
};
|
||||
|
||||
|
||||
inline LBPEvaluator::Feature :: Feature()
|
||||
{
|
||||
rect = Rect();
|
||||
}
|
||||
|
||||
inline LBPEvaluator::OptFeature :: OptFeature()
|
||||
{
|
||||
for( int i = 0; i < 16; i++ )
|
||||
p[i] = 0;
|
||||
ofs[i] = 0;
|
||||
}
|
||||
|
||||
inline int LBPEvaluator::Feature :: calc( int _offset ) const
|
||||
inline int LBPEvaluator::OptFeature :: calc( const int* p ) const
|
||||
{
|
||||
int cval = CALC_SUM_( p[5], p[6], p[9], p[10], _offset );
|
||||
int cval = CALC_SUM_OFS_( ofs[5], ofs[6], ofs[9], ofs[10], p );
|
||||
|
||||
return (CALC_SUM_( p[0], p[1], p[4], p[5], _offset ) >= cval ? 128 : 0) | // 0
|
||||
(CALC_SUM_( p[1], p[2], p[5], p[6], _offset ) >= cval ? 64 : 0) | // 1
|
||||
(CALC_SUM_( p[2], p[3], p[6], p[7], _offset ) >= cval ? 32 : 0) | // 2
|
||||
(CALC_SUM_( p[6], p[7], p[10], p[11], _offset ) >= cval ? 16 : 0) | // 5
|
||||
(CALC_SUM_( p[10], p[11], p[14], p[15], _offset ) >= cval ? 8 : 0)| // 8
|
||||
(CALC_SUM_( p[9], p[10], p[13], p[14], _offset ) >= cval ? 4 : 0)| // 7
|
||||
(CALC_SUM_( p[8], p[9], p[12], p[13], _offset ) >= cval ? 2 : 0)| // 6
|
||||
(CALC_SUM_( p[4], p[5], p[8], p[9], _offset ) >= cval ? 1 : 0);
|
||||
return (CALC_SUM_OFS_( ofs[0], ofs[1], ofs[4], ofs[5], p ) >= cval ? 128 : 0) | // 0
|
||||
(CALC_SUM_OFS_( ofs[1], ofs[2], ofs[5], ofs[6], p ) >= cval ? 64 : 0) | // 1
|
||||
(CALC_SUM_OFS_( ofs[2], ofs[3], ofs[6], ofs[7], p ) >= cval ? 32 : 0) | // 2
|
||||
(CALC_SUM_OFS_( ofs[6], ofs[7], ofs[10], ofs[11], p ) >= cval ? 16 : 0) | // 5
|
||||
(CALC_SUM_OFS_( ofs[10], ofs[11], ofs[14], ofs[15], p ) >= cval ? 8 : 0)| // 8
|
||||
(CALC_SUM_OFS_( ofs[9], ofs[10], ofs[13], ofs[14], p ) >= cval ? 4 : 0)| // 7
|
||||
(CALC_SUM_OFS_( ofs[8], ofs[9], ofs[12], ofs[13], p ) >= cval ? 2 : 0)| // 6
|
||||
(CALC_SUM_OFS_( ofs[4], ofs[5], ofs[8], ofs[9], p ) >= cval ? 1 : 0);
|
||||
}
|
||||
|
||||
inline void LBPEvaluator::Feature :: updatePtrs( const Mat& _sum )
|
||||
inline void LBPEvaluator::OptFeature :: setOffsets( const Feature& _f, int step )
|
||||
{
|
||||
const int* ptr = (const int*)_sum.data;
|
||||
size_t step = _sum.step/sizeof(ptr[0]);
|
||||
Rect tr = rect;
|
||||
CV_SUM_PTRS( p[0], p[1], p[4], p[5], ptr, tr, step );
|
||||
tr.x += 2*rect.width;
|
||||
CV_SUM_PTRS( p[2], p[3], p[6], p[7], ptr, tr, step );
|
||||
tr.y += 2*rect.height;
|
||||
CV_SUM_PTRS( p[10], p[11], p[14], p[15], ptr, tr, step );
|
||||
tr.x -= 2*rect.width;
|
||||
CV_SUM_PTRS( p[8], p[9], p[12], p[13], ptr, tr, step );
|
||||
Rect tr = _f.rect;
|
||||
CV_SUM_OFS( ofs[0], ofs[1], ofs[4], ofs[5], 0, tr, step );
|
||||
tr.x += 2*_f.rect.width;
|
||||
CV_SUM_OFS( ofs[2], ofs[3], ofs[6], ofs[7], 0, tr, step );
|
||||
tr.y += 2*_f.rect.height;
|
||||
CV_SUM_OFS( ofs[10], ofs[11], ofs[14], ofs[15], 0, tr, step );
|
||||
tr.x -= 2*_f.rect.width;
|
||||
CV_SUM_OFS( ofs[8], ofs[9], ofs[12], ofs[13], 0, tr, step );
|
||||
}
|
||||
|
||||
//---------------------------------------------- HOGEvaluator -------------------------------------------
|
||||
|
@ -336,7 +336,7 @@ icvCreateHidHaarClassifierCascade( CvHaarClassifierCascade* cascade )
|
||||
out->isStumpBased &= node_count == 1;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
#ifdef HAVE_IPP
|
||||
int can_use_ipp = !out->has_tilted_features && !out->is_tree && out->isStumpBased;
|
||||
|
||||
@ -392,7 +392,7 @@ icvCreateHidHaarClassifierCascade( CvHaarClassifierCascade* cascade )
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
*/
|
||||
cascade->hid_cascade = out;
|
||||
assert( (char*)haar_node_ptr - (char*)out <= datasize );
|
||||
|
||||
|
@ -1,19 +1,22 @@
|
||||
///////////////////////////// OpenCL kernels for face detection //////////////////////////////
|
||||
////////////////////////////// see the opencv/doc/license.txt ///////////////////////////////
|
||||
|
||||
typedef struct __attribute__((aligned(4))) OptFeature
|
||||
typedef struct __attribute__((aligned(4))) OptHaarFeature
|
||||
{
|
||||
int4 ofs[3] __attribute__((aligned (4)));
|
||||
float4 weight __attribute__((aligned (4)));
|
||||
}
|
||||
OptFeature;
|
||||
OptHaarFeature;
|
||||
|
||||
typedef struct __attribute__((aligned(4))) OptLBPFeature
|
||||
{
|
||||
int16 ofs __attribute__((aligned (4)));
|
||||
}
|
||||
OptLBPFeature;
|
||||
|
||||
typedef struct __attribute__((aligned(4))) Stump
|
||||
{
|
||||
int featureIdx __attribute__((aligned (4)));
|
||||
float threshold __attribute__((aligned (4))); // for ordered features only
|
||||
float left __attribute__((aligned (4)));
|
||||
float right __attribute__((aligned (4)));
|
||||
float4 st __attribute__((aligned (4)));
|
||||
}
|
||||
Stump;
|
||||
|
||||
@ -30,7 +33,7 @@ __kernel void runHaarClassifierStump(
|
||||
int sumstep, int sumoffset,
|
||||
__global const int* sqsum,
|
||||
int sqsumstep, int sqsumoffset,
|
||||
__global const OptFeature* optfeatures,
|
||||
__global const OptHaarFeature* optfeatures,
|
||||
|
||||
int nstages,
|
||||
__global const Stage* stages,
|
||||
@ -47,11 +50,8 @@ __kernel void runHaarClassifierStump(
|
||||
|
||||
if( ix < imgsize.x && iy < imgsize.y )
|
||||
{
|
||||
int ntrees;
|
||||
int stageIdx, i;
|
||||
float s = 0.f;
|
||||
int stageIdx;
|
||||
__global const Stump* stump = stumps;
|
||||
__global const OptFeature* f;
|
||||
|
||||
__global const int* psum = sum + mad24(iy, sumstep, ix);
|
||||
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x);
|
||||
@ -61,20 +61,19 @@ __kernel void runHaarClassifierStump(
|
||||
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
|
||||
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
|
||||
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
|
||||
float4 weight, vsval;
|
||||
int4 ofs, ofs0, ofs1, ofs2;
|
||||
nf = nf > 0 ? nf : 1.f;
|
||||
|
||||
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
|
||||
{
|
||||
ntrees = stages[stageIdx].ntrees;
|
||||
s = 0.f;
|
||||
int i, ntrees = stages[stageIdx].ntrees;
|
||||
float s = 0.f;
|
||||
for( i = 0; i < ntrees; i++, stump++ )
|
||||
{
|
||||
f = optfeatures + stump->featureIdx;
|
||||
weight = f->weight;
|
||||
float4 st = stump->st;
|
||||
__global const OptHaarFeature* f = optfeatures + as_int(st.x);
|
||||
float4 weight = f->weight;
|
||||
|
||||
ofs = f->ofs[0];
|
||||
int4 ofs = f->ofs[0];
|
||||
sval = (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.x;
|
||||
ofs = f->ofs[1];
|
||||
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.y;
|
||||
@ -84,7 +83,7 @@ __kernel void runHaarClassifierStump(
|
||||
sval += (psum[ofs.x] - psum[ofs.y] - psum[ofs.z] + psum[ofs.w])*weight.z;
|
||||
}
|
||||
|
||||
s += (sval < stump->threshold*nf) ? stump->left : stump->right;
|
||||
s += (sval < st.y*nf) ? st.z : st.w;
|
||||
}
|
||||
|
||||
if( s < stages[stageIdx].threshold )
|
||||
@ -106,13 +105,11 @@ __kernel void runHaarClassifierStump(
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
__kernel void runLBPClassifierStump(
|
||||
__global const int* sum,
|
||||
int sumstep, int sumoffset,
|
||||
__global const int* sqsum,
|
||||
int sqsumstep, int sqsumoffset,
|
||||
__global const OptFeature* optfeatures,
|
||||
__global const OptLBPFeature* optfeatures,
|
||||
|
||||
int nstages,
|
||||
__global const Stage* stages,
|
||||
@ -122,50 +119,48 @@ __kernel void runLBPClassifierStump(
|
||||
|
||||
volatile __global int* facepos,
|
||||
int2 imgsize, int xyscale, float factor,
|
||||
int4 normrect, int2 windowsize, int maxFaces)
|
||||
int2 windowsize, int maxFaces)
|
||||
{
|
||||
int ix = get_global_id(0)*xyscale*VECTOR_SIZE;
|
||||
int ix = get_global_id(0)*xyscale;
|
||||
int iy = get_global_id(1)*xyscale;
|
||||
sumstep /= sizeof(int);
|
||||
sqsumstep /= sizeof(int);
|
||||
|
||||
if( ix < imgsize.x && iy < imgsize.y )
|
||||
{
|
||||
int ntrees;
|
||||
int stageIdx, i;
|
||||
float s = 0.f;
|
||||
int stageIdx;
|
||||
__global const Stump* stump = stumps;
|
||||
__global const int* bitset = bitsets;
|
||||
__global const OptFeature* f;
|
||||
|
||||
__global const int* psum = sum + mad24(iy, sumstep, ix);
|
||||
__global const int* pnsum = psum + mad24(normrect.y, sumstep, normrect.x);
|
||||
int normarea = normrect.z * normrect.w;
|
||||
float invarea = 1.f/normarea;
|
||||
float sval = (pnsum[0] - pnsum[normrect.z] - pnsum[mul24(normrect.w, sumstep)] +
|
||||
pnsum[mad24(normrect.w, sumstep, normrect.z)])*invarea;
|
||||
float sqval = (sqsum[mad24(iy + normrect.y, sqsumstep, ix + normrect.x)])*invarea;
|
||||
float nf = (float)normarea * sqrt(max(sqval - sval * sval, 0.f));
|
||||
float4 weight;
|
||||
int4 ofs;
|
||||
nf = nf > 0 ? nf : 1.f;
|
||||
__global const int* p = sum + mad24(iy, sumstep, ix);
|
||||
|
||||
for( stageIdx = 0; stageIdx < nstages; stageIdx++ )
|
||||
{
|
||||
ntrees = stages[stageIdx].ntrees;
|
||||
s = 0.f;
|
||||
for( i = 0; i < ntrees; i++, stump++, bitset += bitsetSize )
|
||||
int i, ntrees = stages[stageIdx].ntrees;
|
||||
float s = 0.f;
|
||||
for( i = 0; i < ntrees; i++, stump++, bitsets += bitsetSize )
|
||||
{
|
||||
f = optfeatures + stump->featureIdx;
|
||||
float4 st = stump->st;
|
||||
__global const OptLBPFeature* f = optfeatures + as_int(st.x);
|
||||
int16 ofs = f->ofs;
|
||||
|
||||
weight = f->weight;
|
||||
#define CALC_SUM_OFS_(p0, p1, p2, p3, ptr) \
|
||||
((ptr)[p0] - (ptr)[p1] - (ptr)[p2] + (ptr)[p3])
|
||||
|
||||
// compute LBP feature to val
|
||||
s += (bitset[val >> 5] & (1 << (val & 31))) ? stump->left : stump->right;
|
||||
int cval = CALC_SUM_OFS_( ofs.s5, ofs.s6, ofs.s9, ofs.sa, p );
|
||||
|
||||
int mask, idx = (CALC_SUM_OFS_( ofs.s0, ofs.s1, ofs.s4, ofs.s5, p ) >= cval ? 4 : 0); // 0
|
||||
idx |= (CALC_SUM_OFS_( ofs.s1, ofs.s2, ofs.s5, ofs.s6, p ) >= cval ? 2 : 0); // 1
|
||||
idx |= (CALC_SUM_OFS_( ofs.s2, ofs.s3, ofs.s6, ofs.s7, p ) >= cval ? 1 : 0); // 2
|
||||
|
||||
mask = (CALC_SUM_OFS_( ofs.s6, ofs.s7, ofs.sa, ofs.sb, p ) >= cval ? 16 : 0); // 5
|
||||
mask |= (CALC_SUM_OFS_( ofs.sa, ofs.sb, ofs.se, ofs.sf, p ) >= cval ? 8 : 0); // 8
|
||||
mask |= (CALC_SUM_OFS_( ofs.s9, ofs.sa, ofs.sd, ofs.se, p ) >= cval ? 4 : 0); // 7
|
||||
mask |= (CALC_SUM_OFS_( ofs.s8, ofs.s9, ofs.sc, ofs.sd, p ) >= cval ? 2 : 0); // 6
|
||||
mask |= (CALC_SUM_OFS_( ofs.s4, ofs.s5, ofs.s8, ofs.s9, p ) >= cval ? 1 : 0); // 7
|
||||
|
||||
s += (bitsets[idx] & (1 << mask)) ? st.z : st.w;
|
||||
}
|
||||
|
||||
if( s < stages[stageIdx].threshold )
|
||||
break;
|
||||
break;
|
||||
}
|
||||
|
||||
if( stageIdx == nstages )
|
||||
@ -182,4 +177,3 @@ __kernel void runLBPClassifierStump(
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
@ -699,3 +699,138 @@ Returns block descriptors computed for the whole image.
|
||||
* **DESCR_FORMAT_COL_BY_COL** - Column-major order.
|
||||
|
||||
The function is mainly used to learn the classifier.
|
||||
|
||||
|
||||
|
||||
ocl::ORB_OCL
|
||||
--------------
|
||||
.. ocv:class:: ocl::ORB_OCL
|
||||
|
||||
Class for extracting ORB features and descriptors from an image. ::
|
||||
|
||||
class ORB_OCL
|
||||
{
|
||||
public:
|
||||
enum
|
||||
{
|
||||
X_ROW = 0,
|
||||
Y_ROW,
|
||||
RESPONSE_ROW,
|
||||
ANGLE_ROW,
|
||||
OCTAVE_ROW,
|
||||
SIZE_ROW,
|
||||
ROWS_COUNT
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
DEFAULT_FAST_THRESHOLD = 20
|
||||
};
|
||||
|
||||
explicit ORB_OCL(int nFeatures = 500, float scaleFactor = 1.2f,
|
||||
int nLevels = 8, int edgeThreshold = 31,
|
||||
int firstLevel = 0, int WTA_K = 2,
|
||||
int scoreType = 0, int patchSize = 31);
|
||||
|
||||
void operator()(const oclMat& image, const oclMat& mask,
|
||||
std::vector<KeyPoint>& keypoints);
|
||||
void operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints);
|
||||
|
||||
void operator()(const oclMat& image, const oclMat& mask,
|
||||
std::vector<KeyPoint>& keypoints, oclMat& descriptors);
|
||||
void operator()(const oclMat& image, const oclMat& mask,
|
||||
oclMat& keypoints, oclMat& descriptors);
|
||||
|
||||
void downloadKeyPoints(oclMat& d_keypoints, std::vector<KeyPoint>& keypoints);
|
||||
|
||||
void convertKeyPoints(Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
|
||||
|
||||
int descriptorSize() const;
|
||||
int descriptorType() const;
|
||||
int defaultNorm() const;
|
||||
|
||||
void setFastParams(int threshold, bool nonmaxSupression = true);
|
||||
|
||||
void release();
|
||||
|
||||
bool blurForDescriptor;
|
||||
};
|
||||
|
||||
The class implements the ORB feature detection and description algorithm.
|
||||
|
||||
|
||||
|
||||
ocl::ORB_OCL::ORB_OCL
|
||||
------------------------
|
||||
Constructor.
|
||||
|
||||
.. ocv:function:: ocl::ORB_OCL::ORB_OCL(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31, int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31)
|
||||
|
||||
:param nFeatures: The maximum number of features to retain.
|
||||
|
||||
:param scaleFactor: Pyramid decimation ratio, greater than 1. ``scaleFactor==2`` means the classical pyramid, where each next level has 4x fewer pixels than the previous, but such a big scale factor will degrade feature matching scores dramatically. On the other hand, a scale factor too close to 1 means that you will need more pyramid levels to cover a certain scale range, and so the speed will suffer.
|
||||
|
||||
:param nLevels: The number of pyramid levels. The smallest level will have a linear size equal to ``input_image_linear_size/pow(scaleFactor, nLevels)``.
|
||||
|
||||
:param edgeThreshold: This is the size of the border where the features are not detected. It should roughly match the ``patchSize`` parameter.
|
||||
|
||||
:param firstLevel: It should be 0 in the current implementation.
|
||||
|
||||
:param WTA_K: The number of points that produce each element of the oriented BRIEF descriptor. The default value 2 means the BRIEF where we take a random point pair and compare their brightnesses, so we get 0/1 response. Other possible values are 3 and 4. For example, 3 means that we take 3 random points (of course, those point coordinates are random, but they are generated from the pre-defined seed, so each element of BRIEF descriptor is computed deterministically from the pixel rectangle), find point of maximum brightness and output index of the winner (0, 1 or 2). Such output will occupy 2 bits, and therefore it will need a special variant of Hamming distance, denoted as ``NORM_HAMMING2`` (2 bits per bin). When ``WTA_K=4``, we take 4 random points to compute each bin (that will also occupy 2 bits with possible values 0, 1, 2 or 3).
|
||||
|
||||
:param scoreType: The default HARRIS_SCORE means that the Harris algorithm is used to rank features (the score is written to ``KeyPoint::response`` and is used to retain the best ``nFeatures`` features); FAST_SCORE is an alternative value of the parameter that produces slightly less stable keypoints, but it is a little faster to compute.
|
||||
|
||||
:param patchSize: Size of the patch used by the oriented BRIEF descriptor. Of course, on smaller pyramid layers the perceived image area covered by a feature will be larger.
|
||||
|
||||
|
||||
|
||||
ocl::ORB_OCL::operator()
|
||||
--------------------------
|
||||
Detects keypoints and computes descriptors for them.
|
||||
|
||||
.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints)
|
||||
|
||||
.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints)
|
||||
|
||||
.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints, oclMat& descriptors)
|
||||
|
||||
.. ocv:function:: void ocl::ORB_OCL::operator()(const oclMat& image, const oclMat& mask, oclMat& keypoints, oclMat& descriptors)
|
||||
|
||||
:param image: Input 8-bit grayscale image.
|
||||
|
||||
:param mask: Optional input mask that marks the regions where we should detect features.
|
||||
|
||||
:param keypoints: The input/output vector of keypoints. Can be stored both in host and device memory. For device memory:
|
||||
|
||||
* ``X_ROW`` contains the horizontal coordinate of the i'th feature.
|
||||
* ``Y_ROW`` contains the vertical coordinate of the i'th feature.
|
||||
* ``RESPONSE_ROW`` contains the response of the i'th feature.
|
||||
* ``ANGLE_ROW`` contains the orientation of the i'th feature.
|
||||
* ``OCTAVE_ROW`` contains the octave of the i'th feature.
|
||||
* ``SIZE_ROW`` contains the size of the i'th feature.
|
||||
|
||||
:param descriptors: Computed descriptors. If ``blurForDescriptor`` is true, the image will be blurred before the descriptors are calculated.
|
||||
|
||||
|
||||
|
||||
ocl::ORB_OCL::downloadKeyPoints
|
||||
---------------------------------
|
||||
Download keypoints from device to host memory.
|
||||
|
||||
.. ocv:function:: static void ocl::ORB_OCL::downloadKeyPoints( const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints )
|
||||
|
||||
|
||||
|
||||
ocl::ORB_OCL::convertKeyPoints
|
||||
--------------------------------
|
||||
Converts keypoints from OCL representation to vector of ``KeyPoint``.
|
||||
|
||||
.. ocv:function:: static void ocl::ORB_OCL::convertKeyPoints( const Mat& d_keypoints, std::vector<KeyPoint>& keypoints )
|
||||
|
||||
|
||||
|
||||
ocl::ORB_OCL::release
|
||||
-----------------------
|
||||
Releases inner buffer memory.
|
||||
|
||||
.. ocv:function:: void ocl::ORB_OCL::release()
|
||||
|
@ -287,7 +287,7 @@ ocl::createSeparableLinearFilter_GPU
|
||||
----------------------------------------
|
||||
Creates a separable linear filter engine.
|
||||
|
||||
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT)
|
||||
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel, const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
|
||||
|
||||
:param srcType: Source array type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported.
|
||||
|
||||
@ -303,6 +303,8 @@ Creates a separable linear filter engine.
|
||||
|
||||
:param bordertype: Pixel extrapolation method.
|
||||
|
||||
:param imgSize: Source image size to choose optimal method for processing.
|
||||
|
||||
.. seealso:: :ocv:func:`ocl::getLinearRowFilter_GPU`, :ocv:func:`ocl::getLinearColumnFilter_GPU`, :ocv:func:`createSeparableLinearFilter`
|
||||
|
||||
|
||||
@ -334,7 +336,7 @@ ocl::createDerivFilter_GPU
|
||||
------------------------------
|
||||
Creates a filter engine for the generalized Sobel operator.
|
||||
|
||||
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT )
|
||||
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
|
||||
|
||||
:param srcType: Source image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported.
|
||||
|
||||
@ -348,6 +350,8 @@ Creates a filter engine for the generalized Sobel operator.
|
||||
|
||||
:param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.
|
||||
|
||||
:param imgSize: Source image size to choose optimal method for processing.
|
||||
|
||||
.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createDerivFilter`
|
||||
|
||||
|
||||
@ -405,7 +409,7 @@ ocl::createGaussianFilter_GPU
|
||||
---------------------------------
|
||||
Creates a Gaussian filter engine.
|
||||
|
||||
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT)
|
||||
.. ocv:function:: Ptr<FilterEngine_GPU> ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1) )
|
||||
|
||||
:param type: Source and destination image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` are supported.
|
||||
|
||||
@ -417,6 +421,8 @@ Creates a Gaussian filter engine.
|
||||
|
||||
:param bordertype: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`.
|
||||
|
||||
:param imgSize: Source image size to choose optimal method for processing.
|
||||
|
||||
.. seealso:: :ocv:func:`ocl::createSeparableLinearFilter_GPU`, :ocv:func:`createGaussianFilter`
|
||||
|
||||
ocl::GaussianBlur
|
||||
|
@ -695,17 +695,17 @@ namespace cv
|
||||
|
||||
//! returns the separable linear filter engine
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat &rowKernel,
|
||||
const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT);
|
||||
const Mat &columnKernel, const Point &anchor = Point(-1, -1), double delta = 0.0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));
|
||||
|
||||
//! returns the separable filter engine with the specified filters
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU> &rowFilter,
|
||||
const Ptr<BaseColumnFilter_GPU> &columnFilter);
|
||||
|
||||
//! returns the Gaussian filter engine
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT);
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, int bordertype = BORDER_DEFAULT, Size imgSize = Size(-1,-1));
|
||||
|
||||
//! returns filter engine for the generalized Sobel operator
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT );
|
||||
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU( int srcType, int dstType, int dx, int dy, int ksize, int borderType = BORDER_DEFAULT, Size imgSize = Size(-1,-1) );
|
||||
|
||||
//! applies Laplacian operator to the image
|
||||
// supports only ksize = 1 and ksize = 3
|
||||
@ -1439,8 +1439,10 @@ namespace cv
|
||||
oclMat Dx_;
|
||||
oclMat Dy_;
|
||||
oclMat eig_;
|
||||
oclMat eig_minmax_;
|
||||
oclMat minMaxbuf_;
|
||||
oclMat tmpCorners_;
|
||||
oclMat counter_;
|
||||
};
|
||||
|
||||
inline GoodFeaturesToTrackDetector_OCL::GoodFeaturesToTrackDetector_OCL(int maxCorners_, double qualityLevel_, double minDistance_,
|
||||
@ -1533,6 +1535,110 @@ namespace cv
|
||||
int bytes;
|
||||
};
|
||||
|
||||
////////////////////////////////// ORB Descriptor Extractor //////////////////////////////////
|
||||
class CV_EXPORTS ORB_OCL
|
||||
{
|
||||
public:
|
||||
enum
|
||||
{
|
||||
X_ROW = 0,
|
||||
Y_ROW,
|
||||
RESPONSE_ROW,
|
||||
ANGLE_ROW,
|
||||
OCTAVE_ROW,
|
||||
SIZE_ROW,
|
||||
ROWS_COUNT
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
DEFAULT_FAST_THRESHOLD = 20
|
||||
};
|
||||
|
||||
//! Constructor
|
||||
explicit ORB_OCL(int nFeatures = 500, float scaleFactor = 1.2f, int nLevels = 8, int edgeThreshold = 31,
|
||||
int firstLevel = 0, int WTA_K = 2, int scoreType = 0, int patchSize = 31);
|
||||
|
||||
//! Compute the ORB features on an image
|
||||
//! image - the image to compute the features (supports only CV_8UC1 images)
|
||||
//! mask - the mask to apply
|
||||
//! keypoints - the resulting keypoints
|
||||
void operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints);
|
||||
void operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints);
|
||||
|
||||
//! Compute the ORB features and descriptors on an image
|
||||
//! image - the image to compute the features (supports only CV_8UC1 images)
|
||||
//! mask - the mask to apply
|
||||
//! keypoints - the resulting keypoints
|
||||
//! descriptors - descriptors array
|
||||
void operator ()(const oclMat& image, const oclMat& mask, std::vector<KeyPoint>& keypoints, oclMat& descriptors);
|
||||
void operator ()(const oclMat& image, const oclMat& mask, oclMat& keypoints, oclMat& descriptors);
|
||||
|
||||
//! download keypoints from device to host memory
|
||||
static void downloadKeyPoints(const oclMat& d_keypoints, std::vector<KeyPoint>& keypoints);
|
||||
//! convert keypoints to KeyPoint vector
|
||||
static void convertKeyPoints(const Mat& d_keypoints, std::vector<KeyPoint>& keypoints);
|
||||
|
||||
//! returns the descriptor size in bytes
|
||||
inline int descriptorSize() const { return kBytes; }
|
||||
inline int descriptorType() const { return CV_8U; }
|
||||
inline int defaultNorm() const { return NORM_HAMMING; }
|
||||
|
||||
inline void setFastParams(int threshold, bool nonmaxSupression = true)
|
||||
{
|
||||
fastDetector_.threshold = threshold;
|
||||
fastDetector_.nonmaxSupression = nonmaxSupression;
|
||||
}
|
||||
|
||||
//! release temporary buffer's memory
|
||||
void release();
|
||||
|
||||
//! if true, image will be blurred before descriptors calculation
|
||||
bool blurForDescriptor;
|
||||
|
||||
private:
|
||||
enum { kBytes = 32 };
|
||||
|
||||
void buildScalePyramids(const oclMat& image, const oclMat& mask);
|
||||
|
||||
void computeKeyPointsPyramid();
|
||||
|
||||
void computeDescriptors(oclMat& descriptors);
|
||||
|
||||
void mergeKeyPoints(oclMat& keypoints);
|
||||
|
||||
int nFeatures_;
|
||||
float scaleFactor_;
|
||||
int nLevels_;
|
||||
int edgeThreshold_;
|
||||
int firstLevel_;
|
||||
int WTA_K_;
|
||||
int scoreType_;
|
||||
int patchSize_;
|
||||
|
||||
// The number of desired features per scale
|
||||
std::vector<size_t> n_features_per_level_;
|
||||
|
||||
// Points to compute BRIEF descriptors from
|
||||
oclMat pattern_;
|
||||
|
||||
std::vector<oclMat> imagePyr_;
|
||||
std::vector<oclMat> maskPyr_;
|
||||
|
||||
oclMat buf_;
|
||||
|
||||
std::vector<oclMat> keyPointsPyr_;
|
||||
std::vector<int> keyPointsCount_;
|
||||
|
||||
FAST_OCL fastDetector_;
|
||||
|
||||
Ptr<ocl::FilterEngine_GPU> blurFilter;
|
||||
|
||||
oclMat d_keypoints_;
|
||||
|
||||
oclMat uMax_;
|
||||
};
|
||||
|
||||
/////////////////////////////// PyrLKOpticalFlow /////////////////////////////////////
|
||||
|
||||
class CV_EXPORTS PyrLKOpticalFlow
|
||||
|
@ -72,5 +72,5 @@ int main(int argc, char ** argv)
|
||||
{
|
||||
::perf::TestBase::setModulePerformanceStrategy(::perf::PERF_STRATEGY_SIMPLE);
|
||||
|
||||
CV_PERF_TEST_MAIN_INTERNALS(ocl, impls, dumpOpenCLDevice())
|
||||
CV_PERF_TEST_MAIN_INTERNALS(ocl, impls, ::dumpOpenCLDevice())
|
||||
}
|
||||
|
103
modules/ocl/perf/perf_orb.cpp
Normal file
103
modules/ocl/perf/perf_orb.cpp
Normal file
@ -0,0 +1,103 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
// Authors:
|
||||
// * Peter Andreas Entschev, peter@entschev.com
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
|
||||
using namespace perf;
|
||||
|
||||
/////////////////// ORB ///////////////////
|
||||
|
||||
typedef std::tr1::tuple<std::string, int> Image_NFeatures_t;
|
||||
typedef perf::TestBaseWithParam<Image_NFeatures_t> Image_NFeatures;
|
||||
|
||||
PERF_TEST_P(Image_NFeatures, ORB,
            testing::Combine(testing::Values<string>("gpu/perf/aloe.png"),
                             testing::Values(4000)))
{
    declare.time(300.0);

    // Unpack the (image path, feature count) test parameter.
    const Image_NFeatures_t testParams = GetParam();
    const std::string relativeImagePath = std::tr1::get<0>(testParams);
    const int requestedFeatures = std::tr1::get<1>(testParams);

    // NOTE: identifiers that appear inside ASSERT_* / SANITY_CHECK* arguments are
    // kept unchanged in case those macros use the argument text.
    const cv::Mat img = imread(getDataPath(relativeImagePath), cv::IMREAD_GRAYSCALE);
    ASSERT_FALSE(img.empty());

    if (RUN_OCL_IMPL)
    {
        cv::ocl::ORB_OCL oclOrb(requestedFeatures);

        const cv::ocl::oclMat deviceImage(img);
        cv::ocl::oclMat deviceKeypoints;
        cv::ocl::oclMat deviceDescriptors;

        TEST_CYCLE() oclOrb(deviceImage, cv::ocl::oclMat(), deviceKeypoints, deviceDescriptors);

        // Bring the results back to the host.
        std::vector<cv::KeyPoint> ocl_keypoints;
        oclOrb.downloadKeyPoints(deviceKeypoints, ocl_keypoints);

        cv::Mat ocl_descriptors(deviceDescriptors);

        // Only the first 10 keypoints/descriptors take part in the sanity check.
        ocl_keypoints.resize(10);
        ocl_descriptors = ocl_descriptors.rowRange(0, 10);

        sortKeyPoints(ocl_keypoints, ocl_descriptors);

        SANITY_CHECK_KEYPOINTS(ocl_keypoints, 1e-4);
        SANITY_CHECK(ocl_descriptors);
    }
    else if (RUN_PLAIN_IMPL)
    {
        cv::ORB cpuOrb(requestedFeatures);

        std::vector<cv::KeyPoint> cpu_keypoints;
        cv::Mat cpu_descriptors;

        TEST_CYCLE() cpuOrb(img, cv::noArray(), cpu_keypoints, cpu_descriptors);

        SANITY_CHECK_KEYPOINTS(cpu_keypoints);
        SANITY_CHECK(cpu_descriptors);
    }
    else
        OCL_PERF_ELSE;
}
|
@ -59,6 +59,8 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define CV_BUILD_OCL_MODULE
|
||||
|
||||
#include <iomanip>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
@ -56,8 +56,19 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
|
||||
{
|
||||
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
|
||||
int pixels_per_work_item = 1;
|
||||
|
||||
String build_options = format("-D DEPTH_%d", src.depth());
|
||||
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
|
||||
{
|
||||
if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
|
||||
pixels_per_work_item = 4;
|
||||
else if (src.cols % 2 == 0)
|
||||
pixels_per_work_item = 2;
|
||||
else
|
||||
pixels_per_work_item = 1;
|
||||
}
|
||||
|
||||
String build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), src.oclchannels(), bidx, pixels_per_work_item);
|
||||
if (!additionalOptions.empty())
|
||||
build_options = build_options + additionalOptions;
|
||||
|
||||
@ -66,7 +77,6 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
@ -77,6 +87,73 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
|
||||
if (!data2.empty())
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));
|
||||
|
||||
size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
|
||||
#ifdef ANDROID
|
||||
size_t lt[3] = { 16, 10, 1 };
|
||||
#else
|
||||
size_t lt[3] = { 16, 16, 1 };
|
||||
#endif
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
|
||||
}
|
||||
|
||||
static void toHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
|
||||
const std::string & additionalOptions = std::string(),
|
||||
const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
|
||||
{
|
||||
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
|
||||
|
||||
std::string build_options = format("-D DEPTH_%d -D scn=%d -D bidx=%d", src.depth(), src.oclchannels(), bidx);
|
||||
if (!additionalOptions.empty())
|
||||
build_options += additionalOptions;
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
if (!data1.empty())
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data1.data ));
|
||||
if (!data2.empty())
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data2.data ));
|
||||
|
||||
size_t gt[3] = { dst.cols, dst.rows, 1 };
|
||||
#ifdef ANDROID
|
||||
size_t lt[3] = { 16, 10, 1 };
|
||||
#else
|
||||
size_t lt[3] = { 16, 16, 1 };
|
||||
#endif
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
|
||||
}
|
||||
|
||||
static void fromGray_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
|
||||
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
|
||||
{
|
||||
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
|
||||
if (!additionalOptions.empty())
|
||||
build_options += additionalOptions;
|
||||
|
||||
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
if (!data.empty())
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
|
||||
|
||||
size_t gt[3] = { dst.cols, dst.rows, 1 };
|
||||
#ifdef ANDROID
|
||||
size_t lt[3] = { 16, 10, 1 };
|
||||
@ -89,7 +166,50 @@ static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::
|
||||
static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
|
||||
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
|
||||
{
|
||||
String build_options = format("-D DEPTH_%d -D dcn=%d", src.depth(), dst.channels());
|
||||
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
|
||||
int pixels_per_work_item = 1;
|
||||
|
||||
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
|
||||
{
|
||||
if ((src.cols % 4 == 0) && (src.depth() == CV_8U))
|
||||
pixels_per_work_item = 4;
|
||||
else if (src.cols % 2 == 0)
|
||||
pixels_per_work_item = 2;
|
||||
else
|
||||
pixels_per_work_item = 1;
|
||||
}
|
||||
|
||||
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d -D pixels_per_work_item=%d", src.depth(), dst.channels(), bidx, pixels_per_work_item);
|
||||
if (!additionalOptions.empty())
|
||||
build_options += additionalOptions;
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
if (!data.empty())
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
|
||||
|
||||
size_t gt[3] = { dst.cols/pixels_per_work_item, dst.rows, 1 };
|
||||
#ifdef ANDROID
|
||||
size_t lt[3] = { 16, 10, 1 };
|
||||
#else
|
||||
size_t lt[3] = { 16, 16, 1 };
|
||||
#endif
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
|
||||
}
|
||||
|
||||
static void toRGB_NV12_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
|
||||
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
|
||||
{
|
||||
String build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
|
||||
if (!additionalOptions.empty())
|
||||
build_options = build_options + additionalOptions;
|
||||
|
||||
@ -101,7 +221,6 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
@ -119,10 +238,13 @@ static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::st
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
|
||||
}
|
||||
|
||||
static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
|
||||
static void fromHSV_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
|
||||
const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
|
||||
{
|
||||
String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s", src.depth(),
|
||||
dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
|
||||
std::string build_options = format("-D DEPTH_%d -D dcn=%d -D bidx=%d", src.depth(), dst.channels(), bidx);
|
||||
if (!additionalOptions.empty())
|
||||
build_options += additionalOptions;
|
||||
|
||||
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
|
||||
|
||||
@ -136,6 +258,36 @@ static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
if (!data.empty())
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&data.data ));
|
||||
|
||||
size_t gt[3] = { dst.cols, dst.rows, 1 };
|
||||
#ifdef ANDROID
|
||||
size_t lt[3] = { 16, 10, 1 };
|
||||
#else
|
||||
size_t lt[3] = { 16, 16, 1 };
|
||||
#endif
|
||||
openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
|
||||
}
|
||||
|
||||
static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
|
||||
{
|
||||
int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
|
||||
|
||||
String build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s",
|
||||
src.depth(), dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_offset ));
|
||||
|
||||
size_t gt[3] = { dst.cols, dst.rows, 1 };
|
||||
#ifdef ANDROID
|
||||
size_t lt[3] = { 16, 10, 1 };
|
||||
@ -147,8 +299,8 @@ static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
|
||||
|
||||
static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
|
||||
{
|
||||
String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d",
|
||||
src.depth(), greenbits, dst.channels());
|
||||
String build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d -D bidx=%d",
|
||||
src.depth(), greenbits, dst.channels(), bidx);
|
||||
int src_offset = src.offset >> 1, src_step = src.step >> 1;
|
||||
int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step / dst.elemSize1();
|
||||
|
||||
@ -157,7 +309,6 @@ static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int gree
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
@ -174,8 +325,8 @@ static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int gree
|
||||
|
||||
static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
|
||||
{
|
||||
String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d",
|
||||
src.depth(), greenbits, src.channels());
|
||||
String build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d -D bidx=%d",
|
||||
src.depth(), greenbits, src.channels(), bidx);
|
||||
int src_offset = (int)src.offset, src_step = (int)src.step;
|
||||
int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1;
|
||||
|
||||
@ -184,7 +335,6 @@ static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenb
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst_step));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&bidx));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset ));
|
||||
@ -272,7 +422,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
|
||||
CV_Assert(scn == 1);
|
||||
dcn = code == COLOR_GRAY2BGRA ? 4 : 3;
|
||||
dst.create(sz, CV_MAKETYPE(depth, dcn));
|
||||
toRGB_caller(src, dst, 0, "Gray2RGB");
|
||||
fromGray_caller(src, dst, 0, "Gray2RGB");
|
||||
break;
|
||||
}
|
||||
case COLOR_BGR2YUV: case COLOR_RGB2YUV:
|
||||
@ -303,7 +453,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
|
||||
|
||||
Size dstSz(sz.width, sz.height * 2 / 3);
|
||||
dst.create(dstSz, CV_MAKETYPE(depth, dcn));
|
||||
toRGB_caller(src, dst, bidx, "YUV2RGBA_NV12");
|
||||
toRGB_NV12_caller(src, dst, bidx, "YUV2RGBA_NV12");
|
||||
break;
|
||||
}
|
||||
case COLOR_BGR2YCrCb: case COLOR_RGB2YCrCb:
|
||||
@ -460,11 +610,11 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
fromRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
|
||||
toHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
|
||||
return;
|
||||
}
|
||||
|
||||
fromRGB_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
|
||||
toHSV_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
|
||||
break;
|
||||
}
|
||||
case COLOR_HSV2BGR: case COLOR_HSV2RGB: case COLOR_HSV2BGR_FULL: case COLOR_HSV2RGB_FULL:
|
||||
@ -483,7 +633,7 @@ static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
|
||||
dst.create(sz, CV_MAKETYPE(depth, dcn));
|
||||
|
||||
std::string kernelName = std::string(is_hsv ? "HSV" : "HLS") + "2RGB";
|
||||
toRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
|
||||
fromHSV_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
|
||||
break;
|
||||
}
|
||||
case COLOR_RGBA2mRGBA: case COLOR_mRGBA2RGBA:
|
||||
|
@ -169,7 +169,7 @@ void cv::ocl::fft_teardown()
|
||||
|
||||
// bake a new plan
|
||||
cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _depth, int _flags, FftType _type)
|
||||
: plHandle(0), dft_size(_dft_size), src_step(_src_step), depth(_depth), dst_step(_dst_step), flags(_flags), type(_type)
|
||||
: plHandle(0), dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), depth(_depth), flags(_flags), type(_type)
|
||||
{
|
||||
fft_setup();
|
||||
|
||||
|
@ -741,6 +741,135 @@ void cv::ocl::filter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &ke
|
||||
f->apply(src, dst);
|
||||
}
|
||||
|
||||
const int optimizedSepFilterLocalSize = 16;
|
||||
static void sepFilter2D_SinglePass(const oclMat &src, oclMat &dst,
|
||||
const Mat &row_kernel, const Mat &col_kernel, int bordertype = BORDER_DEFAULT)
|
||||
{
|
||||
size_t lt2[3] = {optimizedSepFilterLocalSize, optimizedSepFilterLocalSize, 1};
|
||||
size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};
|
||||
|
||||
unsigned int src_pitch = src.step;
|
||||
unsigned int dst_pitch = dst.step;
|
||||
|
||||
int src_offset_x = (src.offset % src.step) / src.elemSize();
|
||||
int src_offset_y = src.offset / src.step;
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_x ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_y ));
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholecols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholerows ));
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&dst.rows ));
|
||||
|
||||
String option = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUSX=%d -D RADIUSY=%d",(int)lt2[0], (int)lt2[1],
|
||||
row_kernel.rows / 2, col_kernel.rows / 2 );
|
||||
|
||||
option += " -D KERNEL_MATRIX_X=";
|
||||
for(int i=0; i<row_kernel.rows; i++)
|
||||
option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &row_kernel.at<float>(i) ) );
|
||||
option += "0x0";
|
||||
|
||||
option += " -D KERNEL_MATRIX_Y=";
|
||||
for(int i=0; i<col_kernel.rows; i++)
|
||||
option += cv::format("0x%x,", *reinterpret_cast<const unsigned int*>( &col_kernel.at<float>(i) ) );
|
||||
option += "0x0";
|
||||
|
||||
switch(src.type())
|
||||
{
|
||||
case CV_8UC1:
|
||||
option += " -D SRCTYPE=uchar -D CONVERT_SRCTYPE=convert_float -D WORKTYPE=float";
|
||||
break;
|
||||
case CV_32FC1:
|
||||
option += " -D SRCTYPE=float -D CONVERT_SRCTYPE= -D WORKTYPE=float";
|
||||
break;
|
||||
case CV_8UC2:
|
||||
option += " -D SRCTYPE=uchar2 -D CONVERT_SRCTYPE=convert_float2 -D WORKTYPE=float2";
|
||||
break;
|
||||
case CV_32FC2:
|
||||
option += " -D SRCTYPE=float2 -D CONVERT_SRCTYPE= -D WORKTYPE=float2";
|
||||
break;
|
||||
case CV_8UC3:
|
||||
option += " -D SRCTYPE=uchar3 -D CONVERT_SRCTYPE=convert_float3 -D WORKTYPE=float3";
|
||||
break;
|
||||
case CV_32FC3:
|
||||
option += " -D SRCTYPE=float3 -D CONVERT_SRCTYPE= -D WORKTYPE=float3";
|
||||
break;
|
||||
case CV_8UC4:
|
||||
option += " -D SRCTYPE=uchar4 -D CONVERT_SRCTYPE=convert_float4 -D WORKTYPE=float4";
|
||||
break;
|
||||
case CV_32FC4:
|
||||
option += " -D SRCTYPE=float4 -D CONVERT_SRCTYPE= -D WORKTYPE=float4";
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsUnsupportedFormat, "Image type is not supported!");
|
||||
break;
|
||||
}
|
||||
switch(dst.type())
|
||||
{
|
||||
case CV_8UC1:
|
||||
option += " -D DSTTYPE=uchar -D CONVERT_DSTTYPE=convert_uchar_sat";
|
||||
break;
|
||||
case CV_8UC2:
|
||||
option += " -D DSTTYPE=uchar2 -D CONVERT_DSTTYPE=convert_uchar2_sat";
|
||||
break;
|
||||
case CV_8UC3:
|
||||
option += " -D DSTTYPE=uchar3 -D CONVERT_DSTTYPE=convert_uchar3_sat";
|
||||
break;
|
||||
case CV_8UC4:
|
||||
option += " -D DSTTYPE=uchar4 -D CONVERT_DSTTYPE=convert_uchar4_sat";
|
||||
break;
|
||||
case CV_32FC1:
|
||||
option += " -D DSTTYPE=float -D CONVERT_DSTTYPE=";
|
||||
break;
|
||||
case CV_32FC2:
|
||||
option += " -D DSTTYPE=float2 -D CONVERT_DSTTYPE=";
|
||||
break;
|
||||
case CV_32FC3:
|
||||
option += " -D DSTTYPE=float3 -D CONVERT_DSTTYPE=";
|
||||
break;
|
||||
case CV_32FC4:
|
||||
option += " -D DSTTYPE=float4 -D CONVERT_DSTTYPE=";
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsUnsupportedFormat, "Image type is not supported!");
|
||||
break;
|
||||
}
|
||||
switch(bordertype)
|
||||
{
|
||||
case cv::BORDER_CONSTANT:
|
||||
option += " -D BORDER_CONSTANT";
|
||||
break;
|
||||
case cv::BORDER_REPLICATE:
|
||||
option += " -D BORDER_REPLICATE";
|
||||
break;
|
||||
case cv::BORDER_REFLECT:
|
||||
option += " -D BORDER_REFLECT";
|
||||
break;
|
||||
case cv::BORDER_REFLECT101:
|
||||
option += " -D BORDER_REFLECT_101";
|
||||
break;
|
||||
case cv::BORDER_WRAP:
|
||||
option += " -D BORDER_WRAP";
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
|
||||
break;
|
||||
}
|
||||
|
||||
openCLExecuteKernel(src.clCxt, &filtering_sep_filter_singlepass, "sep_filter_singlepass", gt2, lt2, args,
|
||||
-1, -1, option.c_str() );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SeparableFilter
|
||||
|
||||
@ -790,6 +919,35 @@ Ptr<FilterEngine_GPU> cv::ocl::createSeparableFilter_GPU(const Ptr<BaseRowFilter
|
||||
return makePtr<SeparableFilterEngine_GPU>(rowFilter, columnFilter);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
class SingleStepSeparableFilterEngine_GPU : public FilterEngine_GPU
|
||||
{
|
||||
public:
|
||||
SingleStepSeparableFilterEngine_GPU( const Mat &rowKernel_, const Mat &columnKernel_, const int btype )
|
||||
{
|
||||
bordertype = btype;
|
||||
rowKernel = rowKernel_;
|
||||
columnKernel = columnKernel_;
|
||||
}
|
||||
|
||||
virtual void apply(const oclMat &src, oclMat &dst, Rect roi = Rect(0, 0, -1, -1))
|
||||
{
|
||||
normalizeROI(roi, Size(rowKernel.rows, columnKernel.rows), Point(-1,-1), src.size());
|
||||
|
||||
oclMat srcROI = src(roi);
|
||||
oclMat dstROI = dst(roi);
|
||||
|
||||
sepFilter2D_SinglePass(src, dst, rowKernel, columnKernel, bordertype);
|
||||
}
|
||||
|
||||
Mat rowKernel;
|
||||
Mat columnKernel;
|
||||
int bordertype;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
static void GPUFilterBox(const oclMat &src, oclMat &dst,
|
||||
Size &ksize, const Point anchor, const int borderType)
|
||||
{
|
||||
@ -1243,17 +1401,32 @@ Ptr<BaseColumnFilter_GPU> cv::ocl::getLinearColumnFilter_GPU(int /*bufType*/, in
|
||||
}
|
||||
|
||||
Ptr<FilterEngine_GPU> cv::ocl::createSeparableLinearFilter_GPU(int srcType, int dstType,
|
||||
const Mat &rowKernel, const Mat &columnKernel, const Point &anchor, double delta, int bordertype)
|
||||
const Mat &rowKernel, const Mat &columnKernel, const Point &anchor, double delta, int bordertype, Size imgSize )
|
||||
{
|
||||
int sdepth = CV_MAT_DEPTH(srcType), ddepth = CV_MAT_DEPTH(dstType);
|
||||
int cn = CV_MAT_CN(srcType);
|
||||
int bdepth = std::max(std::max(sdepth, ddepth), CV_32F);
|
||||
int bufType = CV_MAKETYPE(bdepth, cn);
|
||||
Context* clCxt = Context::getContext();
|
||||
|
||||
Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, bordertype);
|
||||
Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, bordertype, delta);
|
||||
//if image size is non-degenerate and large enough
|
||||
//and if filter support is reasonable to satisfy larger local memory requirements,
|
||||
//then we can use single pass routine to avoid extra runtime calls overhead
|
||||
if( clCxt && clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) &&
|
||||
rowKernel.rows <= 21 && columnKernel.rows <= 21 &&
|
||||
(rowKernel.rows & 1) == 1 && (columnKernel.rows & 1) == 1 &&
|
||||
imgSize.width > optimizedSepFilterLocalSize + (rowKernel.rows>>1) &&
|
||||
imgSize.height > optimizedSepFilterLocalSize + (columnKernel.rows>>1) )
|
||||
{
|
||||
return Ptr<FilterEngine_GPU>(new SingleStepSeparableFilterEngine_GPU(rowKernel, columnKernel, bordertype));
|
||||
}
|
||||
else
|
||||
{
|
||||
Ptr<BaseRowFilter_GPU> rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, bordertype);
|
||||
Ptr<BaseColumnFilter_GPU> columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, bordertype, delta);
|
||||
|
||||
return createSeparableFilter_GPU(rowFilter, columnFilter);
|
||||
return createSeparableFilter_GPU(rowFilter, columnFilter);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat &kernelX, const Mat &kernelY, Point anchor, double delta, int bordertype)
|
||||
@ -1277,16 +1450,16 @@ void cv::ocl::sepFilter2D(const oclMat &src, oclMat &dst, int ddepth, const Mat
|
||||
|
||||
dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels()));
|
||||
|
||||
Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, delta, bordertype);
|
||||
Ptr<FilterEngine_GPU> f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, delta, bordertype, src.size());
|
||||
f->apply(src, dst);
|
||||
}
|
||||
|
||||
Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType)
|
||||
Ptr<FilterEngine_GPU> cv::ocl::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int borderType, Size imgSize )
|
||||
{
|
||||
Mat kx, ky;
|
||||
getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F);
|
||||
return createSeparableLinearFilter_GPU(srcType, dstType,
|
||||
kx, ky, Point(-1, -1), 0, borderType);
|
||||
kx, ky, Point(-1, -1), 0, borderType, imgSize);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -1356,7 +1529,7 @@ void cv::ocl::Laplacian(const oclMat &src, oclMat &dst, int ddepth, int ksize, d
|
||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Gaussian Filter
|
||||
|
||||
Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int bordertype)
|
||||
Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int bordertype, Size imgSize)
|
||||
{
|
||||
int depth = CV_MAT_DEPTH(type);
|
||||
|
||||
@ -1383,7 +1556,7 @@ Ptr<FilterEngine_GPU> cv::ocl::createGaussianFilter_GPU(int type, Size ksize, do
|
||||
else
|
||||
ky = getGaussianKernel(ksize.height, sigma2, std::max(depth, CV_32F));
|
||||
|
||||
return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1, -1), 0.0, bordertype);
|
||||
return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1, -1), 0.0, bordertype, imgSize);
|
||||
}
|
||||
|
||||
void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double sigma1, double sigma2, int bordertype)
|
||||
@ -1419,7 +1592,7 @@ void cv::ocl::GaussianBlur(const oclMat &src, oclMat &dst, Size ksize, double si
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype);
|
||||
Ptr<FilterEngine_GPU> f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, bordertype, src.size());
|
||||
f->apply(src, dst);
|
||||
}
|
||||
|
||||
|
@ -48,154 +48,142 @@
|
||||
using namespace cv;
|
||||
using namespace cv::ocl;
|
||||
|
||||
// currently sort procedure on the host is more efficient
|
||||
static bool use_cpu_sorter = true;
|
||||
|
||||
namespace
|
||||
// compact structure for corners
|
||||
struct DefCorner
|
||||
{
|
||||
enum SortMethod
|
||||
float eig; //eigenvalue of corner
|
||||
short x; //x coordinate of corner point
|
||||
short y; //y coordinate of corner point
|
||||
} ;
|
||||
|
||||
// compare procedure for corner
|
||||
//it is used for sort on the host side
|
||||
struct DefCornerCompare
|
||||
{
|
||||
CPU_STL,
|
||||
BITONIC,
|
||||
SELECTION
|
||||
};
|
||||
|
||||
const int GROUP_SIZE = 256;
|
||||
|
||||
template<SortMethod method>
|
||||
struct Sorter
|
||||
{
|
||||
//typedef EigType;
|
||||
};
|
||||
|
||||
//TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed.
|
||||
template<>
|
||||
struct Sorter<CPU_STL>
|
||||
{
|
||||
typedef oclMat EigType;
|
||||
static cv::Mutex cs;
|
||||
static Mat mat_eig;
|
||||
|
||||
//prototype
|
||||
static int clfloat2Gt(cl_float2 pt1, cl_float2 pt2)
|
||||
bool operator()(const DefCorner a, const DefCorner b) const
|
||||
{
|
||||
float v1 = mat_eig.at<float>(cvRound(pt1.s[1]), cvRound(pt1.s[0]));
|
||||
float v2 = mat_eig.at<float>(cvRound(pt2.s[1]), cvRound(pt2.s[0]));
|
||||
return v1 > v2;
|
||||
}
|
||||
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
|
||||
{
|
||||
cv::AutoLock lock(cs);
|
||||
//temporarily use STL's sort function
|
||||
Mat mat_corners = corners;
|
||||
mat_eig = eig_tex;
|
||||
std::sort(mat_corners.begin<cl_float2>(), mat_corners.begin<cl_float2>() + count, clfloat2Gt);
|
||||
corners = mat_corners;
|
||||
return a.eig > b.eig;
|
||||
}
|
||||
};
|
||||
cv::Mutex Sorter<CPU_STL>::cs;
|
||||
cv::Mat Sorter<CPU_STL>::mat_eig;
|
||||
|
||||
template<>
|
||||
struct Sorter<BITONIC>
|
||||
// sort corner points using the OpenCL bitonic sort implementation
|
||||
static void sortCorners_caller(oclMat& corners, const int count)
|
||||
{
|
||||
typedef TextureCL EigType;
|
||||
Context * cxt = Context::getContext();
|
||||
int GS = count/2;
|
||||
int LS = min(255,GS);
|
||||
size_t globalThreads[3] = {GS, 1, 1};
|
||||
size_t localThreads[3] = {LS, 1, 1};
|
||||
|
||||
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
|
||||
// 2^numStages should be equal to count or the output is invalid
|
||||
int numStages = 0;
|
||||
for(int i = count; i > 1; i >>= 1)
|
||||
{
|
||||
Context * cxt = Context::getContext();
|
||||
size_t globalThreads[3] = {count / 2, 1, 1};
|
||||
size_t localThreads[3] = {GROUP_SIZE, 1, 1};
|
||||
|
||||
// 2^numStages should be equal to count or the output is invalid
|
||||
int numStages = 0;
|
||||
for(int i = count; i > 1; i >>= 1)
|
||||
++numStages;
|
||||
}
|
||||
const int argc = 4;
|
||||
std::vector< std::pair<size_t, const void *> > args(argc);
|
||||
std::string kernelname = "sortCorners_bitonicSort";
|
||||
args[0] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
|
||||
args[1] = std::make_pair(sizeof(cl_int), (void *)&count);
|
||||
for(int stage = 0; stage < numStages; ++stage)
|
||||
{
|
||||
args[2] = std::make_pair(sizeof(cl_int), (void *)&stage);
|
||||
for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
|
||||
{
|
||||
++numStages;
|
||||
}
|
||||
const int argc = 5;
|
||||
std::vector< std::pair<size_t, const void *> > args(argc);
|
||||
String kernelname = "sortCorners_bitonicSort";
|
||||
args[0] = std::make_pair(sizeof(cl_mem), (void *)&eig_tex);
|
||||
args[1] = std::make_pair(sizeof(cl_mem), (void *)&corners.data);
|
||||
args[2] = std::make_pair(sizeof(cl_int), (void *)&count);
|
||||
for(int stage = 0; stage < numStages; ++stage)
|
||||
{
|
||||
args[3] = std::make_pair(sizeof(cl_int), (void *)&stage);
|
||||
for(int passOfStage = 0; passOfStage < stage + 1; ++passOfStage)
|
||||
{
|
||||
args[4] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
|
||||
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
args[3] = std::make_pair(sizeof(cl_int), (void *)&passOfStage);
|
||||
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
template<>
|
||||
struct Sorter<SELECTION>
|
||||
{
|
||||
typedef TextureCL EigType;
|
||||
|
||||
static void sortCorners_caller(const EigType& eig_tex, oclMat& corners, const int count)
|
||||
{
|
||||
Context * cxt = Context::getContext();
|
||||
|
||||
size_t globalThreads[3] = {count, 1, 1};
|
||||
size_t localThreads[3] = {GROUP_SIZE, 1, 1};
|
||||
|
||||
std::vector< std::pair<size_t, const void *> > args;
|
||||
//local
|
||||
String kernelname = "sortCorners_selectionSortLocal";
|
||||
int lds_size = GROUP_SIZE * sizeof(cl_float2);
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&eig_tex) );
|
||||
args.push_back( std::make_pair( sizeof(cl_mem), (void*)&corners.data) );
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void*)&count) );
|
||||
args.push_back( std::make_pair( lds_size, (void*)NULL) );
|
||||
|
||||
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
|
||||
|
||||
//final
|
||||
kernelname = "sortCorners_selectionSortFinal";
|
||||
args.pop_back();
|
||||
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1);
|
||||
}
|
||||
};
|
||||
|
||||
int findCorners_caller(
|
||||
const TextureCL& eig,
|
||||
const float threshold,
|
||||
const oclMat& mask,
|
||||
oclMat& corners,
|
||||
const int max_count)
|
||||
// find corners on matrix and put it into array
|
||||
static void findCorners_caller(
|
||||
const oclMat& eig_mat, //input matrix worth eigenvalues
|
||||
oclMat& eigMinMax, //input with min and max values of eigenvalues
|
||||
const float qualityLevel,
|
||||
const oclMat& mask,
|
||||
oclMat& corners, //output array with detected corners
|
||||
oclMat& counter) //output value with number of detected corners, have to be 0 before call
|
||||
{
|
||||
String opt;
|
||||
std::vector<int> k;
|
||||
Context * cxt = Context::getContext();
|
||||
|
||||
std::vector< std::pair<size_t, const void*> > args;
|
||||
String kernelname = "findCorners";
|
||||
|
||||
const int mask_strip = mask.step / mask.elemSize1();
|
||||
|
||||
oclMat g_counter(1, 1, CV_32SC1);
|
||||
g_counter.setTo(0);
|
||||
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&(eig_mat.data)));
|
||||
|
||||
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eig ));
|
||||
int src_pitch = (int)eig_mat.step;
|
||||
args.push_back(std::make_pair( sizeof(cl_int), (void*)&src_pitch ));
|
||||
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&mask.data ));
|
||||
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&corners.data ));
|
||||
args.push_back(std::make_pair( sizeof(cl_int), (void*)&mask_strip));
|
||||
args.push_back(std::make_pair( sizeof(cl_float), (void*)&threshold ));
|
||||
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.rows ));
|
||||
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig.cols ));
|
||||
args.push_back(std::make_pair( sizeof(cl_int), (void*)&max_count ));
|
||||
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&g_counter.data ));
|
||||
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&eigMinMax.data ));
|
||||
args.push_back(std::make_pair( sizeof(cl_float), (void*)&qualityLevel ));
|
||||
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig_mat.rows ));
|
||||
args.push_back(std::make_pair( sizeof(cl_int), (void*)&eig_mat.cols ));
|
||||
args.push_back(std::make_pair( sizeof(cl_int), (void*)&corners.cols ));
|
||||
args.push_back(std::make_pair( sizeof(cl_mem), (void*)&counter.data ));
|
||||
|
||||
size_t globalThreads[3] = {eig.cols, eig.rows, 1};
|
||||
size_t globalThreads[3] = {eig_mat.cols, eig_mat.rows, 1};
|
||||
size_t localThreads[3] = {16, 16, 1};
|
||||
if(!mask.empty())
|
||||
opt += " -D WITH_MASK=1";
|
||||
|
||||
const char * opt = mask.empty() ? "" : "-D WITH_MASK";
|
||||
openCLExecuteKernel(cxt, &imgproc_gftt, kernelname, globalThreads, localThreads, args, -1, -1, opt);
|
||||
return std::min(Mat(g_counter).at<int>(0), max_count);
|
||||
openCLExecuteKernel(cxt, &imgproc_gftt, "findCorners", globalThreads, localThreads, args, -1, -1, opt.c_str());
|
||||
}
|
||||
|
||||
|
||||
static void minMaxEig_caller(const oclMat &src, oclMat &dst, oclMat & tozero)
|
||||
{
|
||||
size_t groupnum = src.clCxt->getDeviceInfo().maxComputeUnits;
|
||||
CV_Assert(groupnum != 0);
|
||||
|
||||
int dbsize = groupnum * 2 * src.elemSize();
|
||||
|
||||
ensureSizeIsEnough(1, dbsize, CV_8UC1, dst);
|
||||
|
||||
cl_mem dst_data = reinterpret_cast<cl_mem>(dst.data);
|
||||
|
||||
int all_cols = src.step / src.elemSize();
|
||||
int pre_cols = (src.offset % src.step) / src.elemSize();
|
||||
int sec_cols = all_cols - (src.offset % src.step + src.cols * src.elemSize() - 1) / src.elemSize() - 1;
|
||||
int invalid_cols = pre_cols + sec_cols;
|
||||
int cols = all_cols - invalid_cols , elemnum = cols * src.rows;
|
||||
int offset = src.offset / src.elemSize();
|
||||
|
||||
{// first parallel pass
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&invalid_cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&offset));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&elemnum));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum));
|
||||
size_t globalThreads[3] = {groupnum * 256, 1, 1};
|
||||
size_t localThreads[3] = {256, 1, 1};
|
||||
openCLExecuteKernel(src.clCxt, &arithm_minMax, "arithm_op_minMax", globalThreads, localThreads,
|
||||
args, -1, -1, "-D T=float -D DEPTH_5");
|
||||
}
|
||||
|
||||
{// run final "serial" kernel to find accumulate results from threads and reset corner counter
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&dst_data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&groupnum ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&tozero.data ));
|
||||
size_t globalThreads[3] = {1, 1, 1};
|
||||
size_t localThreads[3] = {1, 1, 1};
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_gftt, "arithm_op_minMax_final", globalThreads, localThreads,
|
||||
args, -1, -1);
|
||||
}
|
||||
}
|
||||
}//unnamed namespace
|
||||
|
||||
void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image, oclMat& corners, const oclMat& mask)
|
||||
{
|
||||
@ -205,67 +193,99 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
|
||||
ensureSizeIsEnough(image.size(), CV_32F, eig_);
|
||||
|
||||
if (useHarrisDetector)
|
||||
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
|
||||
cornerHarris_dxdy(image, eig_, Dx_, Dy_, blockSize, 3, harrisK);
|
||||
else
|
||||
cornerMinEigenVal_dxdy(image, eig_, Dx_, Dy_, blockSize, 3);
|
||||
|
||||
double maxVal = 0;
|
||||
minMax(eig_, NULL, &maxVal);
|
||||
ensureSizeIsEnough(1,1, CV_32SC1, counter_);
|
||||
|
||||
ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
|
||||
// find max eigenvalue and reset detected counters
|
||||
minMaxEig_caller(eig_,eig_minmax_,counter_);
|
||||
|
||||
Ptr<TextureCL> eig_tex = bindTexturePtr(eig_);
|
||||
int total = findCorners_caller(
|
||||
*eig_tex,
|
||||
static_cast<float>(maxVal * qualityLevel),
|
||||
// allocate buffer for kernels
|
||||
int corner_array_size = std::max(1024, static_cast<int>(image.size().area() * 0.05));
|
||||
|
||||
if(!use_cpu_sorter)
|
||||
{ // round to 2^n
|
||||
unsigned int n=1;
|
||||
for(n=1;n<(unsigned int)corner_array_size;n<<=1) ;
|
||||
corner_array_size = (int)n;
|
||||
|
||||
ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
|
||||
|
||||
// set to 0 to be able to use bitonic sort on the whole 2^n array
|
||||
tmpCorners_.setTo(0);
|
||||
}
|
||||
else
|
||||
{
|
||||
ensureSizeIsEnough(1, corner_array_size , CV_32FC2, tmpCorners_);
|
||||
}
|
||||
|
||||
int total = tmpCorners_.cols; // by default the number of corner is full array
|
||||
std::vector<DefCorner> tmp(tmpCorners_.cols); // input buffer with corner for HOST part of algorithm
|
||||
|
||||
//find points with high eigenvalue and put it into the output array
|
||||
findCorners_caller(
|
||||
eig_,
|
||||
eig_minmax_,
|
||||
static_cast<float>(qualityLevel),
|
||||
mask,
|
||||
tmpCorners_,
|
||||
tmpCorners_.cols);
|
||||
counter_);
|
||||
|
||||
if(!use_cpu_sorter)
|
||||
{// sort detected corners on deivce side
|
||||
sortCorners_caller(tmpCorners_, corner_array_size);
|
||||
}
|
||||
else
|
||||
{// send non-blocking request to read real non-zero number of corners to sort it on the HOST side
|
||||
openCLVerifyCall(clEnqueueReadBuffer(getClCommandQueue(counter_.clCxt), (cl_mem)counter_.data, CL_FALSE, 0,sizeof(int), &total, 0, NULL, NULL));
|
||||
}
|
||||
|
||||
//blocking read whole corners array (sorted or not sorted)
|
||||
openCLReadBuffer(tmpCorners_.clCxt,(cl_mem)tmpCorners_.data,&tmp[0],tmpCorners_.cols*sizeof(DefCorner));
|
||||
|
||||
if (total == 0)
|
||||
{
|
||||
{// check for trivial case
|
||||
corners.release();
|
||||
return;
|
||||
}
|
||||
|
||||
if(use_cpu_sorter)
|
||||
{
|
||||
Sorter<CPU_STL>::sortCorners_caller(eig_, tmpCorners_, total);
|
||||
}
|
||||
else
|
||||
{
|
||||
//if total is power of 2
|
||||
if(((total - 1) & (total)) == 0)
|
||||
{
|
||||
Sorter<BITONIC>::sortCorners_caller(*eig_tex, tmpCorners_, total);
|
||||
}
|
||||
else
|
||||
{
|
||||
Sorter<SELECTION>::sortCorners_caller(*eig_tex, tmpCorners_, total);
|
||||
}
|
||||
{// sort detected corners on cpu side.
|
||||
tmp.resize(total);
|
||||
std::sort(tmp.begin(), tmp.end(), DefCornerCompare());
|
||||
}
|
||||
|
||||
//estimate maximal size of final output array
|
||||
int total_max = maxCorners > 0 ? std::min(maxCorners, total) : total;
|
||||
int D2 = (int)ceil(minDistance * minDistance);
|
||||
// allocate output buffer
|
||||
std::vector<Point2f> tmp2;
|
||||
tmp2.reserve(total_max);
|
||||
|
||||
|
||||
if (minDistance < 1)
|
||||
{
|
||||
Rect roi_range(0, 0, maxCorners > 0 ? std::min(maxCorners, total) : total, 1);
|
||||
tmpCorners_(roi_range).copyTo(corners);
|
||||
{// we have not distance restriction. then just copy with conversion maximal allowed points into output array
|
||||
for(int i=0;i<total_max && tmp[i].eig>0.0f;++i)
|
||||
{
|
||||
tmp2.push_back(Point2f(tmp[i].x,tmp[i].y));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<Point2f> tmp(total);
|
||||
downloadPoints(tmpCorners_, tmp);
|
||||
|
||||
std::vector<Point2f> tmp2;
|
||||
tmp2.reserve(total);
|
||||
|
||||
{// we have distance restriction. then start coping to output array from the first element and check distance for each next one
|
||||
const int cell_size = cvRound(minDistance);
|
||||
const int grid_width = (image.cols + cell_size - 1) / cell_size;
|
||||
const int grid_height = (image.rows + cell_size - 1) / cell_size;
|
||||
|
||||
std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
|
||||
std::vector< std::vector<Point2i> > grid(grid_width * grid_height);
|
||||
|
||||
for (int i = 0; i < total; ++i)
|
||||
for (int i = 0; i < total ; ++i)
|
||||
{
|
||||
Point2f p = tmp[i];
|
||||
DefCorner p = tmp[i];
|
||||
|
||||
if(p.eig<=0.0f)
|
||||
break; // condition to stop that is needed for GPU bitonic sort usage.
|
||||
|
||||
bool good = true;
|
||||
|
||||
@ -287,40 +307,42 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
|
||||
{
|
||||
for (int xx = x1; xx <= x2; xx++)
|
||||
{
|
||||
std::vector<Point2f>& m = grid[yy * grid_width + xx];
|
||||
|
||||
if (!m.empty())
|
||||
std::vector<Point2i>& m = grid[yy * grid_width + xx];
|
||||
if (m.empty())
|
||||
continue;
|
||||
for(size_t j = 0; j < m.size(); j++)
|
||||
{
|
||||
for(size_t j = 0; j < m.size(); j++)
|
||||
{
|
||||
float dx = p.x - m[j].x;
|
||||
float dy = p.y - m[j].y;
|
||||
int dx = p.x - m[j].x;
|
||||
int dy = p.y - m[j].y;
|
||||
|
||||
if (dx * dx + dy * dy < minDistance * minDistance)
|
||||
{
|
||||
good = false;
|
||||
goto break_out;
|
||||
}
|
||||
if (dx * dx + dy * dy < D2)
|
||||
{
|
||||
good = false;
|
||||
goto break_out_;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
break_out:
|
||||
break_out_:
|
||||
|
||||
if(good)
|
||||
{
|
||||
grid[y_cell * grid_width + x_cell].push_back(p);
|
||||
grid[y_cell * grid_width + x_cell].push_back(Point2i(p.x,p.y));
|
||||
|
||||
tmp2.push_back(p);
|
||||
tmp2.push_back(Point2f(p.x,p.y));
|
||||
|
||||
if (maxCorners > 0 && tmp2.size() == static_cast<size_t>(maxCorners))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
|
||||
}
|
||||
int final_size = static_cast<int>(tmp2.size());
|
||||
if(final_size>0)
|
||||
corners.upload(Mat(1, final_size, CV_32FC2, &tmp2[0]));
|
||||
else
|
||||
corners.release();
|
||||
}
|
||||
void cv::ocl::GoodFeaturesToTrackDetector_OCL::downloadPoints(const oclMat &points, std::vector<Point2f> &points_v)
|
||||
{
|
||||
|
@ -866,16 +866,17 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
|
||||
if(gcascade->is_stump_based && gsum.clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE))
|
||||
{
|
||||
//setup local group size
|
||||
localThreads[0] = 8;
|
||||
localThreads[1] = 16;
|
||||
//setup local group size for "pixel step" = 1
|
||||
localThreads[0] = 16;
|
||||
localThreads[1] = 32;
|
||||
localThreads[2] = 1;
|
||||
|
||||
//init maximal number of workgroups
|
||||
//calc maximal number of workgroups
|
||||
int WGNumX = 1+(sizev[0].width /(localThreads[0]));
|
||||
int WGNumY = 1+(sizev[0].height/(localThreads[1]));
|
||||
int WGNumZ = loopcount;
|
||||
int WGNum = 0; //accurate number of non -empty workgroups
|
||||
int WGNumTotal = 0; //accurate number of non-empty workgroups
|
||||
int WGNumSampled = 0; //accurate number of workgroups processed only 1/4 part of all pixels. it is made for large images with scale <= 2
|
||||
oclMat oclWGInfo(1,sizeof(cl_int4) * WGNumX*WGNumY*WGNumZ,CV_8U);
|
||||
{
|
||||
cl_int4* pWGInfo = (cl_int4*)clEnqueueMapBuffer(getClCommandQueue(oclWGInfo.clCxt),(cl_mem)oclWGInfo.datastart,true,CL_MAP_WRITE, 0, oclWGInfo.step, 0,0,0,&status);
|
||||
@ -895,12 +896,16 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
if(gx>=(Width-cascade->orig_window_size.width))
|
||||
continue; // no data to process
|
||||
|
||||
if(scaleinfo[z].factor<=2)
|
||||
{
|
||||
WGNumSampled++;
|
||||
}
|
||||
// save no-empty workgroup info into array
|
||||
pWGInfo[WGNum].s[0] = scaleinfo[z].width_height;
|
||||
pWGInfo[WGNum].s[1] = (gx << 16) | gy;
|
||||
pWGInfo[WGNum].s[2] = scaleinfo[z].imgoff;
|
||||
memcpy(&(pWGInfo[WGNum].s[3]),&(scaleinfo[z].factor),sizeof(float));
|
||||
WGNum++;
|
||||
pWGInfo[WGNumTotal].s[0] = scaleinfo[z].width_height;
|
||||
pWGInfo[WGNumTotal].s[1] = (gx << 16) | gy;
|
||||
pWGInfo[WGNumTotal].s[2] = scaleinfo[z].imgoff;
|
||||
memcpy(&(pWGInfo[WGNumTotal].s[3]),&(scaleinfo[z].factor),sizeof(float));
|
||||
WGNumTotal++;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -908,13 +913,8 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
pWGInfo = NULL;
|
||||
}
|
||||
|
||||
// setup global sizes to have linear array of workgroups with WGNum size
|
||||
globalThreads[0] = localThreads[0]*WGNum;
|
||||
globalThreads[1] = localThreads[1];
|
||||
globalThreads[2] = 1;
|
||||
|
||||
#define NODE_SIZE 12
|
||||
// pack node info to have less memory loads
|
||||
// pack node info to have less memory loads on the device side
|
||||
oclMat oclNodesPK(1,sizeof(cl_int) * NODE_SIZE * nodenum,CV_8U);
|
||||
{
|
||||
cl_int status;
|
||||
@ -963,8 +963,6 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
options += format(" -D WND_SIZE_X=%d",cascade->orig_window_size.width);
|
||||
options += format(" -D WND_SIZE_Y=%d",cascade->orig_window_size.height);
|
||||
options += format(" -D STUMP_BASED=%d",gcascade->is_stump_based);
|
||||
options += format(" -D LSx=%d",localThreads[0]);
|
||||
options += format(" -D LSy=%d",localThreads[1]);
|
||||
options += format(" -D SPLITNODE=%d",splitnode);
|
||||
options += format(" -D SPLITSTAGE=%d",splitstage);
|
||||
options += format(" -D OUTPUTSZ=%d",outputsz);
|
||||
@ -972,8 +970,39 @@ void OclCascadeClassifier::detectMultiScale(oclMat &gimg, CV_OUT std::vector<cv:
|
||||
// init candidate global count to 0
|
||||
int pattern = 0;
|
||||
openCLSafeCall(clEnqueueWriteBuffer(qu, candidatebuffer, 1, 0, 1 * sizeof(pattern),&pattern, 0, NULL, NULL));
|
||||
// execute face detector
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, localThreads, args, -1, -1, options.c_str());
|
||||
|
||||
if(WGNumTotal>WGNumSampled)
|
||||
{// small images and each pixel is processed
|
||||
// setup global sizes to have linear array of workgroups with WGNum size
|
||||
int pixelstep = 1;
|
||||
size_t LS[3]={localThreads[0]/pixelstep,localThreads[1]/pixelstep,1};
|
||||
globalThreads[0] = LS[0]*(WGNumTotal-WGNumSampled);
|
||||
globalThreads[1] = LS[1];
|
||||
globalThreads[2] = 1;
|
||||
String options1 = options;
|
||||
options1 += format(" -D PIXEL_STEP=%d",pixelstep);
|
||||
options1 += format(" -D WGSTART=%d",WGNumSampled);
|
||||
options1 += format(" -D LSx=%d",LS[0]);
|
||||
options1 += format(" -D LSy=%d",LS[1]);
|
||||
// execute face detector
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, LS, args, -1, -1, options1.c_str());
|
||||
}
|
||||
if(WGNumSampled>0)
|
||||
{// large images each 4th pixel is processed
|
||||
// setup global sizes to have linear array of workgroups with WGNum size
|
||||
int pixelstep = 2;
|
||||
size_t LS[3]={localThreads[0]/pixelstep,localThreads[1]/pixelstep,1};
|
||||
globalThreads[0] = LS[0]*WGNumSampled;
|
||||
globalThreads[1] = LS[1];
|
||||
globalThreads[2] = 1;
|
||||
String options2 = options;
|
||||
options2 += format(" -D PIXEL_STEP=%d",pixelstep);
|
||||
options2 += format(" -D WGSTART=%d",0);
|
||||
options2 += format(" -D LSx=%d",LS[0]);
|
||||
options2 += format(" -D LSy=%d",LS[1]);
|
||||
// execute face detector
|
||||
openCLExecuteKernel(gsum.clCxt, &haarobjectdetect, "gpuRunHaarClassifierCascadePacked", globalThreads, LS, args, -1, -1, options2.c_str());
|
||||
}
|
||||
//read candidate buffer back and put it into host list
|
||||
openCLReadBuffer( gsum.clCxt, candidatebuffer, candidate, 4 * sizeof(int)*outputsz );
|
||||
assert(candidate[0]<outputsz);
|
||||
|
@ -76,6 +76,11 @@ namespace cv
|
||||
int cdescr_width;
|
||||
int cdescr_height;
|
||||
|
||||
// A shift value and type that allows qangle to be different
|
||||
// sizes on different hardware
|
||||
int qangle_step_shift;
|
||||
int qangle_type;
|
||||
|
||||
void set_up_constants(int nbins, int block_stride_x, int block_stride_y,
|
||||
int nblocks_win_x, int nblocks_win_y);
|
||||
|
||||
@ -153,6 +158,7 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
|
||||
hog_device_cpu = true;
|
||||
else
|
||||
hog_device_cpu = false;
|
||||
|
||||
}
|
||||
|
||||
size_t cv::ocl::HOGDescriptor::getDescriptorSize() const
|
||||
@ -213,7 +219,7 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
|
||||
effect_size = img.size();
|
||||
|
||||
grad.create(img.size(), CV_32FC2);
|
||||
qangle.create(img.size(), CV_8UC2);
|
||||
qangle.create(img.size(), hog::qangle_type);
|
||||
|
||||
const size_t block_hist_size = getBlockHistogramSize();
|
||||
const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride);
|
||||
@ -1606,6 +1612,16 @@ void cv::ocl::device::hog::set_up_constants(int nbins,
|
||||
|
||||
int descr_size = descr_width * nblocks_win_y;
|
||||
cdescr_size = descr_size;
|
||||
|
||||
qangle_type = CV_8UC2;
|
||||
qangle_step_shift = 0;
|
||||
// Some Intel devices have low single-byte access performance,
|
||||
// so we change the datatype here.
|
||||
if (Context::getContext()->supportsFeature(FEATURE_CL_INTEL_DEVICE))
|
||||
{
|
||||
qangle_type = CV_32SC2;
|
||||
qangle_step_shift = 2;
|
||||
}
|
||||
}
|
||||
|
||||
void cv::ocl::device::hog::compute_hists(int nbins,
|
||||
@ -1627,7 +1643,7 @@ void cv::ocl::device::hog::compute_hists(int nbins,
|
||||
int blocks_total = img_block_width * img_block_height;
|
||||
|
||||
int grad_quadstep = grad.step >> 2;
|
||||
int qangle_step = qangle.step;
|
||||
int qangle_step = qangle.step >> qangle_step_shift;
|
||||
|
||||
int blocks_in_group = 4;
|
||||
size_t localThreads[3] = { blocks_in_group * 24, 2, 1 };
|
||||
@ -1892,7 +1908,7 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width,
|
||||
char correctGamma = (correct_gamma) ? 1 : 0;
|
||||
int img_step = img.step;
|
||||
int grad_quadstep = grad.step >> 3;
|
||||
int qangle_step = qangle.step >> 1;
|
||||
int qangle_step = qangle.step >> (1 + qangle_step_shift);
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
|
||||
@ -1927,7 +1943,7 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width,
|
||||
char correctGamma = (correct_gamma) ? 1 : 0;
|
||||
int img_step = img.step >> 2;
|
||||
int grad_quadstep = grad.step >> 3;
|
||||
int qangle_step = qangle.step >> 1;
|
||||
int qangle_step = qangle.step >> (1 + qangle_step_shift);
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&height));
|
||||
args.push_back( std::make_pair( sizeof(cl_int), (void *)&width));
|
||||
|
@ -1035,67 +1035,117 @@ namespace cv
|
||||
else
|
||||
scale = 1. / scale;
|
||||
|
||||
if (ksize > 0)
|
||||
const int sobel_lsz = 16;
|
||||
if((src.type() == CV_8UC1 || src.type() == CV_32FC1) &&
|
||||
(ksize==3 || ksize==5 || ksize==7 || ksize==-1) &&
|
||||
src.wholerows > sobel_lsz + (ksize>>1) &&
|
||||
src.wholecols > sobel_lsz + (ksize>>1))
|
||||
{
|
||||
Context* clCxt = Context::getContext();
|
||||
if(clCxt->supportsFeature(FEATURE_CL_INTEL_DEVICE) && src.type() == CV_8UC1 &&
|
||||
src.cols % 8 == 0 && src.rows % 8 == 0 &&
|
||||
ksize==3 &&
|
||||
(borderType ==cv::BORDER_REFLECT ||
|
||||
borderType == cv::BORDER_REPLICATE ||
|
||||
borderType ==cv::BORDER_REFLECT101 ||
|
||||
borderType ==cv::BORDER_WRAP))
|
||||
Dx.create(src.size(), CV_32FC1);
|
||||
Dy.create(src.size(), CV_32FC1);
|
||||
|
||||
CV_Assert(Dx.rows == Dy.rows && Dx.cols == Dy.cols);
|
||||
|
||||
size_t lt2[3] = {sobel_lsz, sobel_lsz, 1};
|
||||
size_t gt2[3] = {lt2[0]*(1 + (src.cols-1) / lt2[0]), lt2[1]*(1 + (src.rows-1) / lt2[1]), 1};
|
||||
|
||||
unsigned int src_pitch = src.step;
|
||||
unsigned int Dx_pitch = Dx.step;
|
||||
unsigned int Dy_pitch = Dy.step;
|
||||
|
||||
int src_offset_x = (src.offset % src.step) / src.elemSize();
|
||||
int src_offset_y = src.offset / src.step;
|
||||
|
||||
float _scale = scale;
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_x ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src_offset_y ));
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dx_pitch ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dy.offset ));
|
||||
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&Dy_pitch ));
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholecols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.wholerows ));
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&Dx.rows ));
|
||||
|
||||
args.push_back( std::make_pair( sizeof(cl_float), (void *)&_scale ));
|
||||
|
||||
String option = cv::format("-D BLK_X=%d -D BLK_Y=%d",(int)lt2[0],(int)lt2[1]);
|
||||
switch(src.type())
|
||||
{
|
||||
Dx.create(src.size(), CV_32FC1);
|
||||
Dy.create(src.size(), CV_32FC1);
|
||||
|
||||
const unsigned int block_x = 8;
|
||||
const unsigned int block_y = 8;
|
||||
|
||||
unsigned int src_pitch = src.step;
|
||||
unsigned int dst_pitch = Dx.cols;
|
||||
|
||||
float _scale = scale;
|
||||
|
||||
std::vector<std::pair<size_t , const void *> > args;
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&src.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dx.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_mem) , (void *)&Dy.data ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.cols ));
|
||||
args.push_back( std::make_pair( sizeof(cl_int) , (void *)&src.rows ));
|
||||
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&src_pitch ));
|
||||
args.push_back( std::make_pair( sizeof(cl_uint) , (void *)&dst_pitch ));
|
||||
args.push_back( std::make_pair( sizeof(cl_float) , (void *)&_scale ));
|
||||
size_t gt2[3] = {src.cols, src.rows, 1}, lt2[3] = {block_x, block_y, 1};
|
||||
|
||||
String option = "-D BLK_X=8 -D BLK_Y=8";
|
||||
switch(borderType)
|
||||
{
|
||||
case cv::BORDER_REPLICATE:
|
||||
option += " -D BORDER_REPLICATE";
|
||||
break;
|
||||
case cv::BORDER_REFLECT:
|
||||
option += " -D BORDER_REFLECT";
|
||||
break;
|
||||
case cv::BORDER_REFLECT101:
|
||||
option += " -D BORDER_REFLECT101";
|
||||
break;
|
||||
case cv::BORDER_WRAP:
|
||||
option += " -D BORDER_WRAP";
|
||||
break;
|
||||
}
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_sobel3, "sobel3", gt2, lt2, args, -1, -1, option.c_str() );
|
||||
case CV_8UC1:
|
||||
option += " -D SRCTYPE=uchar";
|
||||
break;
|
||||
case CV_32FC1:
|
||||
option += " -D SRCTYPE=float";
|
||||
break;
|
||||
}
|
||||
else
|
||||
switch(borderType)
|
||||
{
|
||||
case cv::BORDER_CONSTANT:
|
||||
option += " -D BORDER_CONSTANT";
|
||||
break;
|
||||
case cv::BORDER_REPLICATE:
|
||||
option += " -D BORDER_REPLICATE";
|
||||
break;
|
||||
case cv::BORDER_REFLECT:
|
||||
option += " -D BORDER_REFLECT";
|
||||
break;
|
||||
case cv::BORDER_REFLECT101:
|
||||
option += " -D BORDER_REFLECT_101";
|
||||
break;
|
||||
case cv::BORDER_WRAP:
|
||||
option += " -D BORDER_WRAP";
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsBadFlag, "BORDER type is not supported!");
|
||||
break;
|
||||
}
|
||||
|
||||
String kernel_name;
|
||||
switch(ksize)
|
||||
{
|
||||
case -1:
|
||||
option += " -D SCHARR";
|
||||
kernel_name = "sobel3";
|
||||
break;
|
||||
case 3:
|
||||
kernel_name = "sobel3";
|
||||
break;
|
||||
case 5:
|
||||
kernel_name = "sobel5";
|
||||
break;
|
||||
case 7:
|
||||
kernel_name = "sobel7";
|
||||
break;
|
||||
default:
|
||||
CV_Error(CV_StsBadFlag, "Kernel size is not supported!");
|
||||
break;
|
||||
}
|
||||
openCLExecuteKernel(src.clCxt, &imgproc_sobel3, kernel_name, gt2, lt2, args, -1, -1, option.c_str() );
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ksize > 0)
|
||||
{
|
||||
Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, 0, borderType);
|
||||
Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, 0, borderType);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType);
|
||||
Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType);
|
||||
else
|
||||
{
|
||||
Scharr(src, Dx, CV_32F, 1, 0, scale, 0, borderType);
|
||||
Scharr(src, Dy, CV_32F, 0, 1, scale, 0, borderType);
|
||||
}
|
||||
}
|
||||
CV_Assert(Dx.offset == 0 && Dy.offset == 0);
|
||||
}
|
||||
|
@ -63,7 +63,7 @@ inline float sum(float val)
|
||||
return val;
|
||||
}
|
||||
|
||||
static float clamp1(float var, float learningRate, float diff, float minVar)
|
||||
inline float clamp1(float var, float learningRate, float diff, float minVar)
|
||||
{
|
||||
return fmax(var + learningRate * (diff * diff - var), minVar);
|
||||
}
|
||||
@ -96,7 +96,7 @@ inline float sum(const float4 val)
|
||||
return (val.x + val.y + val.z);
|
||||
}
|
||||
|
||||
static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
|
||||
inline void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_step)
|
||||
{
|
||||
float4 val = ptr[(k * rows + y) * ptr_step + x];
|
||||
ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
|
||||
@ -104,7 +104,7 @@ static void swap4(__global float4* ptr, int x, int y, int k, int rows, int ptr_s
|
||||
}
|
||||
|
||||
|
||||
static float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
|
||||
inline float4 clamp1(const float4 var, float learningRate, const float4 diff, float minVar)
|
||||
{
|
||||
float4 result;
|
||||
result.x = fmax(var.x + learningRate * (diff.x * diff.x - var.x), minVar);
|
||||
@ -128,7 +128,7 @@ typedef struct
|
||||
uchar c_shadowVal;
|
||||
} con_srtuct_t;
|
||||
|
||||
static void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
|
||||
inline void swap(__global float* ptr, int x, int y, int k, int rows, int ptr_step)
|
||||
{
|
||||
float val = ptr[(k * rows + y) * ptr_step + x];
|
||||
ptr[(k * rows + y) * ptr_step + x] = ptr[((k + 1) * rows + y) * ptr_step + x];
|
||||
|
File diff suppressed because it is too large
Load Diff
185
modules/ocl/src/opencl/filtering_sep_filter_singlepass.cl
Normal file
185
modules/ocl/src/opencl/filtering_sep_filter_singlepass.cl
Normal file
@ -0,0 +1,185 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2013, Intel Corporation, all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/////////////////////////////////Macro for border type////////////////////////////////////////////
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
//CCCCCC|abcdefgh|CCCCCCC
|
||||
#define EXTRAPOLATE(x, maxV)
|
||||
#elif defined BORDER_REPLICATE
|
||||
//aaaaaa|abcdefgh|hhhhhhh
|
||||
#define EXTRAPOLATE(x, maxV) \
|
||||
{ \
|
||||
(x) = max(min((x), (maxV) - 1), 0); \
|
||||
}
|
||||
#elif defined BORDER_WRAP
|
||||
//cdefgh|abcdefgh|abcdefg
|
||||
#define EXTRAPOLATE(x, maxV) \
|
||||
{ \
|
||||
(x) = ( (x) + (maxV) ) % (maxV); \
|
||||
}
|
||||
#elif defined BORDER_REFLECT
|
||||
//fedcba|abcdefgh|hgfedcb
|
||||
#define EXTRAPOLATE(x, maxV) \
|
||||
{ \
|
||||
(x) = min(((maxV)-1)*2-(x)+1, max((x),-(x)-1) ); \
|
||||
}
|
||||
#elif defined BORDER_REFLECT_101
|
||||
//gfedcb|abcdefgh|gfedcba
|
||||
#define EXTRAPOLATE(x, maxV) \
|
||||
{ \
|
||||
(x) = min(((maxV)-1)*2-(x), max((x),-(x)) ); \
|
||||
}
|
||||
#else
|
||||
#error No extrapolation method
|
||||
#endif
|
||||
|
||||
#define SRC(_x,_y) CONVERT_SRCTYPE(((global SRCTYPE*)(Src+(_y)*SrcPitch))[_x])
|
||||
|
||||
#ifdef BORDER_CONSTANT
|
||||
//CCCCCC|abcdefgh|CCCCCCC
|
||||
#define ELEM(_x,_y,r_edge,t_edge,const_v) (_x)<0 | (_x) >= (r_edge) | (_y)<0 | (_y) >= (t_edge) ? (const_v) : SRC((_x),(_y))
|
||||
#else
|
||||
#define ELEM(_x,_y,r_edge,t_edge,const_v) SRC((_x),(_y))
|
||||
#endif
|
||||
|
||||
#define DST(_x,_y) (((global DSTTYPE*)(Dst+DstOffset+(_y)*DstPitch))[_x])
|
||||
|
||||
//horizontal and vertical filter kernels
|
||||
//should be defined on host during compile time to avoid overhead
|
||||
__constant uint mat_kernelX[] = {KERNEL_MATRIX_X};
|
||||
__constant uint mat_kernelY[] = {KERNEL_MATRIX_Y};
|
||||
|
||||
__kernel __attribute__((reqd_work_group_size(BLK_X,BLK_Y,1))) void sep_filter_singlepass
|
||||
(
|
||||
__global uchar* Src,
|
||||
const uint SrcPitch,
|
||||
const int srcOffsetX,
|
||||
const int srcOffsetY,
|
||||
__global uchar* Dst,
|
||||
const int DstOffset,
|
||||
const uint DstPitch,
|
||||
int width,
|
||||
int height,
|
||||
int dstWidth,
|
||||
int dstHeight
|
||||
)
|
||||
{
|
||||
//RADIUSX, RADIUSY are filter dimensions
|
||||
//BLK_X, BLK_Y are local workgroup sizes
|
||||
//all these should be defined on host during compile time
|
||||
//first lsmem array for source pixels used in first pass,
|
||||
//second lsmemDy for storing first pass results
|
||||
__local WORKTYPE lsmem[BLK_Y+2*RADIUSY][BLK_X+2*RADIUSX];
|
||||
__local WORKTYPE lsmemDy[BLK_Y][BLK_X+2*RADIUSX];
|
||||
|
||||
//get local and global ids - used as image and local memory array indexes
|
||||
int lix = get_local_id(0);
|
||||
int liy = get_local_id(1);
|
||||
|
||||
int x = (int)get_global_id(0);
|
||||
int y = (int)get_global_id(1);
|
||||
|
||||
//calculate pixel position in source image taking image offset into account
|
||||
int srcX = x + srcOffsetX - RADIUSX;
|
||||
int srcY = y + srcOffsetY - RADIUSY;
|
||||
int xb = srcX;
|
||||
int yb = srcY;
|
||||
|
||||
//extrapolate coordinates, if needed
|
||||
//and read my own source pixel into local memory
|
||||
//taking into account the extra border pixels, which are loaded by the work-items with the lowest local ids
|
||||
int clocY = liy;
|
||||
int cSrcY = srcY;
|
||||
do
|
||||
{
|
||||
int yb = cSrcY;
|
||||
EXTRAPOLATE(yb, (height));
|
||||
|
||||
int clocX = lix;
|
||||
int cSrcX = srcX;
|
||||
do
|
||||
{
|
||||
int xb = cSrcX;
|
||||
EXTRAPOLATE(xb,(width));
|
||||
lsmem[clocY][clocX] = ELEM(xb, yb, (width), (height), 0 );
|
||||
|
||||
clocX += BLK_X;
|
||||
cSrcX += BLK_X;
|
||||
}
|
||||
while(clocX < BLK_X+(RADIUSX*2));
|
||||
|
||||
clocY += BLK_Y;
|
||||
cSrcY += BLK_Y;
|
||||
}
|
||||
while(clocY < BLK_Y+(RADIUSY*2));
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
//do vertical filter pass
|
||||
//and store intermediate results to second local memory array
|
||||
int i;
|
||||
WORKTYPE sum = 0.0f;
|
||||
int clocX = lix;
|
||||
do
|
||||
{
|
||||
sum = 0.0f;
|
||||
for(i=0; i<=2*RADIUSY; i++)
|
||||
sum = mad(lsmem[liy+i][clocX], as_float(mat_kernelY[i]), sum);
|
||||
lsmemDy[liy][clocX] = sum;
|
||||
clocX += BLK_X;
|
||||
}
|
||||
while(clocX < BLK_X+(RADIUSX*2));
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
//if this pixel happened to be out of image borders because of global size rounding,
|
||||
//then just return
|
||||
if( x >= dstWidth || y >=dstHeight ) return;
|
||||
|
||||
//do second horizontal filter pass
|
||||
//and calculate final result
|
||||
sum = 0.0f;
|
||||
for(i=0; i<=2*RADIUSX; i++)
|
||||
sum = mad(lsmemDy[liy][lix+i], as_float(mat_kernelX[i]), sum);
|
||||
|
||||
//store result into destination image
|
||||
DST(x,y) = CONVERT_DSTTYPE(sum);
|
||||
}
|
@ -126,13 +126,11 @@ __kernel void gpuRunHaarClassifierCascadePacked(
|
||||
)
|
||||
|
||||
{
|
||||
// this version used information provided for each workgroup
|
||||
// no empty WG
|
||||
int gid = (int)get_group_id(0);
|
||||
int lid_x = (int)get_local_id(0);
|
||||
int lid_y = (int)get_local_id(1);
|
||||
int lid = lid_y*LSx+lid_x;
|
||||
int4 WGInfo = pWGInfo[gid];
|
||||
int4 WGInfo = pWGInfo[WGSTART+gid];
|
||||
int GroupX = (WGInfo.y >> 16)&0xFFFF;
|
||||
int GroupY = (WGInfo.y >> 0 )& 0xFFFF;
|
||||
int Width = (WGInfo.x >> 16)&0xFFFF;
|
||||
@ -140,8 +138,8 @@ __kernel void gpuRunHaarClassifierCascadePacked(
|
||||
int ImgOffset = WGInfo.z;
|
||||
float ScaleFactor = as_float(WGInfo.w);
|
||||
|
||||
#define DATA_SIZE_X (LSx+WND_SIZE_X)
|
||||
#define DATA_SIZE_Y (LSy+WND_SIZE_Y)
|
||||
#define DATA_SIZE_X (PIXEL_STEP*LSx+WND_SIZE_X)
|
||||
#define DATA_SIZE_Y (PIXEL_STEP*LSy+WND_SIZE_Y)
|
||||
#define DATA_SIZE (DATA_SIZE_X*DATA_SIZE_Y)
|
||||
|
||||
local int SumL[DATA_SIZE];
|
||||
@ -165,9 +163,11 @@ __kernel void gpuRunHaarClassifierCascadePacked(
|
||||
int4 info1 = p;
|
||||
int4 info2 = pq;
|
||||
|
||||
{
|
||||
int xl = lid_x;
|
||||
int yl = lid_y;
|
||||
// calc processed ROI coordinate in local mem
|
||||
int xl = lid_x*PIXEL_STEP;
|
||||
int yl = lid_y*PIXEL_STEP;
|
||||
|
||||
{// calc variance_norm_factor for all stages
|
||||
int OffsetLocal = yl * DATA_SIZE_X + xl;
|
||||
int OffsetGlobal = (GroupY+yl)* pixelstep + (GroupX+xl);
|
||||
|
||||
@ -194,13 +194,13 @@ __kernel void gpuRunHaarClassifierCascadePacked(
|
||||
|
||||
int result = (1.0f>0.0f);
|
||||
for(int stageloop = start_stage; (stageloop < end_stage) && result; stageloop++ )
|
||||
{// iterate until candidate is exist
|
||||
{// iterate until candidate is valid
|
||||
float stage_sum = 0.0f;
|
||||
__global GpuHidHaarStageClassifier* stageinfo = (__global GpuHidHaarStageClassifier*)
|
||||
((__global uchar*)stagecascadeptr+stageloop*sizeof(GpuHidHaarStageClassifier));
|
||||
int lcl_off = (yl*DATA_SIZE_X)+(xl);
|
||||
int stagecount = stageinfo->count;
|
||||
float stagethreshold = stageinfo->threshold;
|
||||
int lcl_off = (lid_y*DATA_SIZE_X)+(lid_x);
|
||||
for(int nodeloop = 0; nodeloop < stagecount; nodecounter++,nodeloop++ )
|
||||
{
|
||||
// simple macro to extract shorts from int
|
||||
@ -212,7 +212,7 @@ __kernel void gpuRunHaarClassifierCascadePacked(
|
||||
int4 n1 = pN[1];
|
||||
int4 n2 = pN[2];
|
||||
float nodethreshold = as_float(n2.y) * variance_norm_factor;
|
||||
// calc sum of intensity pixels according to node information
|
||||
// calc sum of intensity pixels according to classifier node information
|
||||
float classsum =
|
||||
(SumL[M0(n0.x)+lcl_off] - SumL[M1(n0.x)+lcl_off] - SumL[M0(n0.y)+lcl_off] + SumL[M1(n0.y)+lcl_off]) * as_float(n1.z) +
|
||||
(SumL[M0(n0.z)+lcl_off] - SumL[M1(n0.z)+lcl_off] - SumL[M0(n0.w)+lcl_off] + SumL[M1(n0.w)+lcl_off]) * as_float(n1.w) +
|
||||
@ -228,8 +228,8 @@ __kernel void gpuRunHaarClassifierCascadePacked(
|
||||
int index = 1+atomic_inc((volatile global int*)candidate); //get index to write global data with face info
|
||||
if(index<OUTPUTSZ)
|
||||
{
|
||||
int x = GroupX+lid_x;
|
||||
int y = GroupY+lid_y;
|
||||
int x = GroupX+xl;
|
||||
int y = GroupY+yl;
|
||||
int4 candidate_result;
|
||||
candidate_result.x = convert_int_rtn(x*ScaleFactor);
|
||||
candidate_result.y = convert_int_rtn(y*ScaleFactor);
|
||||
|
@ -381,8 +381,8 @@ struct PtrStepSz {
|
||||
int step;
|
||||
int rows, cols;
|
||||
};
|
||||
inline int get(struct PtrStepSz data, int y, int x) { return *((__global int *)((__global char*)data.ptr + data.step * y + sizeof(int) * x)); }
|
||||
inline void set(struct PtrStepSz data, int y, int x, int value) { *((__global int *)((__global char*)data.ptr + data.step * y + sizeof(int) * x)) = value; }
|
||||
inline int get(struct PtrStepSz data, int y, int x) { return *((__global int *)((__global char*)data.ptr + data.step * (y + 1) + sizeof(int) * (x + 1))); }
|
||||
inline void set(struct PtrStepSz data, int y, int x, int value) { *((__global int *)((__global char*)data.ptr + data.step * (y + 1) + sizeof(int) * (x + 1))) = value; }
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// do Hysteresis for pixel whose edge type is 1
|
||||
@ -494,7 +494,7 @@ edgesHysteresisLocal
|
||||
}
|
||||
}
|
||||
#else
|
||||
struct PtrStepSz map = {((__global int *)((__global char*)map_ptr + map_offset)), map_step, rows, cols};
|
||||
struct PtrStepSz map = {((__global int *)((__global char*)map_ptr + map_offset)), map_step, rows + 1, cols + 1};
|
||||
|
||||
__local int smem[18][18];
|
||||
|
||||
@ -507,13 +507,13 @@ edgesHysteresisLocal
|
||||
|
||||
smem[threadIdx.y + 1][threadIdx.x + 1] = x < map.cols && y < map.rows ? get(map, y, x) : 0;
|
||||
if (threadIdx.y == 0)
|
||||
smem[0][threadIdx.x + 1] = y > 0 ? get(map, y - 1, x) : 0;
|
||||
smem[0][threadIdx.x + 1] = x < map.cols ? get(map, y - 1, x) : 0;
|
||||
if (threadIdx.y == blockDim.y - 1)
|
||||
smem[blockDim.y + 1][threadIdx.x + 1] = y + 1 < map.rows ? get(map, y + 1, x) : 0;
|
||||
if (threadIdx.x == 0)
|
||||
smem[threadIdx.y + 1][0] = x > 0 ? get(map, y, x - 1) : 0;
|
||||
smem[threadIdx.y + 1][0] = y < map.rows ? get(map, y, x - 1) : 0;
|
||||
if (threadIdx.x == blockDim.x - 1)
|
||||
smem[threadIdx.y + 1][blockDim.x + 1] = x + 1 < map.cols ? get(map, y, x + 1) : 0;
|
||||
smem[threadIdx.y + 1][blockDim.x + 1] = x + 1 < map.cols && y < map.rows ? get(map, y, x + 1) : 0;
|
||||
if (threadIdx.x == 0 && threadIdx.y == 0)
|
||||
smem[0][0] = y > 0 && x > 0 ? get(map, y - 1, x - 1) : 0;
|
||||
if (threadIdx.x == blockDim.x - 1 && threadIdx.y == 0)
|
||||
@ -525,7 +525,7 @@ edgesHysteresisLocal
|
||||
|
||||
barrier(CLK_LOCAL_MEM_FENCE);
|
||||
|
||||
if (x >= map.cols || y >= map.rows)
|
||||
if (x >= cols || y >= rows)
|
||||
return;
|
||||
|
||||
int n;
|
||||
@ -576,7 +576,7 @@ edgesHysteresisLocal
|
||||
if (n > 0)
|
||||
{
|
||||
const int ind = atomic_inc(counter);
|
||||
st[ind] = (ushort2)(x, y);
|
||||
st[ind] = (ushort2)(x + 1, y + 1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user